Merge pull request #1 from antirez/unstable

update
This commit is contained in:
chendianqiang 2018-07-20 19:20:14 +08:00 committed by GitHub
commit 68ceb46697
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
89 changed files with 3081 additions and 2950 deletions

View File

@ -8,7 +8,9 @@ each source file that you contribute.
# IMPORTANT: HOW TO USE REDIS GITHUB ISSUES
* Github issues SHOULD ONLY BE USED to report bugs, and for DETAILED feature
requests. Everything else belongs to the Redis Google Group.
requests. Everything else belongs to the Redis Google Group:
https://groups.google.com/forum/m/#!forum/Redis-db
PLEASE DO NOT POST GENERAL QUESTIONS that are not about bugs or suspected
bugs in the Github issues system. We'll be very happy to help you and provide
@ -30,7 +32,7 @@ each source file that you contribute.
a. Fork Redis on github ( http://help.github.com/fork-a-repo/ )
b. Create a topic branch (git checkout -b my_branch)
c. Push to your branch (git push origin my_branch)
d. Initiate a pull request on github ( http://help.github.com/send-pull-requests/ )
d. Initiate a pull request on github ( https://help.github.com/articles/creating-a-pull-request/ )
e. Done :)
For minor fixes just open a pull request on Github.

View File

@ -435,7 +435,7 @@ top comment inside `server.c`.
After the command operates in some way, it returns a reply to the client,
usually using `addReply()` or a similar function defined inside `networking.c`.
There are tons of commands implementations inside th Redis source code
There are tons of commands implementations inside the Redis source code
that can serve as examples of actual commands implementations. To write
a few toy commands can be a good exercise to familiarize with the code base.

6
deps/README.md vendored
View File

@ -22,7 +22,7 @@ just following tose steps:
1. Remove the jemalloc directory.
2. Substitute it with the new jemalloc source tree.
3. Edit the Makefile localted in the same directoy as the README you are
3. Edit the Makefile localted in the same directory as the README you are
reading, and change the --with-version in the Jemalloc configure script
options with the version you are using. This is required because otherwise
Jemalloc configuration script is broken and will not work nested in another
@ -50,7 +50,7 @@ This is never upgraded since it's part of the Redis project. If there are change
Hiredis
---
Hiredis uses the SDS string library, that must be the same version used inside Redis itself. Hiredis is also very critical for Sentinel. Historically Redis often used forked versions of hiredis in a way or the other. In order to upgrade it is adviced to take a lot of care:
Hiredis uses the SDS string library, that must be the same version used inside Redis itself. Hiredis is also very critical for Sentinel. Historically Redis often used forked versions of hiredis in a way or the other. In order to upgrade it is advised to take a lot of care:
1. Check with diff if hiredis API changed and what impact it could have in Redis.
2. Make sure thet the SDS library inside Hiredis and inside Redis are compatible.
@ -83,6 +83,6 @@ and our version:
1. Makefile is modified to allow a different compiler than GCC.
2. We have the implementation source code, and directly link to the following external libraries: `lua_cjson.o`, `lua_struct.o`, `lua_cmsgpack.o` and `lua_bit.o`.
3. There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order toa void direct bytecode exectuion.
3. There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order toa void direct bytecode execution.

View File

@ -215,4 +215,32 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero);
}
JEMALLOC_ALWAYS_INLINE int
iget_defrag_hint(tsdn_t *tsdn, void* ptr, int *bin_util, int *run_util) {
int defrag = 0;
rtree_ctx_t rtree_ctx_fallback;
rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
szind_t szind;
bool is_slab;
rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &is_slab);
if (likely(is_slab)) {
/* Small allocation. */
extent_t *slab = iealloc(tsdn, ptr);
arena_t *arena = extent_arena_get(slab);
szind_t binind = extent_szind_get(slab);
bin_t *bin = &arena->bins[binind];
malloc_mutex_lock(tsdn, &bin->lock);
/* don't bother moving allocations from the slab currently used for new allocations */
if (slab != bin->slabcur) {
const bin_info_t *bin_info = &bin_infos[binind];
size_t availregs = bin_info->nregs * bin->stats.curslabs;
*bin_util = ((long long)bin->stats.curregs<<16) / availregs;
*run_util = ((long long)(bin_info->nregs - extent_nfree_get(slab))<<16) / bin_info->nregs;
defrag = 1;
}
malloc_mutex_unlock(tsdn, &bin->lock);
}
return defrag;
}
#endif /* JEMALLOC_INTERNAL_INLINES_C_H */

View File

@ -120,3 +120,7 @@
# define JEMALLOC_RESTRICT_RETURN
# define JEMALLOC_ALLOCATOR
#endif
/* This version of Jemalloc, modified for Redis, has the je_get_defrag_hint()
* function. */
#define JEMALLOC_FRAG_HINT

View File

@ -3324,3 +3324,14 @@ jemalloc_postfork_child(void) {
}
/******************************************************************************/
/* Helps the application decide if a pointer is worth re-allocating in order to reduce fragmentation.
* returns 0 if the allocation is in the currently active run,
* or when it is not causing any frag issue (large or huge bin)
* returns the bin utilization and run utilization both in fixed point 16:16.
* If the application decides to re-allocate it should use MALLOCX_TCACHE_NONE when doing so. */
JEMALLOC_EXPORT int JEMALLOC_NOTHROW
get_defrag_hint(void* ptr, int *bin_util, int *run_util) {
assert(ptr != NULL);
return iget_defrag_hint(TSDN_NULL, ptr, bin_util, run_util);
}

View File

@ -385,6 +385,7 @@ void mp_encode_lua_table_as_array(lua_State *L, mp_buf *buf, int level) {
#endif
mp_encode_array(L,buf,len);
luaL_checkstack(L, 1, "in function mp_encode_lua_table_as_array");
for (j = 1; j <= len; j++) {
lua_pushnumber(L,j);
lua_gettable(L,-2);
@ -400,6 +401,7 @@ void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
* Lua API, we need to iterate a first time. Note that an alternative
* would be to do a single run, and then hack the buffer to insert the
* map opcodes for message pack. Too hackish for this lib. */
luaL_checkstack(L, 3, "in function mp_encode_lua_table_as_map");
lua_pushnil(L);
while(lua_next(L,-2)) {
lua_pop(L,1); /* remove value, keep key for next iteration. */
@ -515,10 +517,14 @@ int mp_pack(lua_State *L) {
if (nargs == 0)
return luaL_argerror(L, 0, "MessagePack pack needs input.");
if (!lua_checkstack(L, nargs))
return luaL_argerror(L, 0, "Too many arguments for MessagePack pack.");
buf = mp_buf_new(L);
for(i = 1; i <= nargs; i++) {
/* Copy argument i to top of stack for _encode processing;
* the encode function pops it from the stack when complete. */
luaL_checkstack(L, 1, "in function mp_check");
lua_pushvalue(L, i);
mp_encode_lua_type(L,buf,0);
@ -547,6 +553,7 @@ void mp_decode_to_lua_array(lua_State *L, mp_cur *c, size_t len) {
int index = 1;
lua_newtable(L);
luaL_checkstack(L, 1, "in function mp_decode_to_lua_array");
while(len--) {
lua_pushnumber(L,index++);
mp_decode_to_lua_type(L,c);
@ -821,6 +828,9 @@ int mp_unpack_full(lua_State *L, int limit, int offset) {
* subtract the entire buffer size from the unprocessed size
* to get our next start offset */
int offset = len - c.left;
luaL_checkstack(L, 1, "in function mp_unpack_full");
/* Return offset -1 when we have have processed the entire buffer. */
lua_pushinteger(L, c.left == 0 ? -1 : offset);
/* Results are returned with the arg elements still

View File

@ -1,7 +1,7 @@
/*
** {======================================================
** Library for packing/unpacking structures.
** $Id: struct.c,v 1.4 2012/07/04 18:54:29 roberto Exp $
** $Id: struct.c,v 1.7 2018/05/11 22:04:31 roberto Exp $
** See Copyright Notice at the end of this file
** =======================================================
*/
@ -15,8 +15,8 @@
** h/H - signed/unsigned short
** l/L - signed/unsigned long
** T - size_t
** i/In - signed/unsigned integer with size `n' (default is size of int)
** cn - sequence of `n' chars (from/to a string); when packing, n==0 means
** i/In - signed/unsigned integer with size 'n' (default is size of int)
** cn - sequence of 'n' chars (from/to a string); when packing, n==0 means
the whole string; when unpacking, n==0 means use the previous
read number as the string length
** s - zero-terminated string
@ -89,14 +89,12 @@ typedef struct Header {
} Header;
static int getnum (lua_State *L, const char **fmt, int df) {
static int getnum (const char **fmt, int df) {
if (!isdigit(**fmt)) /* no number? */
return df; /* return default value */
else {
int a = 0;
do {
if (a > (INT_MAX / 10) || a * 10 > (INT_MAX - (**fmt - '0')))
luaL_error(L, "integral size overflow");
a = a*10 + *((*fmt)++) - '0';
} while (isdigit(**fmt));
return a;
@ -117,9 +115,9 @@ static size_t optsize (lua_State *L, char opt, const char **fmt) {
case 'f': return sizeof(float);
case 'd': return sizeof(double);
case 'x': return 1;
case 'c': return getnum(L, fmt, 1);
case 'c': return getnum(fmt, 1);
case 'i': case 'I': {
int sz = getnum(L, fmt, sizeof(int));
int sz = getnum(fmt, sizeof(int));
if (sz > MAXINTSIZE)
luaL_error(L, "integral size %d is larger than limit of %d",
sz, MAXINTSIZE);
@ -152,7 +150,7 @@ static void controloptions (lua_State *L, int opt, const char **fmt,
case '>': h->endian = BIG; return;
case '<': h->endian = LITTLE; return;
case '!': {
int a = getnum(L, fmt, MAXALIGN);
int a = getnum(fmt, MAXALIGN);
if (!isp2(a))
luaL_error(L, "alignment %d is not a power of 2", a);
h->align = a;
@ -295,21 +293,26 @@ static int b_unpack (lua_State *L) {
const char *fmt = luaL_checkstring(L, 1);
size_t ld;
const char *data = luaL_checklstring(L, 2, &ld);
size_t pos = luaL_optinteger(L, 3, 1) - 1;
size_t pos = luaL_optinteger(L, 3, 1);
luaL_argcheck(L, pos > 0, 3, "offset must be 1 or greater");
pos--; /* Lua indexes are 1-based, but here we want 0-based for C
* pointer math. */
int n = 0; /* number of results */
defaultoptions(&h);
lua_settop(L, 2);
while (*fmt) {
int opt = *fmt++;
size_t size = optsize(L, opt, &fmt);
pos += gettoalign(pos, &h, opt, size);
luaL_argcheck(L, pos+size <= ld, 2, "data string too short");
luaL_checkstack(L, 1, "too many results");
luaL_argcheck(L, size <= ld && pos <= ld - size,
2, "data string too short");
/* stack space for item + next position */
luaL_checkstack(L, 2, "too many results");
switch (opt) {
case 'b': case 'B': case 'h': case 'H':
case 'l': case 'L': case 'T': case 'i': case 'I': { /* integer types */
int issigned = islower(opt);
lua_Number res = getinteger(data+pos, h.endian, issigned, size);
lua_pushnumber(L, res);
lua_pushnumber(L, res); n++;
break;
}
case 'x': {
@ -319,25 +322,26 @@ static int b_unpack (lua_State *L) {
float f;
memcpy(&f, data+pos, size);
correctbytes((char *)&f, sizeof(f), h.endian);
lua_pushnumber(L, f);
lua_pushnumber(L, f); n++;
break;
}
case 'd': {
double d;
memcpy(&d, data+pos, size);
correctbytes((char *)&d, sizeof(d), h.endian);
lua_pushnumber(L, d);
lua_pushnumber(L, d); n++;
break;
}
case 'c': {
if (size == 0) {
if (!lua_isnumber(L, -1))
luaL_error(L, "format `c0' needs a previous size");
if (n == 0 || !lua_isnumber(L, -1))
luaL_error(L, "format 'c0' needs a previous size");
size = lua_tonumber(L, -1);
lua_pop(L, 1);
luaL_argcheck(L, pos+size <= ld, 2, "data string too short");
lua_pop(L, 1); n--;
luaL_argcheck(L, size <= ld && pos <= ld - size,
2, "data string too short");
}
lua_pushlstring(L, data+pos, size);
lua_pushlstring(L, data+pos, size); n++;
break;
}
case 's': {
@ -345,15 +349,15 @@ static int b_unpack (lua_State *L) {
if (e == NULL)
luaL_error(L, "unfinished string in data");
size = (e - (data+pos)) + 1;
lua_pushlstring(L, data+pos, size - 1);
lua_pushlstring(L, data+pos, size - 1); n++;
break;
}
default: controloptions(L, opt, &fmt, &h);
}
pos += size;
}
lua_pushinteger(L, pos + 1);
return lua_gettop(L) - 2;
lua_pushinteger(L, pos + 1); /* next position */
return n + 1;
}
@ -399,7 +403,7 @@ LUALIB_API int luaopen_struct (lua_State *L) {
/******************************************************************************
* Copyright (C) 2010-2012 Lua.org, PUC-Rio. All rights reserved.
* Copyright (C) 2010-2018 Lua.org, PUC-Rio. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the

View File

@ -639,7 +639,7 @@ slave-priority 100
# it with the specified string.
# 4) During replication, when a slave performs a full resynchronization with
# its master, the content of the whole database is removed in order to
# load the RDB file just transfered.
# load the RDB file just transferred.
#
# In all the above cases the default is to delete objects in a blocking way,
# like if DEL was called. However you can configure each case specifically
@ -1106,6 +1106,17 @@ zset-max-ziplist-value 64
# composed of many HyperLogLogs with cardinality in the 0 - 15000 range.
hll-sparse-max-bytes 3000
# Streams macro node max size / items. The stream data structure is a radix
# tree of big nodes that encode multiple items inside. Using this configuration
# it is possible to configure how big a single node can be in bytes, and the
# maximum number of items it may contain before switching to a new node when
# appending new stream entries. If any of the following settings are set to
# zero, the limit is ignored, so for instance it is possible to set just a
# max entires limit by setting max-bytes to 0 and max-entries to the desired
# value.
stream-node-max-bytes 4096
stream-node-max-entries 100
# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in
# order to help rehashing the main Redis hash table (the one mapping top-level
# keys to values). The hash table implementation Redis uses (see dict.c)
@ -1200,6 +1211,12 @@ hz 10
# big latency spikes.
aof-rewrite-incremental-fsync yes
# When redis saves RDB file, if the following option is enabled
# the file will be fsync-ed every 32 MB of data generated. This is useful
# in order to commit the file to the disk more incrementally and avoid
# big latency spikes.
rdb-save-incremental-fsync yes
# Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good
# idea to start with the default settings and only change them after investigating
# how to improve the performances and how the keys LFU change over time, which

View File

@ -194,3 +194,31 @@ sentinel failover-timeout mymaster 180000
#
# sentinel client-reconfig-script mymaster /var/redis/reconfig.sh
# SECURITY
#
# By default SENTINEL SET will not be able to change the notification-script
# and client-reconfig-script at runtime. This avoids a trivial security issue
# where clients can set the script to anything and trigger a failover in order
# to get the program executed.
sentinel deny-scripts-reconfig yes
# REDIS COMMANDS RENAMING
#
# Sometimes the Redis server has certain commands, that are needed for Sentinel
# to work correctly, renamed to unguessable strings. This is often the case
# of CONFIG and SLAVEOF in the context of providers that provide Redis as
# a service, and don't want the customers to reconfigure the instances outside
# of the administration console.
#
# In such case it is possible to tell Sentinel to use different command names
# instead of the normal ones. For example if the master "mymaster", and the
# associated slaves, have "CONFIG" all renamed to "GUESSME", I could use:
#
# sentinel rename-command mymaster CONFIG GUESSME
#
# After such configuration is set, every time Sentinel would use CONFIG it will
# use GUESSME instead. Note that there is no actual need to respect the command
# case, so writing "config guessme" is the same in the example above.
#
# SENTINEL SET can also be used in order to perform this configuration at runtime.

View File

@ -144,7 +144,7 @@ endif
REDIS_SERVER_NAME=redis-server
REDIS_SENTINEL_NAME=redis-sentinel
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o
REDIS_CLI_NAME=redis-cli
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o
REDIS_BENCHMARK_NAME=redis-benchmark

View File

@ -433,7 +433,7 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)
* before replying to a client. */
int invert = fe->mask & AE_BARRIER;
/* Note the "fe->mask & mask & ..." code: maybe an already
/* Note the "fe->mask & mask & ..." code: maybe an already
* processed event removed an element that fired and we still
* didn't processed, so we check if the event is still valid.
*
@ -485,7 +485,7 @@ int aeWait(int fd, int mask, long long milliseconds) {
if ((retval = poll(&pfd, 1, milliseconds))== 1) {
if (pfd.revents & POLLIN) retmask |= AE_READABLE;
if (pfd.revents & POLLOUT) retmask |= AE_WRITABLE;
if (pfd.revents & POLLERR) retmask |= AE_WRITABLE;
if (pfd.revents & POLLERR) retmask |= AE_WRITABLE;
if (pfd.revents & POLLHUP) retmask |= AE_WRITABLE;
return retmask;
} else {

View File

@ -228,7 +228,7 @@ static void killAppendOnlyChild(void) {
void stopAppendOnly(void) {
serverAssert(server.aof_state != AOF_OFF);
flushAppendOnlyFile(1);
aof_fsync(server.aof_fd);
redis_fsync(server.aof_fd);
close(server.aof_fd);
server.aof_fd = -1;
@ -261,7 +261,7 @@ int startAppendOnly(void) {
serverLog(LL_WARNING,"AOF was enabled but there is already a child process saving an RDB file on disk. An AOF background was scheduled to start when possible.");
} else {
/* If there is a pending AOF rewrite, we need to switch it off and
* start a new one: the old one cannot be reused becuase it is not
* start a new one: the old one cannot be reused because it is not
* accumulating the AOF buffer. */
if (server.aof_child_pid != -1) {
serverLog(LL_WARNING,"AOF was enabled but there is already an AOF rewriting in background. Stopping background AOF and starting a rewrite now.");
@ -476,10 +476,10 @@ void flushAppendOnlyFile(int force) {
/* Perform the fsync if needed. */
if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
/* aof_fsync is defined as fdatasync() for Linux in order to avoid
/* redis_fsync is defined as fdatasync() for Linux in order to avoid
* flushing metadata. */
latencyStartMonitor(latency);
aof_fsync(server.aof_fd); /* Let's try to get this data on the disk */
redis_fsync(server.aof_fd); /* Let's try to get this data on the disk */
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-fsync-always",latency);
server.aof_last_fsync = server.unixtime;
@ -645,7 +645,7 @@ struct client *createFakeClient(void) {
c->obuf_soft_limit_reached_time = 0;
c->watched_keys = listCreate();
c->peerid = NULL;
listSetFreeMethod(c->reply,decrRefCountVoid);
listSetFreeMethod(c->reply,freeClientReplyValue);
listSetDupMethod(c->reply,dupClientReplyValue);
initClientMultiState(c);
return c;
@ -683,7 +683,7 @@ int loadAppendOnlyFile(char *filename) {
exit(1);
}
/* Handle a zero-length AOF file as a special case. An emtpy AOF file
/* Handle a zero-length AOF file as a special case. An empty AOF file
* is a valid AOF because an empty server with AOF enabled will create
* a zero length file at startup, that will remain like that if no write
* operation is received. */
@ -1221,7 +1221,6 @@ int rewriteAppendOnlyFileRio(rio *aof) {
dictIterator *di = NULL;
dictEntry *de;
size_t processed = 0;
long long now = mstime();
int j;
for (j = 0; j < server.dbnum; j++) {
@ -1247,9 +1246,6 @@ int rewriteAppendOnlyFileRio(rio *aof) {
expiretime = getExpire(db,&key);
/* If this key is already expired skip it */
if (expiretime != -1 && expiretime < now) continue;
/* Save the key and associated value */
if (o->type == OBJ_STRING) {
/* Emit a SET command */
@ -1322,7 +1318,7 @@ int rewriteAppendOnlyFile(char *filename) {
rioInitWithFile(&aof,fp);
if (server.aof_rewrite_incremental_fsync)
rioSetAutoSync(&aof,AOF_AUTOSYNC_BYTES);
rioSetAutoSync(&aof,REDIS_AUTOSYNC_BYTES);
if (server.aof_use_rdb_preamble) {
int error;
@ -1690,7 +1686,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
oldfd = server.aof_fd;
server.aof_fd = newfd;
if (server.aof_fsync == AOF_FSYNC_ALWAYS)
aof_fsync(newfd);
redis_fsync(newfd);
else if (server.aof_fsync == AOF_FSYNC_EVERYSEC)
aof_background_fsync(newfd);
server.aof_selected_db = -1; /* Make sure SELECT is re-issued */
@ -1717,7 +1713,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
"Background AOF rewrite signal handler took %lldus", ustime()-now);
} else if (!bysignal && exitcode != 0) {
/* SIGUSR1 is whitelisted, so we have a way to kill a child without
* tirggering an error conditon. */
* tirggering an error condition. */
if (bysignal != SIGUSR1)
server.aof_lastbgrewrite_status = C_ERR;
serverLog(LL_WARNING,

View File

@ -16,7 +16,7 @@
* pthread_mutex_t myvar_mutex;
* atomicSet(myvar,12345);
*
* If atomic primitives are availble (tested in config.h) the mutex
* If atomic primitives are available (tested in config.h) the mutex
* is not used.
*
* Never use return value from the macros, instead use the AtomicGetIncr()

View File

@ -187,7 +187,7 @@ void *bioProcessBackgroundJobs(void *arg) {
if (type == BIO_CLOSE_FILE) {
close((long)job->arg1);
} else if (type == BIO_AOF_FSYNC) {
aof_fsync((long)job->arg1);
redis_fsync((long)job->arg1);
} else if (type == BIO_LAZY_FREE) {
/* What we free changes depending on what arguments are set:
* arg1 -> free the object at pointer.

View File

@ -918,7 +918,7 @@ void bitfieldCommand(client *c) {
struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */
int owtype = BFOVERFLOW_WRAP; /* Overflow type. */
int readonly = 1;
size_t higest_write_offset = 0;
size_t highest_write_offset = 0;
for (j = 2; j < c->argc; j++) {
int remargs = c->argc-j-1; /* Remaining args other than current. */
@ -968,8 +968,8 @@ void bitfieldCommand(client *c) {
if (opcode != BITFIELDOP_GET) {
readonly = 0;
if (higest_write_offset < bitoffset + bits - 1)
higest_write_offset = bitoffset + bits - 1;
if (highest_write_offset < bitoffset + bits - 1)
highest_write_offset = bitoffset + bits - 1;
/* INCRBY and SET require another argument. */
if (getLongLongFromObjectOrReply(c,c->argv[j+3],&i64,NULL) != C_OK){
zfree(ops);
@ -999,7 +999,7 @@ void bitfieldCommand(client *c) {
/* Lookup by making room up to the farest bit reached by
* this operation. */
if ((o = lookupStringForBitCommand(c,
higest_write_offset)) == NULL) return;
highest_write_offset)) == NULL) return;
}
addReplyMultiBulkLen(c,numops);

View File

@ -314,8 +314,9 @@ void handleClientsBlockedOnKeys(void) {
if (de) {
list *clients = dictGetVal(de);
int numclients = listLength(clients);
unsigned long zcard = zsetLength(o);
while(numclients--) {
while(numclients-- && zcard) {
listNode *clientnode = listFirst(clients);
client *receiver = clientnode->value;
@ -332,6 +333,7 @@ void handleClientsBlockedOnKeys(void) {
? ZSET_MIN : ZSET_MAX;
unblockClient(receiver);
genericZpopCommand(receiver,&rl->key,1,where,1,NULL);
zcard--;
/* Replicate the command. */
robj *argv[2];
@ -368,40 +370,48 @@ void handleClientsBlockedOnKeys(void) {
if (receiver->btype != BLOCKED_STREAM) continue;
streamID *gt = dictFetchValue(receiver->bpop.keys,
rl->key);
if (s->last_id.ms > gt->ms ||
(s->last_id.ms == gt->ms &&
s->last_id.seq > gt->seq))
{
/* If we blocked in the context of a consumer
* group, we need to resolve the group and update the
* last ID the client is blocked for: this is needed
* because serving other clients in the same consumer
* group will alter the "last ID" of the consumer
* group, and clients blocked in a consumer group are
* always blocked for the ">" ID: we need to deliver
* only new messages and avoid unblocking the client
* otherwise. */
streamCG *group = NULL;
if (receiver->bpop.xread_group) {
group = streamLookupCG(s,
receiver->bpop.xread_group->ptr);
/* If the group was not found, send an error
* to the consumer. */
if (!group) {
addReplyError(receiver,
"-NOGROUP the consumer group this client "
"was blocked on no longer exists");
unblockClient(receiver);
continue;
} else {
*gt = group->last_id;
}
}
if (streamCompareID(&s->last_id, gt) > 0) {
streamID start = *gt;
start.seq++; /* Can't overflow, it's an uint64_t */
/* If we blocked in the context of a consumer
* group, we need to resolve the group and
* consumer here. */
streamCG *group = NULL;
/* Lookup the consumer for the group, if any. */
streamConsumer *consumer = NULL;
if (receiver->bpop.xread_group) {
group = streamLookupCG(s,
receiver->bpop.xread_group->ptr);
/* In theory if the group is not found we
* just perform the read without the group,
* but actually when the group, or the key
* itself is deleted (triggering the removal
* of the group), we check for blocked clients
* and send them an error. */
}
int noack = 0;
if (group) {
consumer = streamLookupConsumer(group,
receiver->bpop.xread_consumer->ptr,
1);
noack = receiver->bpop.xread_group_noack;
}
/* Note that after we unblock the client, 'gt'
* and other receiver->bpop stuff are no longer
* valid, so we must do the setup above before
* this call. */
unblockClient(receiver);
/* Emit the two elements sub-array consisting of
* the name of the stream and the data we
* extracted from it. Wrapped in a single-item
@ -416,7 +426,13 @@ void handleClientsBlockedOnKeys(void) {
};
streamReplyWithRange(receiver,s,&start,NULL,
receiver->bpop.xread_count,
0, group, consumer, 0, &pi);
0, group, consumer, noack, &pi);
/* Note that after we unblock the client, 'gt'
* and other receiver->bpop stuff are no longer
* valid, so we must do the setup above before
* this call. */
unblockClient(receiver);
}
}
}

View File

@ -2120,7 +2120,7 @@ void clusterWriteHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
nwritten = write(fd, link->sndbuf, sdslen(link->sndbuf));
if (nwritten <= 0) {
serverLog(LL_DEBUG,"I/O error writing to node link: %s",
strerror(errno));
(nwritten == -1) ? strerror(errno) : "short write");
handleLinkIOError(link);
return;
}
@ -2377,7 +2377,7 @@ void clusterSendPing(clusterLink *link, int type) {
* same time.
*
* Since we have non-voting slaves that lower the probability of an entry
* to feature our node, we set the number of entires per packet as
* to feature our node, we set the number of entries per packet as
* 10% of the total nodes we have. */
wanted = floor(dictSize(server.cluster->nodes)/10);
if (wanted < 3) wanted = 3;
@ -3100,7 +3100,7 @@ void clusterHandleSlaveFailover(void) {
(unsigned long long) myself->configEpoch);
}
/* Take responsability for the cluster slots. */
/* Take responsibility for the cluster slots. */
clusterFailoverReplaceYourMaster();
} else {
clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_VOTES);
@ -3151,11 +3151,11 @@ void clusterHandleSlaveMigration(int max_slaves) {
!nodeTimedOut(mymaster->slaves[j])) okslaves++;
if (okslaves <= server.cluster_migration_barrier) return;
/* Step 3: Idenitfy a candidate for migration, and check if among the
/* Step 3: Identify a candidate for migration, and check if among the
* masters with the greatest number of ok slaves, I'm the one with the
* smallest node ID (the "candidate slave").
*
* Note: this means that eventually a replica migration will occurr
* Note: this means that eventually a replica migration will occur
* since slaves that are reachable again always have their FAIL flag
* cleared, so eventually there must be a candidate. At the same time
* this does not mean that there are no race conditions possible (two
@ -3736,7 +3736,7 @@ void clusterCloseAllSlots(void) {
* -------------------------------------------------------------------------- */
/* The following are defines that are only used in the evaluation function
* and are based on heuristics. Actaully the main point about the rejoin and
* and are based on heuristics. Actually the main point about the rejoin and
* writable delay is that they should be a few orders of magnitude larger
* than the network latency. */
#define CLUSTER_MAX_REJOIN_DELAY 5000
@ -4178,27 +4178,27 @@ void clusterCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"addslots <slot> [slot ...] -- Assign slots to current node.",
"bumpepoch -- Advance the cluster config epoch.",
"count-failure-reports <node-id> -- Return number of failure reports for <node-id>.",
"countkeysinslot <slot> - Return the number of keys in <slot>.",
"delslots <slot> [slot ...] -- Delete slots information from current node.",
"failover [force|takeover] -- Promote current slave node to being a master.",
"forget <node-id> -- Remove a node from the cluster.",
"getkeysinslot <slot> <count> -- Return key names stored by current node in a slot.",
"flushslots -- Delete current node own slots information.",
"info - Return onformation about the cluster.",
"keyslot <key> -- Return the hash slot for <key>.",
"meet <ip> <port> [bus-port] -- Connect nodes into a working cluster.",
"myid -- Return the node id.",
"nodes -- Return cluster configuration seen by node. Output format:",
"ADDSLOTS <slot> [slot ...] -- Assign slots to current node.",
"BUMPEPOCH -- Advance the cluster config epoch.",
"COUNT-failure-reports <node-id> -- Return number of failure reports for <node-id>.",
"COUNTKEYSINSLOT <slot> - Return the number of keys in <slot>.",
"DELSLOTS <slot> [slot ...] -- Delete slots information from current node.",
"FAILOVER [force|takeover] -- Promote current slave node to being a master.",
"FORGET <node-id> -- Remove a node from the cluster.",
"GETKEYSINSLOT <slot> <count> -- Return key names stored by current node in a slot.",
"FLUSHSLOTS -- Delete current node own slots information.",
"INFO - Return onformation about the cluster.",
"KEYSLOT <key> -- Return the hash slot for <key>.",
"MEET <ip> <port> [bus-port] -- Connect nodes into a working cluster.",
"MYID -- Return the node id.",
"NODES -- Return cluster configuration seen by node. Output format:",
" <id> <ip:port> <flags> <master> <pings> <pongs> <epoch> <link> <slot> ... <slot>",
"replicate <node-id> -- Configure current node as slave to <node-id>.",
"reset [hard|soft] -- Reset current node (default: soft).",
"set-config-epoch <epoch> - Set config epoch of current node.",
"setslot <slot> (importing|migrating|stable|node <node-id>) -- Set slot state.",
"slaves <node-id> -- Return <node-id> slaves.",
"slots -- Return information about slots range mappings. Each range is made of:",
"REPLICATE <node-id> -- Configure current node as slave to <node-id>.",
"RESET [hard|soft] -- Reset current node (default: soft).",
"SET-config-epoch <epoch> - Set config epoch of current node.",
"SETSLOT <slot> (importing|migrating|stable|node <node-id>) -- Set slot state.",
"SLAVES <node-id> -- Return <node-id> slaves.",
"SLOTS -- Return information about slots range mappings. Each range is made of:",
" start, end, master and replicas IP addresses, ports and ids",
NULL
};
@ -4746,8 +4746,7 @@ NULL
clusterReset(hard);
addReply(c,shared.ok);
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try CLUSTER HELP",
(char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
return;
}
}
@ -4835,15 +4834,39 @@ void dumpCommand(client *c) {
/* RESTORE key ttl serialized-value [REPLACE] */
void restoreCommand(client *c) {
long long ttl;
long long ttl, lfu_freq = -1, lru_idle = -1, lru_clock = -1;
rio payload;
int j, type, replace = 0;
int j, type, replace = 0, absttl = 0;
robj *obj;
/* Parse additional options */
for (j = 4; j < c->argc; j++) {
int additional = c->argc-j-1;
if (!strcasecmp(c->argv[j]->ptr,"replace")) {
replace = 1;
} else if (!strcasecmp(c->argv[j]->ptr,"absttl")) {
absttl = 1;
} else if (!strcasecmp(c->argv[j]->ptr,"idletime") && additional >= 1 &&
lfu_freq == -1)
{
if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lru_idle,NULL)
!= C_OK) return;
if (lru_idle < 0) {
addReplyError(c,"Invalid IDLETIME value, must be >= 0");
return;
}
lru_clock = LRU_CLOCK();
j++; /* Consume additional arg. */
} else if (!strcasecmp(c->argv[j]->ptr,"freq") && additional >= 1 &&
lru_idle == -1)
{
if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lfu_freq,NULL)
!= C_OK) return;
if (lfu_freq < 0 || lfu_freq > 255) {
addReplyError(c,"Invalid FREQ value, must be >= 0 and <= 255");
return;
}
j++; /* Consume additional arg. */
} else {
addReply(c,shared.syntaxerr);
return;
@ -4884,7 +4907,11 @@ void restoreCommand(client *c) {
/* Create the key and set the TTL if any */
dbAdd(c->db,c->argv[1],obj);
if (ttl) setExpire(c,c->db,c->argv[1],mstime()+ttl);
if (ttl) {
if (!absttl) ttl+=mstime();
setExpire(c,c->db,c->argv[1],ttl);
}
objectSetLRUOrLFU(obj,lfu_freq,lru_idle,lru_clock);
signalModifiedKey(c->db,c->argv[1]);
addReply(c,shared.ok);
server.dirty++;
@ -5557,7 +5584,7 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co
if (error_code == CLUSTER_REDIR_CROSS_SLOT) {
addReplySds(c,sdsnew("-CROSSSLOT Keys in request don't hash to the same slot\r\n"));
} else if (error_code == CLUSTER_REDIR_UNSTABLE) {
/* The request spawns mutliple keys in the same slot,
/* The request spawns multiple keys in the same slot,
* but the slot is not "stable" currently as there is
* a migration or import in progress. */
addReplySds(c,sdsnew("-TRYAGAIN Multiple keys request during rehashing of slot\r\n"));
@ -5589,7 +5616,11 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co
* longer handles, the client is sent a redirection error, and the function
* returns 1. Otherwise 0 is returned and no operation is performed. */
int clusterRedirectBlockedClientIfNeeded(client *c) {
if (c->flags & CLIENT_BLOCKED && c->btype == BLOCKED_LIST) {
if (c->flags & CLIENT_BLOCKED &&
(c->btype == BLOCKED_LIST ||
c->btype == BLOCKED_ZSET ||
c->btype == BLOCKED_STREAM))
{
dictEntry *de;
dictIterator *di;

View File

@ -243,7 +243,7 @@ union clusterMsgData {
#define CLUSTER_PROTO_VER 1 /* Cluster bus protocol version. */
typedef struct {
char sig[4]; /* Siganture "RCmb" (Redis Cluster message bus). */
char sig[4]; /* Signature "RCmb" (Redis Cluster message bus). */
uint32_t totlen; /* Total length of this message */
uint16_t ver; /* Protocol version, currently set to 1. */
uint16_t port; /* TCP base port number. */

View File

@ -390,7 +390,7 @@ void loadServerConfigFromString(char *config) {
}
} else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
zfree(server.masterauth);
server.masterauth = zstrdup(argv[1]);
server.masterauth = argv[1][0] ? zstrdup(argv[1]) : NULL;
} else if (!strcasecmp(argv[0],"slave-serve-stale-data") && argc == 2) {
if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
@ -431,6 +431,11 @@ void loadServerConfigFromString(char *config) {
if ((server.active_defrag_enabled = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
if (server.active_defrag_enabled) {
#ifndef HAVE_DEFRAG
err = "active defrag can't be enabled without proper jemalloc support"; goto loaderr;
#endif
}
} else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
if ((server.daemonize = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
@ -483,6 +488,13 @@ void loadServerConfigFromString(char *config) {
yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"rdb-save-incremental-fsync") &&
argc == 2)
{
if ((server.rdb_save_incremental_fsync =
yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"aof-load-truncated") && argc == 2) {
if ((server.aof_load_truncated = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
@ -496,7 +508,7 @@ void loadServerConfigFromString(char *config) {
err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN";
goto loaderr;
}
server.requirepass = zstrdup(argv[1]);
server.requirepass = argv[1][0] ? zstrdup(argv[1]) : NULL;
} else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
zfree(server.pidfile);
server.pidfile = zstrdup(argv[1]);
@ -509,14 +521,16 @@ void loadServerConfigFromString(char *config) {
server.rdb_filename = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"active-defrag-threshold-lower") && argc == 2) {
server.active_defrag_threshold_lower = atoi(argv[1]);
if (server.active_defrag_threshold_lower < 0) {
err = "active-defrag-threshold-lower must be 0 or greater";
if (server.active_defrag_threshold_lower < 0 ||
server.active_defrag_threshold_lower > 1000) {
err = "active-defrag-threshold-lower must be between 0 and 1000";
goto loaderr;
}
} else if (!strcasecmp(argv[0],"active-defrag-threshold-upper") && argc == 2) {
server.active_defrag_threshold_upper = atoi(argv[1]);
if (server.active_defrag_threshold_upper < 0) {
err = "active-defrag-threshold-upper must be 0 or greater";
if (server.active_defrag_threshold_upper < 0 ||
server.active_defrag_threshold_upper > 1000) {
err = "active-defrag-threshold-upper must be between 0 and 1000";
goto loaderr;
}
} else if (!strcasecmp(argv[0],"active-defrag-ignore-bytes") && argc == 2) {
@ -547,6 +561,10 @@ void loadServerConfigFromString(char *config) {
server.hash_max_ziplist_entries = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"hash-max-ziplist-value") && argc == 2) {
server.hash_max_ziplist_value = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"stream-node-max-bytes") && argc == 2) {
server.stream_node_max_bytes = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"stream-node-max-entries") && argc == 2) {
server.stream_node_max_entries = atoi(argv[1]);
} else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){
/* DEAD OPTION */
} else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2) {
@ -1015,6 +1033,8 @@ void configSetCommand(client *c) {
"cluster-slave-no-failover",server.cluster_slave_no_failover) {
} config_set_bool_field(
"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync) {
} config_set_bool_field(
"rdb-save-incremental-fsync",server.rdb_save_incremental_fsync) {
} config_set_bool_field(
"aof-load-truncated",server.aof_load_truncated) {
} config_set_bool_field(
@ -1056,15 +1076,15 @@ void configSetCommand(client *c) {
/* Numerical fields.
* config_set_numerical_field(name,var,min,max) */
} config_set_numerical_field(
"tcp-keepalive",server.tcpkeepalive,0,LLONG_MAX) {
"tcp-keepalive",server.tcpkeepalive,0,INT_MAX) {
} config_set_numerical_field(
"maxmemory-samples",server.maxmemory_samples,1,LLONG_MAX) {
"maxmemory-samples",server.maxmemory_samples,1,INT_MAX) {
} config_set_numerical_field(
"lfu-log-factor",server.lfu_log_factor,0,LLONG_MAX) {
"lfu-log-factor",server.lfu_log_factor,0,INT_MAX) {
} config_set_numerical_field(
"lfu-decay-time",server.lfu_decay_time,0,LLONG_MAX) {
"lfu-decay-time",server.lfu_decay_time,0,INT_MAX) {
} config_set_numerical_field(
"timeout",server.maxidletime,0,LONG_MAX) {
"timeout",server.maxidletime,0,INT_MAX) {
} config_set_numerical_field(
"active-defrag-threshold-lower",server.active_defrag_threshold_lower,0,1000) {
} config_set_numerical_field(
@ -1076,52 +1096,56 @@ void configSetCommand(client *c) {
} config_set_numerical_field(
"active-defrag-cycle-max",server.active_defrag_cycle_max,1,99) {
} config_set_numerical_field(
"active-defrag-max-scan-fields",server.active_defrag_max_scan_fields,1,LLONG_MAX) {
"active-defrag-max-scan-fields",server.active_defrag_max_scan_fields,1,LONG_MAX) {
} config_set_numerical_field(
"auto-aof-rewrite-percentage",server.aof_rewrite_perc,0,LLONG_MAX){
"auto-aof-rewrite-percentage",server.aof_rewrite_perc,0,INT_MAX){
} config_set_numerical_field(
"hash-max-ziplist-entries",server.hash_max_ziplist_entries,0,LLONG_MAX) {
"hash-max-ziplist-entries",server.hash_max_ziplist_entries,0,LONG_MAX) {
} config_set_numerical_field(
"hash-max-ziplist-value",server.hash_max_ziplist_value,0,LLONG_MAX) {
"hash-max-ziplist-value",server.hash_max_ziplist_value,0,LONG_MAX) {
} config_set_numerical_field(
"stream-node-max-bytes",server.stream_node_max_bytes,0,LONG_MAX) {
} config_set_numerical_field(
"stream-node-max-entries",server.stream_node_max_entries,0,LLONG_MAX) {
} config_set_numerical_field(
"list-max-ziplist-size",server.list_max_ziplist_size,INT_MIN,INT_MAX) {
} config_set_numerical_field(
"list-compress-depth",server.list_compress_depth,0,INT_MAX) {
} config_set_numerical_field(
"set-max-intset-entries",server.set_max_intset_entries,0,LLONG_MAX) {
"set-max-intset-entries",server.set_max_intset_entries,0,LONG_MAX) {
} config_set_numerical_field(
"zset-max-ziplist-entries",server.zset_max_ziplist_entries,0,LLONG_MAX) {
"zset-max-ziplist-entries",server.zset_max_ziplist_entries,0,LONG_MAX) {
} config_set_numerical_field(
"zset-max-ziplist-value",server.zset_max_ziplist_value,0,LLONG_MAX) {
"zset-max-ziplist-value",server.zset_max_ziplist_value,0,LONG_MAX) {
} config_set_numerical_field(
"hll-sparse-max-bytes",server.hll_sparse_max_bytes,0,LLONG_MAX) {
"hll-sparse-max-bytes",server.hll_sparse_max_bytes,0,LONG_MAX) {
} config_set_numerical_field(
"lua-time-limit",server.lua_time_limit,0,LLONG_MAX) {
"lua-time-limit",server.lua_time_limit,0,LONG_MAX) {
} config_set_numerical_field(
"slowlog-log-slower-than",server.slowlog_log_slower_than,0,LLONG_MAX) {
"slowlog-log-slower-than",server.slowlog_log_slower_than,-1,LLONG_MAX) {
} config_set_numerical_field(
"slowlog-max-len",ll,0,LLONG_MAX) {
"slowlog-max-len",ll,0,LONG_MAX) {
/* Cast to unsigned. */
server.slowlog_max_len = (unsigned)ll;
server.slowlog_max_len = (unsigned long)ll;
} config_set_numerical_field(
"latency-monitor-threshold",server.latency_monitor_threshold,0,LLONG_MAX){
} config_set_numerical_field(
"repl-ping-slave-period",server.repl_ping_slave_period,1,LLONG_MAX) {
"repl-ping-slave-period",server.repl_ping_slave_period,1,INT_MAX) {
} config_set_numerical_field(
"repl-timeout",server.repl_timeout,1,LLONG_MAX) {
"repl-timeout",server.repl_timeout,1,INT_MAX) {
} config_set_numerical_field(
"repl-backlog-ttl",server.repl_backlog_time_limit,0,LLONG_MAX) {
"repl-backlog-ttl",server.repl_backlog_time_limit,0,LONG_MAX) {
} config_set_numerical_field(
"repl-diskless-sync-delay",server.repl_diskless_sync_delay,0,LLONG_MAX) {
"repl-diskless-sync-delay",server.repl_diskless_sync_delay,0,INT_MAX) {
} config_set_numerical_field(
"slave-priority",server.slave_priority,0,LLONG_MAX) {
"slave-priority",server.slave_priority,0,INT_MAX) {
} config_set_numerical_field(
"slave-announce-port",server.slave_announce_port,0,65535) {
} config_set_numerical_field(
"min-slaves-to-write",server.repl_min_slaves_to_write,0,LLONG_MAX) {
"min-slaves-to-write",server.repl_min_slaves_to_write,0,INT_MAX) {
refreshGoodSlavesCount();
} config_set_numerical_field(
"min-slaves-max-lag",server.repl_min_slaves_max_lag,0,LLONG_MAX) {
"min-slaves-max-lag",server.repl_min_slaves_max_lag,0,INT_MAX) {
refreshGoodSlavesCount();
} config_set_numerical_field(
"cluster-node-timeout",server.cluster_node_timeout,0,LLONG_MAX) {
@ -1130,17 +1154,17 @@ void configSetCommand(client *c) {
} config_set_numerical_field(
"cluster-announce-bus-port",server.cluster_announce_bus_port,0,65535) {
} config_set_numerical_field(
"cluster-migration-barrier",server.cluster_migration_barrier,0,LLONG_MAX){
"cluster-migration-barrier",server.cluster_migration_barrier,0,INT_MAX){
} config_set_numerical_field(
"cluster-slave-validity-factor",server.cluster_slave_validity_factor,0,LLONG_MAX) {
"cluster-slave-validity-factor",server.cluster_slave_validity_factor,0,INT_MAX) {
} config_set_numerical_field(
"hz",server.hz,0,LLONG_MAX) {
"hz",server.hz,0,INT_MAX) {
/* Hz is more an hint from the user, so we accept values out of range
* but cap them to reasonable values. */
if (server.hz < CONFIG_MIN_HZ) server.hz = CONFIG_MIN_HZ;
if (server.hz > CONFIG_MAX_HZ) server.hz = CONFIG_MAX_HZ;
} config_set_numerical_field(
"watchdog-period",ll,0,LLONG_MAX) {
"watchdog-period",ll,0,INT_MAX) {
if (ll)
enableWatchdog(ll);
else
@ -1267,6 +1291,10 @@ void configGetCommand(client *c) {
server.hash_max_ziplist_entries);
config_get_numerical_field("hash-max-ziplist-value",
server.hash_max_ziplist_value);
config_get_numerical_field("stream-node-max-bytes",
server.stream_node_max_bytes);
config_get_numerical_field("stream-node-max-entries",
server.stream_node_max_entries);
config_get_numerical_field("list-max-ziplist-size",
server.list_max_ziplist_size);
config_get_numerical_field("list-compress-depth",
@ -1333,6 +1361,8 @@ void configGetCommand(client *c) {
server.repl_diskless_sync);
config_get_bool_field("aof-rewrite-incremental-fsync",
server.aof_rewrite_incremental_fsync);
config_get_bool_field("rdb-save-incremental-fsync",
server.rdb_save_incremental_fsync);
config_get_bool_field("aof-load-truncated",
server.aof_load_truncated);
config_get_bool_field("aof-use-rdb-preamble",
@ -2056,6 +2086,8 @@ int rewriteConfig(char *path) {
rewriteConfigNotifykeyspaceeventsOption(state);
rewriteConfigNumericalOption(state,"hash-max-ziplist-entries",server.hash_max_ziplist_entries,OBJ_HASH_MAX_ZIPLIST_ENTRIES);
rewriteConfigNumericalOption(state,"hash-max-ziplist-value",server.hash_max_ziplist_value,OBJ_HASH_MAX_ZIPLIST_VALUE);
rewriteConfigNumericalOption(state,"stream-node-max-bytes",server.stream_node_max_bytes,OBJ_STREAM_NODE_MAX_BYTES);
rewriteConfigNumericalOption(state,"stream-node-max-entries",server.stream_node_max_entries,OBJ_STREAM_NODE_MAX_ENTRIES);
rewriteConfigNumericalOption(state,"list-max-ziplist-size",server.list_max_ziplist_size,OBJ_LIST_MAX_ZIPLIST_SIZE);
rewriteConfigNumericalOption(state,"list-compress-depth",server.list_compress_depth,OBJ_LIST_COMPRESS_DEPTH);
rewriteConfigNumericalOption(state,"set-max-intset-entries",server.set_max_intset_entries,OBJ_SET_MAX_INTSET_ENTRIES);
@ -2068,6 +2100,7 @@ int rewriteConfig(char *path) {
rewriteConfigClientoutputbufferlimitOption(state);
rewriteConfigNumericalOption(state,"hz",server.hz,CONFIG_DEFAULT_HZ);
rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC);
rewriteConfigYesNoOption(state,"rdb-save-incremental-fsync",server.rdb_save_incremental_fsync,CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC);
rewriteConfigYesNoOption(state,"aof-load-truncated",server.aof_load_truncated,CONFIG_DEFAULT_AOF_LOAD_TRUNCATED);
rewriteConfigYesNoOption(state,"aof-use-rdb-preamble",server.aof_use_rdb_preamble,CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE);
rewriteConfigEnumOption(state,"supervised",server.supervised_mode,supervised_mode_enum,SUPERVISED_NONE);
@ -2107,10 +2140,10 @@ void configCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"get <pattern> -- Return parameters matching the glob-like <pattern> and their values.",
"set <parameter> <value> -- Set parameter to value.",
"resetstat -- Reset statistics reported by INFO.",
"rewrite -- Rewrite the configuration file.",
"GET <pattern> -- Return parameters matching the glob-like <pattern> and their values.",
"SET <parameter> <value> -- Set parameter to value.",
"RESETSTAT -- Reset statistics reported by INFO.",
"REWRITE -- Rewrite the configuration file.",
NULL
};
addReplyHelp(c, help);
@ -2135,8 +2168,7 @@ NULL
addReply(c,shared.ok);
}
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try CONFIG HELP",
(char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
return;
}
}

View File

@ -87,11 +87,11 @@
#endif
#endif
/* Define aof_fsync to fdatasync() in Linux and fsync() for all the rest */
/* Define redis_fsync to fdatasync() in Linux and fsync() for all the rest */
#ifdef __linux__
#define aof_fsync fdatasync
#define redis_fsync fdatasync
#else
#define aof_fsync fsync
#define redis_fsync fsync
#endif
/* Define rdb_fsync_range to sync_file_range() on Linux, otherwise we use

View File

@ -90,7 +90,7 @@ robj *lookupKey(redisDb *db, robj *key, int flags) {
* LOOKUP_NONE (or zero): no special flags are passed.
* LOOKUP_NOTOUCH: don't alter the last access time of the key.
*
* Note: this function also returns NULL is the key is logically expired
* Note: this function also returns NULL if the key is logically expired
* but still existing, in case this is a slave, since this API is called only
* for read operations. Even if the key expiry is master-driven, we can
* correctly report a key is expired on slaves even if the master is lagging
@ -113,7 +113,7 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) {
* safety measure, the command invoked is a read-only command, we can
* safely return NULL here, and provide a more consistent behavior
* to clients accessign expired values in a read-only fashion, that
* will say the key as non exisitng.
* will say the key as non existing.
*
* Notably this covers GETs when slaves are used to scale reads. */
if (server.current_client &&
@ -223,6 +223,8 @@ int dbExists(redisDb *db, robj *key) {
* The function makes sure to return keys not already expired. */
robj *dbRandomKey(redisDb *db) {
dictEntry *de;
int maxtries = 100;
int allvolatile = dictSize(db->dict) == dictSize(db->expires);
while(1) {
sds key;
@ -234,6 +236,17 @@ robj *dbRandomKey(redisDb *db) {
key = dictGetKey(de);
keyobj = createStringObject(key,sdslen(key));
if (dictFind(db->expires,key)) {
if (allvolatile && server.masterhost && --maxtries == 0) {
/* If the DB is composed only of keys with an expire set,
* it could happen that all the keys are already logically
* expired in the slave, so the function cannot stop because
* expireIfNeeded() is false, nor it can stop because
* dictGetRandomKey() returns NULL (there are keys to return).
* To prevent the infinite loop we do some tries, but if there
* are the conditions for an infinite loop, eventually we
* return a key name that may be already expired. */
return keyobj;
}
if (expireIfNeeded(db,keyobj)) {
decrRefCount(keyobj);
continue; /* search for another key. This expired. */
@ -305,7 +318,7 @@ robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) {
* If callback is given the function is called from time to time to
* signal that work is in progress.
*
* The dbnum can be -1 if all teh DBs should be flushed, or the specified
* The dbnum can be -1 if all the DBs should be flushed, or the specified
* DB number if we want to flush only a single Redis database number.
*
* Flags are be EMPTYDB_NO_FLAGS if no special flags are specified or
@ -467,8 +480,7 @@ void existsCommand(client *c) {
int j;
for (j = 1; j < c->argc; j++) {
expireIfNeeded(c->db,c->argv[j]);
if (dbExists(c->db,c->argv[j])) count++;
if (lookupKeyRead(c->db,c->argv[j])) count++;
}
addReplyLongLong(c,count);
}
@ -942,16 +954,18 @@ void moveCommand(client *c) {
}
/* Helper function for dbSwapDatabases(): scans the list of keys that have
* one or more blocked clients for B[LR]POP or other list blocking commands
* and signal the keys are ready if they are lists. See the comment where
* the function is used for more info. */
* one or more blocked clients for B[LR]POP or other blocking commands
* and signal the keys as ready if they are of the right type. See the comment
* where the function is used for more info. */
void scanDatabaseForReadyLists(redisDb *db) {
dictEntry *de;
dictIterator *di = dictGetSafeIterator(db->blocking_keys);
while((de = dictNext(di)) != NULL) {
robj *key = dictGetKey(de);
robj *value = lookupKey(db,key,LOOKUP_NOTOUCH);
if (value && (value->type == OBJ_LIST || value->type == OBJ_STREAM))
if (value && (value->type == OBJ_LIST ||
value->type == OBJ_STREAM ||
value->type == OBJ_ZSET))
signalKeyAsReady(db, key);
}
dictReleaseIterator(di);
@ -1171,7 +1185,7 @@ int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, in
for (j = cmd->firstkey; j <= last; j += cmd->keystep) {
if (j >= argc) {
/* Modules commands, and standard commands with a not fixed number
* of arugments (negative arity parameter) do not have dispatch
* of arguments (negative arity parameter) do not have dispatch
* time arity checks, so we need to handle the case where the user
* passed an invalid number of arguments here. In this case we
* return no keys and expect the command implementation to report
@ -1226,7 +1240,7 @@ int *zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *nu
num = atoi(argv[2]->ptr);
/* Sanity check. Don't return any key if the command is going to
* reply with syntax error. */
if (num > (argc-3)) {
if (num < 1 || num > (argc-3)) {
*numkeys = 0;
return NULL;
}
@ -1255,7 +1269,7 @@ int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys)
num = atoi(argv[2]->ptr);
/* Sanity check. Don't return any key if the command is going to
* reply with syntax error. */
if (num > (argc-3)) {
if (num <= 0 || num > (argc-3)) {
*numkeys = 0;
return NULL;
}
@ -1384,23 +1398,37 @@ int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numk
}
/* XREAD [BLOCK <milliseconds>] [COUNT <count>] [GROUP <groupname> <ttl>]
* [RETRY <milliseconds> <ttl>] STREAMS key_1 key_2 ... key_N
* ID_1 ID_2 ... ID_N */
* STREAMS key_1 key_2 ... key_N ID_1 ID_2 ... ID_N */
int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
int i, num, *keys;
int i, num = 0, *keys;
UNUSED(cmd);
/* We need to seek the last argument that contains "STREAMS", because other
* arguments before may contain it (for example the group name). */
/* We need to parse the options of the command in order to seek the first
* "STREAMS" string which is actually the option. This is needed because
* "STREAMS" could also be the name of the consumer group and even the
* name of the stream key. */
int streams_pos = -1;
for (i = 1; i < argc; i++) {
char *arg = argv[i]->ptr;
if (!strcasecmp(arg, "streams")) streams_pos = i;
if (!strcasecmp(arg, "block")) {
i++; /* Skip option argument. */
} else if (!strcasecmp(arg, "count")) {
i++; /* Skip option argument. */
} else if (!strcasecmp(arg, "group")) {
i += 2; /* Skip option argument. */
} else if (!strcasecmp(arg, "noack")) {
/* Nothing to do. */
} else if (!strcasecmp(arg, "streams")) {
streams_pos = i;
break;
} else {
break; /* Syntax error. */
}
}
if (streams_pos != -1) num = argc - streams_pos - 1;
/* Syntax error. */
if (streams_pos == -1 || num % 2 != 0) {
if (streams_pos == -1 || num == 0 || num % 2 != 0) {
*numkeys = 0;
return NULL;
}
@ -1408,7 +1436,7 @@ int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys)
there are also the IDs, one per key. */
keys = zmalloc(sizeof(int) * num);
for (i = streams_pos+1; i < argc; i++) keys[i-streams_pos-1] = i;
for (i = streams_pos+1; i < argc-num; i++) keys[i-streams_pos-1] = i;
*numkeys = num;
return keys;
}

View File

@ -285,25 +285,26 @@ void computeDatasetDigest(unsigned char *final) {
void debugCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"assert -- Crash by assertion failed.",
"change-repl-id -- Change the replication IDs of the instance. Dangerous, should be used only for testing the replication subsystem.",
"crash-and-recovery <milliseconds> -- Hard crash and restart after <milliseconds> delay.",
"digest -- Outputs an hex signature representing the current DB content.",
"htstats <dbid> -- Return hash table statistics of the specified Redis database.",
"loadaof -- Flush the AOF buffers on disk and reload the AOF in memory.",
"lua-always-replicate-commands (0|1) -- Setting it to 1 makes Lua replication defaulting to replicating single commands, without the script having to enable effects replication.",
"object <key> -- Show low level info about key and associated value.",
"panic -- Crash the server simulating a panic.",
"populate <count> [prefix] [size] -- Create <count> string keys named key:<num>. If a prefix is specified is used instead of the 'key' prefix.",
"reload -- Save the RDB on disk and reload it back in memory.",
"restart -- Graceful restart: save config, db, restart.",
"sdslen <key> -- Show low level SDS string info representing key and value.",
"segfault -- Crash the server with sigsegv.",
"set-active-expire (0|1) -- Setting it to 0 disables expiring keys in background when they are not accessed (otherwise the Redis behavior). Setting it to 1 reenables back the default.",
"sleep <seconds> -- Stop the server for <seconds>. Decimals allowed.",
"structsize -- Return the size of different Redis core C structures.",
"ziplist <key> -- Show low level info about the ziplist encoding.",
"error <string> -- Return a Redis protocol error with <string> as message. Useful for clients unit tests to simulate Redis errors.",
"ASSERT -- Crash by assertion failed.",
"CHANGE-REPL-ID -- Change the replication IDs of the instance. Dangerous, should be used only for testing the replication subsystem.",
"CRASH-AND-RECOVER <milliseconds> -- Hard crash and restart after <milliseconds> delay.",
"DIGEST -- Output a hex signature representing the current DB content.",
"ERROR <string> -- Return a Redis protocol error with <string> as message. Useful for clients unit tests to simulate Redis errors.",
"HTSTATS <dbid> -- Return hash table statistics of the specified Redis database.",
"HTSTATS-KEY <key> -- Like htstats but for the hash table stored as key's value.",
"LOADAOF -- Flush the AOF buffers on disk and reload the AOF in memory.",
"LUA-ALWAYS-REPLICATE-COMMANDS <0|1> -- Setting it to 1 makes Lua replication defaulting to replicating single commands, without the script having to enable effects replication.",
"OBJECT <key> -- Show low level info about key and associated value.",
"PANIC -- Crash the server simulating a panic.",
"POPULATE <count> [prefix] [size] -- Create <count> string keys named key:<num>. If a prefix is specified is used instead of the 'key' prefix.",
"RELOAD -- Save the RDB on disk and reload it back in memory.",
"RESTART -- Graceful restart: save config, db, restart.",
"SDSLEN <key> -- Show low level SDS string info representing key and value.",
"SEGFAULT -- Crash the server with sigsegv.",
"SET-ACTIVE-EXPIRE <0|1> -- Setting it to 0 disables expiring keys in background when they are not accessed (otherwise the Redis behavior). Setting it to 1 reenables back the default.",
"SLEEP <seconds> -- Stop the server for <seconds>. Decimals allowed.",
"STRUCTSIZE -- Return the size of different Redis core C structures.",
"ZIPLIST <key> -- Show low level info about the ziplist encoding.",
NULL
};
addReplyHelp(c, help);
@ -347,7 +348,7 @@ NULL
serverLog(LL_WARNING,"DB reloaded by DEBUG RELOAD");
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
if (server.aof_state == AOF_ON) flushAppendOnlyFile(1);
if (server.aof_state != AOF_OFF) flushAppendOnlyFile(1);
emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
if (loadAppendOnlyFile(server.aof_filename) != C_OK) {
addReply(c,shared.err);
@ -547,14 +548,41 @@ NULL
stats = sdscat(stats,buf);
addReplyBulkSds(c,stats);
} else if (!strcasecmp(c->argv[1]->ptr,"htstats-key") && c->argc == 3) {
robj *o;
dict *ht = NULL;
if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
== NULL) return;
/* Get the hash table reference from the object, if possible. */
switch (o->encoding) {
case OBJ_ENCODING_SKIPLIST:
{
zset *zs = o->ptr;
ht = zs->dict;
}
break;
case OBJ_ENCODING_HT:
ht = o->ptr;
break;
}
if (ht == NULL) {
addReplyError(c,"The value stored at the specified key is not "
"represented using an hash table");
} else {
char buf[4096];
dictGetStats(buf,sizeof(buf),ht);
addReplyBulkCString(c,buf);
}
} else if (!strcasecmp(c->argv[1]->ptr,"change-repl-id") && c->argc == 2) {
serverLog(LL_WARNING,"Changing replication IDs after receiving DEBUG change-repl-id");
changeReplicationId();
clearReplicationId2();
addReply(c,shared.ok);
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try DEBUG HELP",
(char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
return;
}
}
@ -1048,7 +1076,7 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
infostring = genRedisInfoString("all");
serverLogRaw(LL_WARNING|LL_RAW, infostring);
serverLogRaw(LL_WARNING|LL_RAW, "\n------ CLIENT LIST OUTPUT ------\n");
clients = getAllClientsInfoString();
clients = getAllClientsInfoString(-1);
serverLogRaw(LL_WARNING|LL_RAW, clients);
sdsfree(infostring);
sdsfree(clients);

View File

@ -592,6 +592,171 @@ long defragSet(redisDb *db, dictEntry *kde) {
return defragged;
}
/* Defrag callback for radix tree iterator, called for each node,
* used in order to defrag the nodes allocations. */
int defragRaxNode(raxNode **noderef) {
raxNode *newnode = activeDefragAlloc(*noderef);
if (newnode) {
*noderef = newnode;
return 1;
}
return 0;
}
/* returns 0 if no more work needs to be been done, and 1 if time is up and more work is needed. */
int scanLaterStraemListpacks(robj *ob, unsigned long *cursor, long long endtime, long long *defragged) {
static unsigned char last[sizeof(streamID)];
raxIterator ri;
long iterations = 0;
if (ob->type != OBJ_STREAM || ob->encoding != OBJ_ENCODING_STREAM) {
*cursor = 0;
return 0;
}
stream *s = ob->ptr;
raxStart(&ri,s->rax);
if (*cursor == 0) {
/* if cursor is 0, we start new iteration */
defragRaxNode(&s->rax->head);
/* assign the iterator node callback before the seek, so that the
* initial nodes that are processed till the first item are covered */
ri.node_cb = defragRaxNode;
raxSeek(&ri,"^",NULL,0);
} else {
/* if cursor is non-zero, we seek to the static 'last' */
if (!raxSeek(&ri,">", last, sizeof(last))) {
*cursor = 0;
return 0;
}
/* assign the iterator node callback after the seek, so that the
* initial nodes that are processed till now aren't covered */
ri.node_cb = defragRaxNode;
}
(*cursor)++;
while (raxNext(&ri)) {
void *newdata = activeDefragAlloc(ri.data);
if (newdata)
raxSetData(ri.node, ri.data=newdata), (*defragged)++;
if (++iterations > 16) {
if (ustime() > endtime) {
serverAssert(ri.key_len==sizeof(last));
memcpy(last,ri.key,ri.key_len);
raxStop(&ri);
return 1;
}
iterations = 0;
}
}
raxStop(&ri);
*cursor = 0;
return 0;
}
/* optional callback used defrag each rax element (not including the element pointer itself) */
typedef void *(raxDefragFunction)(raxIterator *ri, void *privdata, long *defragged);
/* defrag radix tree including:
* 1) rax struct
* 2) rax nodes
* 3) rax entry data (only if defrag_data is specified)
* 4) call a callback per element, and allow the callback to return a new pointer for the element */
long defragRadixTree(rax **raxref, int defrag_data, raxDefragFunction *element_cb, void *element_cb_data) {
long defragged = 0;
raxIterator ri;
rax* rax;
if ((rax = activeDefragAlloc(*raxref)))
defragged++, *raxref = rax;
rax = *raxref;
raxStart(&ri,rax);
ri.node_cb = defragRaxNode;
defragRaxNode(&rax->head);
raxSeek(&ri,"^",NULL,0);
while (raxNext(&ri)) {
void *newdata = NULL;
if (element_cb)
newdata = element_cb(&ri, element_cb_data, &defragged);
if (defrag_data && !newdata)
newdata = activeDefragAlloc(ri.data);
if (newdata)
raxSetData(ri.node, ri.data=newdata), defragged++;
}
raxStop(&ri);
return defragged;
}
typedef struct {
streamCG *cg;
streamConsumer *c;
} PendingEntryContext;
void* defragStreamConsumerPendingEntry(raxIterator *ri, void *privdata, long *defragged) {
UNUSED(defragged);
PendingEntryContext *ctx = privdata;
streamNACK *nack = ri->data, *newnack;
nack->consumer = ctx->c; /* update nack pointer to consumer */
newnack = activeDefragAlloc(nack);
if (newnack) {
/* update consumer group pointer to the nack */
void *prev;
raxInsert(ctx->cg->pel, ri->key, ri->key_len, newnack, &prev);
serverAssert(prev==nack);
/* note: we don't increment 'defragged' that's done by the caller */
}
return newnack;
}
void* defragStreamConsumer(raxIterator *ri, void *privdata, long *defragged) {
streamConsumer *c = ri->data;
streamCG *cg = privdata;
void *newc = activeDefragAlloc(c);
if (newc) {
/* note: we don't increment 'defragged' that's done by the caller */
c = newc;
}
sds newsds = activeDefragSds(c->name);
if (newsds)
(*defragged)++, c->name = newsds;
if (c->pel) {
PendingEntryContext pel_ctx = {cg, c};
*defragged += defragRadixTree(&c->pel, 0, defragStreamConsumerPendingEntry, &pel_ctx);
}
return newc; /* returns NULL if c was not defragged */
}
void* defragStreamConsumerGroup(raxIterator *ri, void *privdata, long *defragged) {
streamCG *cg = ri->data;
UNUSED(privdata);
if (cg->consumers)
*defragged += defragRadixTree(&cg->consumers, 0, defragStreamConsumer, cg);
if (cg->pel)
*defragged += defragRadixTree(&cg->pel, 0, NULL, NULL);
return NULL;
}
long defragStream(redisDb *db, dictEntry *kde) {
long defragged = 0;
robj *ob = dictGetVal(kde);
serverAssert(ob->type == OBJ_STREAM && ob->encoding == OBJ_ENCODING_STREAM);
stream *s = ob->ptr, *news;
/* handle the main struct */
if ((news = activeDefragAlloc(s)))
defragged++, ob->ptr = s = news;
if (raxSize(s->rax) > server.active_defrag_max_scan_fields) {
rax *newrax = activeDefragAlloc(s->rax);
if (newrax)
defragged++, s->rax = newrax;
defragLater(db, kde);
} else
defragged += defragRadixTree(&s->rax, 1, NULL, NULL);
if (s->cgroups)
defragged += defragRadixTree(&s->cgroups, 1, defragStreamConsumerGroup, NULL);
return defragged;
}
/* for each key we scan in the main dict, this function will attempt to defrag
* all the various pointers it has. Returns a stat of how many pointers were
* moved. */
@ -660,6 +825,8 @@ long defragKey(redisDb *db, dictEntry *de) {
} else {
serverPanic("Unknown hash encoding");
}
} else if (ob->type == OBJ_STREAM) {
defragged += defragStream(db, de);
} else if (ob->type == OBJ_MODULE) {
/* Currently defragmenting modules private data types
* is not supported. */
@ -680,7 +847,7 @@ void defragScanCallback(void *privdata, const dictEntry *de) {
server.stat_active_defrag_scanned++;
}
/* Defrag scan callback for for each hash table bicket,
/* Defrag scan callback for each hash table bicket,
* used in order to defrag the dictEntry allocations. */
void defragDictBucketCallback(void *privdata, dictEntry **bucketref) {
UNUSED(privdata); /* NOTE: this function is also used by both activeDefragCycle and scanLaterHash, etc. don't use privdata */
@ -700,9 +867,8 @@ void defragDictBucketCallback(void *privdata, dictEntry **bucketref) {
* or not, a false detection can cause the defragmenter to waste a lot of CPU
* without the possibility of getting any results. */
float getAllocatorFragmentation(size_t *out_frag_bytes) {
size_t resident = server.cron_malloc_stats.allocator_resident;
size_t active = server.cron_malloc_stats.allocator_active;
size_t allocated = server.cron_malloc_stats.allocator_allocated;
size_t resident, active, allocated;
zmalloc_get_allocator_info(&allocated, &active, &resident);
float frag_pct = ((float)active / allocated)*100 - 100;
size_t frag_bytes = active - allocated;
float rss_pct = ((float)resident / allocated)*100 - 100;
@ -728,27 +894,29 @@ long defragOtherGlobals() {
return defragged;
}
unsigned long defragLaterItem(dictEntry *de, unsigned long cursor) {
long defragged = 0;
/* returns 0 more work may or may not be needed (see non-zero cursor),
* and 1 if time is up and more work is needed. */
int defragLaterItem(dictEntry *de, unsigned long *cursor, long long endtime) {
if (de) {
robj *ob = dictGetVal(de);
if (ob->type == OBJ_LIST) {
defragged += scanLaterList(ob);
cursor = 0; /* list has no scan, we must finish it in one go */
server.stat_active_defrag_hits += scanLaterList(ob);
*cursor = 0; /* list has no scan, we must finish it in one go */
} else if (ob->type == OBJ_SET) {
defragged += scanLaterSet(ob, &cursor);
server.stat_active_defrag_hits += scanLaterSet(ob, cursor);
} else if (ob->type == OBJ_ZSET) {
defragged += scanLaterZset(ob, &cursor);
server.stat_active_defrag_hits += scanLaterZset(ob, cursor);
} else if (ob->type == OBJ_HASH) {
defragged += scanLaterHash(ob, &cursor);
server.stat_active_defrag_hits += scanLaterHash(ob, cursor);
} else if (ob->type == OBJ_STREAM) {
return scanLaterStraemListpacks(ob, cursor, endtime, &server.stat_active_defrag_hits);
} else {
cursor = 0; /* object type may have changed since we schedule it for later */
*cursor = 0; /* object type may have changed since we schedule it for later */
}
} else {
cursor = 0; /* object may have been deleted already */
*cursor = 0; /* object may have been deleted already */
}
server.stat_active_defrag_hits += defragged;
return cursor;
return 0;
}
/* returns 0 if no more work needs to be been done, and 1 if time is up and more work is needed. */
@ -788,17 +956,22 @@ int defragLaterStep(redisDb *db, long long endtime) {
dictEntry *de = dictFind(db->dict, current_key);
key_defragged = server.stat_active_defrag_hits;
do {
cursor = defragLaterItem(de, cursor);
int quit = 0;
if (defragLaterItem(de, &cursor, endtime))
quit = 1; /* time is up, we didn't finish all the work */
/* Don't start a new BIG key in this loop, this is because the
* next key can be a list, and scanLaterList must be done in once cycle */
if (!cursor)
quit = 1;
/* Once in 16 scan iterations, 512 pointer reallocations, or 64 fields
* (if we have a lot of pointers in one hash bucket, or rehashing),
* check if we reached the time limit.
* But regardless, don't start a new BIG key in this loop, this is because the
* next key can be a list, and scanLaterList must be done in once cycle */
if (!cursor || (++iterations > 16 ||
* check if we reached the time limit. */
if (quit || (++iterations > 16 ||
server.stat_active_defrag_hits - prev_defragged > 512 ||
server.stat_active_defrag_scanned - prev_scanned > 64)) {
if (!cursor || ustime() > endtime) {
if (quit || ustime() > endtime) {
if(key_defragged != server.stat_active_defrag_hits)
server.stat_active_defrag_key_hits++;
else

View File

@ -146,14 +146,14 @@ int dictResize(dict *d)
/* Expand or create the hash table */
int dictExpand(dict *d, unsigned long size)
{
dictht n; /* the new hash table */
unsigned long realsize = _dictNextPower(size);
/* the size is invalid if it is smaller than the number of
* elements already inside the hash table */
if (dictIsRehashing(d) || d->ht[0].used > size)
return DICT_ERR;
dictht n; /* the new hash table */
unsigned long realsize = _dictNextPower(size);
/* Rehashing to the same table size is not useful. */
if (realsize == d->ht[0].size) return DICT_ERR;
@ -327,7 +327,7 @@ int dictReplace(dict *d, void *key, void *val)
dictEntry *entry, *existing, auxentry;
/* Try to add the element. If the key
* does not exists dictAdd will suceed. */
* does not exists dictAdd will succeed. */
entry = dictAddRaw(d,key,&existing);
if (entry) {
dictSetVal(d, entry, val);
@ -705,8 +705,10 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
* table, there will be no elements in both tables up to
* the current rehashing index, so we jump if possible.
* (this happens when going from big to small table). */
if (i >= d->ht[1].size) i = d->rehashidx;
continue;
if (i >= d->ht[1].size)
i = d->rehashidx;
else
continue;
}
if (i >= d->ht[j].size) continue; /* Out of range for this table. */
dictEntry *he = d->ht[j].table[i];
@ -858,6 +860,15 @@ unsigned long dictScan(dict *d,
de = next;
}
/* Set unmasked bits so incrementing the reversed cursor
* operates on the masked bits */
v |= ~m0;
/* Increment the reverse cursor */
v = rev(v);
v++;
v = rev(v);
} else {
t0 = &d->ht[0];
t1 = &d->ht[1];
@ -892,22 +903,16 @@ unsigned long dictScan(dict *d,
de = next;
}
/* Increment bits not covered by the smaller mask */
v = (((v | m0) + 1) & ~m0) | (v & m0);
/* Increment the reverse cursor not covered by the smaller mask.*/
v |= ~m1;
v = rev(v);
v++;
v = rev(v);
/* Continue while bits covered by mask difference is non-zero */
} while (v & (m0 ^ m1));
}
/* Set unmasked bits so incrementing the reversed cursor
* operates on the masked bits of the smaller table */
v |= ~m0;
/* Increment the reverse cursor */
v = rev(v);
v++;
v = rev(v);
return v;
}

View File

@ -43,12 +43,12 @@ uint16_t intrev16(uint16_t v);
uint32_t intrev32(uint32_t v);
uint64_t intrev64(uint64_t v);
/* variants of the function doing the actual convertion only if the target
/* variants of the function doing the actual conversion only if the target
* host is big endian */
#if (BYTE_ORDER == LITTLE_ENDIAN)
#define memrev16ifbe(p)
#define memrev32ifbe(p)
#define memrev64ifbe(p)
#define memrev16ifbe(p) ((void)(0))
#define memrev32ifbe(p) ((void)(0))
#define memrev64ifbe(p) ((void)(0))
#define intrev16ifbe(v) (v)
#define intrev32ifbe(v) (v)
#define intrev64ifbe(v) (v)

View File

@ -112,7 +112,7 @@ void activeExpireCycle(int type) {
if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
/* Don't start a fast cycle if the previous cycle did not exit
* for time limt. Also don't repeat a fast cycle for the same period
* for time limit. Also don't repeat a fast cycle for the same period
* as the fast cycle total duration itself. */
if (!timelimit_exit) return;
if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return;

View File

@ -145,7 +145,7 @@ double extractUnitOrReply(client *c, robj *unit) {
/* Input Argument Helper.
* Extract the dinstance from the specified two arguments starting at 'argv'
* that shouldbe in the form: <number> <unit> and return the dinstance in the
* specified unit on success. *conversino is populated with the coefficient
* specified unit on success. *conversions is populated with the coefficient
* to use in order to convert meters to the unit.
*
* On error a value less than zero is returned. */

View File

@ -144,8 +144,8 @@ int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,
(longitude - long_range->min) / (long_range->max - long_range->min);
/* convert to fixed point based on the step size */
lat_offset *= (1 << step);
long_offset *= (1 << step);
lat_offset *= (1ULL << step);
long_offset *= (1ULL << step);
hash->bits = interleave64(lat_offset, long_offset);
return 1;
}

View File

@ -1,4 +1,4 @@
/* Automatically generated by utils/generate-command-help.rb, do not edit. */
/* Automatically generated by generate-command-help.rb, do not edit. */
#ifndef __REDIS_HELP_H
#define __REDIS_HELP_H
@ -17,7 +17,8 @@ static char *commandGroups[] = {
"scripting",
"hyperloglog",
"cluster",
"geo"
"geo",
"stream"
};
struct commandHelp {
@ -82,6 +83,16 @@ struct commandHelp {
"Pop a value from a list, push it to another list and return it; or block until one is available",
2,
"2.2.0" },
{ "BZPOPMAX",
"key [key ...] timeout",
"Remove and return the member with the highest score from one or more sorted sets, or block until one is available",
4,
"5.0.0" },
{ "BZPOPMIN",
"key [key ...] timeout",
"Remove and return the member with the lowest score from one or more sorted sets, or block until one is available",
4,
"5.0.0" },
{ "CLIENT GETNAME",
"-",
"Get the current connection name",
@ -318,12 +329,12 @@ struct commandHelp {
0,
"1.2.0" },
{ "FLUSHALL",
"-",
"[ASYNC]",
"Remove all keys from all databases",
9,
"1.0.0" },
{ "FLUSHDB",
"-",
"[ASYNC]",
"Remove all keys from the current database",
9,
"1.0.0" },
@ -532,6 +543,36 @@ struct commandHelp {
"Trim a list to the specified range",
2,
"1.0.0" },
{ "MEMORY DOCTOR",
"-",
"Outputs memory problems report",
9,
"4.0.0" },
{ "MEMORY HELP",
"-",
"Show helpful text about the different subcommands",
9,
"4.0.0" },
{ "MEMORY MALLOC-STATS",
"-",
"Show allocator internal stats",
9,
"4.0.0" },
{ "MEMORY PURGE",
"-",
"Ask the allocator to release memory",
9,
"4.0.0" },
{ "MEMORY STATS",
"-",
"Show memory usage details",
9,
"4.0.0" },
{ "MEMORY USAGE",
"key [SAMPLES count]",
"Estimate the memory usage of a key",
9,
"4.0.0" },
{ "MGET",
"key [key ...]",
"Get the values of all the given keys",
@ -723,7 +764,7 @@ struct commandHelp {
10,
"3.2.0" },
{ "SCRIPT EXISTS",
"script [script ...]",
"sha1 [sha1 ...]",
"Check existence of scripts in the script cache.",
10,
"2.6.0" },
@ -758,7 +799,7 @@ struct commandHelp {
8,
"1.0.0" },
{ "SET",
"key value [EX seconds] [PX milliseconds] [NX|XX]",
"key value [expiration EX seconds|PX milliseconds] [NX|XX]",
"Set the string value of a key",
1,
"1.0.0" },
@ -867,6 +908,11 @@ struct commandHelp {
"Add multiple sets and store the resulting set in a key",
3,
"1.0.0" },
{ "SWAPDB",
"index index",
"Swaps two Redis databases",
8,
"4.0.0" },
{ "SYNC",
"-",
"Internal command used for replication",
@ -877,6 +923,11 @@ struct commandHelp {
"Return the current server time",
9,
"2.6.0" },
{ "TOUCH",
"key [key ...]",
"Alters the last access time of a key(s). Returns the number of existing keys specified.",
0,
"3.2.1" },
{ "TTL",
"key",
"Get the time to live for a key",
@ -887,6 +938,11 @@ struct commandHelp {
"Determine the type stored at key",
0,
"1.0.0" },
{ "UNLINK",
"key [key ...]",
"Delete a key asynchronously in another thread. Otherwise it is just as DEL, but non blocking.",
0,
"4.0.0" },
{ "UNSUBSCRIBE",
"[channel [channel ...]]",
"Stop listening for messages posted to the given channels",
@ -907,6 +963,41 @@ struct commandHelp {
"Watch the given keys to determine execution of the MULTI/EXEC block",
7,
"2.2.0" },
{ "XADD",
"key ID field string [field string ...]",
"Appends a new entry to a stream",
14,
"5.0.0" },
{ "XLEN",
"key",
"Return the number of entires in a stream",
14,
"5.0.0" },
{ "XPENDING",
"key group [start end count] [consumer]",
"Return information and entries from a stream consumer group pending entries list, that are messages fetched but never acknowledged.",
14,
"5.0.0" },
{ "XRANGE",
"key start end [COUNT count]",
"Return a range of elements in a stream, with IDs matching the specified IDs interval",
14,
"5.0.0" },
{ "XREAD",
"[COUNT count] [BLOCK milliseconds] STREAMS key [key ...] ID [ID ...]",
"Return never seen elements in multiple streams, with IDs greater than the ones reported by the caller for each stream. Can block.",
14,
"5.0.0" },
{ "XREADGROUP",
"GROUP group consumer [COUNT count] [BLOCK milliseconds] STREAMS key [key ...] ID [ID ...]",
"Return new entries from a stream using a consumer group, or access the history of the pending entries for a given consumer. Can block.",
14,
"5.0.0" },
{ "XREVRANGE",
"key end start [COUNT count]",
"Return a range of elements in a stream, with IDs matching the specified IDs interval, in reverse order (from greater to smaller IDs) compared to XRANGE",
14,
"5.0.0" },
{ "ZADD",
"key [NX|XX] [CH] [INCR] score member [score member ...]",
"Add one or more members to a sorted set, or update its score if it already exists",
@ -937,6 +1028,16 @@ struct commandHelp {
"Count the number of members in a sorted set between a given lexicographical range",
4,
"2.8.9" },
{ "ZPOPMAX",
"key [count]",
"Remove and return members with the highest scores in a sorted set",
4,
"5.0.0" },
{ "ZPOPMIN",
"key [count]",
"Remove and return members with the lowest scores in a sorted set",
4,
"5.0.0" },
{ "ZRANGE",
"key start stop [WITHSCORES]",
"Return a range of members in a sorted set, by index",

View File

@ -429,14 +429,14 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
}
switch(len & 7) {
case 7: h ^= (uint64_t)data[6] << 48;
case 6: h ^= (uint64_t)data[5] << 40;
case 5: h ^= (uint64_t)data[4] << 32;
case 4: h ^= (uint64_t)data[3] << 24;
case 3: h ^= (uint64_t)data[2] << 16;
case 2: h ^= (uint64_t)data[1] << 8;
case 7: h ^= (uint64_t)data[6] << 48; /* fall-thru */
case 6: h ^= (uint64_t)data[5] << 40; /* fall-thru */
case 5: h ^= (uint64_t)data[4] << 32; /* fall-thru */
case 4: h ^= (uint64_t)data[3] << 24; /* fall-thru */
case 3: h ^= (uint64_t)data[2] << 16; /* fall-thru */
case 2: h ^= (uint64_t)data[1] << 8; /* fall-thru */
case 1: h ^= (uint64_t)data[0];
h *= m;
h *= m; /* fall-thru */
};
h ^= h >> r;
@ -673,7 +673,7 @@ int hllSparseSet(robj *o, long index, uint8_t count) {
end = p + sdslen(o->ptr) - HLL_HDR_SIZE;
first = 0;
prev = NULL; /* Points to previos opcode at the end of the loop. */
prev = NULL; /* Points to previous opcode at the end of the loop. */
next = NULL; /* Points to the next opcode at the end of the loop. */
span = 0;
while(p < end) {
@ -764,7 +764,7 @@ int hllSparseSet(robj *o, long index, uint8_t count) {
* and is either currently represented by a VAL opcode with len > 1,
* by a ZERO opcode with len > 1, or by an XZERO opcode.
*
* In those cases the original opcode must be split into muliple
* In those cases the original opcode must be split into multiple
* opcodes. The worst case is an XZERO split in the middle resuling into
* XZERO - VAL - XZERO, so the resulting sequence max length is
* 5 bytes.
@ -887,7 +887,7 @@ promote: /* Promote to dense representation. */
*
* Note that this in turn means that PFADD will make sure the command
* is propagated to slaves / AOF, so if there is a sparse -> dense
* convertion, it will be performed in all the slaves as well. */
* conversion, it will be performed in all the slaves as well. */
int dense_retval = hllDenseSet(hdr->registers,index,count);
serverAssert(dense_retval == 1);
return dense_retval;

View File

@ -152,7 +152,7 @@ int latencyResetEvent(char *event_to_reset) {
/* ------------------------ Latency reporting (doctor) ---------------------- */
/* Analyze the samples avaialble for a given event and return a structure
/* Analyze the samples available for a given event and return a structure
* populate with different metrics, average, MAD, min, max, and so forth.
* Check latency.h definition of struct latenctStat for more info.
* If the specified event has no elements the structure is populate with
@ -294,7 +294,7 @@ sds createLatencyReport(void) {
/* Potentially commands. */
if (!strcasecmp(event,"command")) {
if (server.slowlog_log_slower_than == 0) {
if (server.slowlog_log_slower_than < 0) {
advise_slowlog_enabled = 1;
advices++;
} else if (server.slowlog_log_slower_than/1000 >

View File

@ -23,10 +23,10 @@ size_t lazyfreeGetPendingObjectsCount(void) {
* the function just returns the number of elements the object is composed of.
*
* Objects composed of single allocations are always reported as having a
* single item even if they are actaully logical composed of multiple
* single item even if they are actually logical composed of multiple
* elements.
*
* For lists the funciton returns the number of elements in the quicklist
* For lists the function returns the number of elements in the quicklist
* representing the list. */
size_t lazyfreeGetFreeEffort(robj *obj) {
if (obj->type == OBJ_LIST) {

View File

@ -291,7 +291,7 @@ int lpEncodeGetType(unsigned char *ele, uint32_t size, unsigned char *intenc, ui
/* Store a reverse-encoded variable length field, representing the length
* of the previous element of size 'l', in the target buffer 'buf'.
* The function returns the number of bytes used to encode it, from
* 1 to 5. If 'buf' is NULL the funciton just returns the number of bytes
* 1 to 5. If 'buf' is NULL the function just returns the number of bytes
* needed in order to encode the backlen. */
unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) {
if (l <= 127) {
@ -568,7 +568,7 @@ unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) {
}
}
/* Insert, delete or replace the specified element 'ele' of lenght 'len' at
/* Insert, delete or replace the specified element 'ele' of length 'len' at
* the specified position 'p', with 'p' being a listpack element pointer
* obtained with lpFirst(), lpLast(), lpIndex(), lpNext(), lpPrev() or
* lpSeek().
@ -710,7 +710,7 @@ unsigned char *lpInsert(unsigned char *lp, unsigned char *ele, uint32_t size, un
return lp;
}
/* Append the specified element 'ele' of lenght 'len' at the end of the
/* Append the specified element 'ele' of length 'len' at the end of the
* listpack. It is implemented in terms of lpInsert(), so the return value is
* the same as lpInsert(). */
unsigned char *lpAppend(unsigned char *lp, unsigned char *ele, uint32_t size) {

123
src/localtime.c Normal file
View File

@ -0,0 +1,123 @@
/*
* Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <time.h>
/* This is a safe version of localtime() which contains no locks and is
* fork() friendly. Even the _r version of localtime() cannot be used safely
* in Redis. Another thread may be calling localtime() while the main thread
* forks(). Later when the child process calls localtime() again, for instance
* in order to log something to the Redis log, it may deadlock: in the copy
* of the address space of the forked process the lock will never be released.
*
* This function takes the timezone 'tz' as argument, and the 'dst' flag is
* used to check if daylight saving time is currently in effect. The caller
* of this function should obtain such information calling tzset() ASAP in the
* main() function to obtain the timezone offset from the 'timezone' global
* variable. To obtain the daylight information, if it is currently active or not,
* one trick is to call localtime() in main() ASAP as well, and get the
* information from the tm_isdst field of the tm structure. However the daylight
* time may switch in the future for long running processes, so this information
* should be refreshed at safe times.
*
* Note that this function does not work for dates < 1/1/1970, it is solely
* designed to work with what time(NULL) may return, and to support Redis
* logging of the dates, it's not really a complete implementation. */
static int is_leap_year(time_t year) {
if (year % 4) return 0; /* A year not divisible by 4 is not leap. */
else if (year % 100) return 1; /* If div by 4 and not 100 is surely leap. */
else if (year % 400) return 0; /* If div by 100 *and* 400 is not leap. */
else return 1; /* If div by 100 and not by 400 is leap. */
}
void nolocks_localtime(struct tm *tmp, time_t t, time_t tz, int dst) {
const time_t secs_min = 60;
const time_t secs_hour = 3600;
const time_t secs_day = 3600*24;
t -= tz; /* Adjust for timezone. */
t += 3600*dst; /* Adjust for daylight time. */
time_t days = t / secs_day; /* Days passed since epoch. */
time_t seconds = t % secs_day; /* Remaining seconds. */
tmp->tm_isdst = dst;
tmp->tm_hour = seconds / secs_hour;
tmp->tm_min = (seconds % secs_hour) / secs_min;
tmp->tm_sec = (seconds % secs_hour) % secs_min;
/* 1/1/1970 was a Thursday, that is, day 4 from the POV of the tm structure
* where sunday = 0, so to calculate the day of the week we have to add 4
* and take the modulo by 7. */
tmp->tm_wday = (days+4)%7;
/* Calculate the current year. */
tmp->tm_year = 1970;
while(1) {
/* Leap years have one day more. */
time_t days_this_year = 365 + is_leap_year(tmp->tm_year);
if (days_this_year > days) break;
days -= days_this_year;
tmp->tm_year++;
}
tmp->tm_yday = days; /* Number of day of the current year. */
/* We need to calculate in which month and day of the month we are. To do
* so we need to skip days according to how many days there are in each
* month, and adjust for the leap year that has one more day in February. */
int mdays[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
mdays[1] += is_leap_year(tmp->tm_year);
tmp->tm_mon = 0;
while(days >= mdays[tmp->tm_mon]) {
days -= mdays[tmp->tm_mon];
tmp->tm_mon++;
}
tmp->tm_mday = days+1; /* Add 1 since our 'days' is zero-based. */
tmp->tm_year -= 1900; /* Surprisingly tm_year is year-1900. */
}
#ifdef LOCALTIME_TEST_MAIN
#include <stdio.h>
int main(void) {
/* Obtain timezone and daylight info. */
tzset(); /* Now 'timezome' global is populated. */
time_t t = time(NULL);
struct tm *aux = localtime(&t);
int daylight_active = aux->tm_isdst;
struct tm tm;
char buf[1024];
nolocks_localtime(&tm,t,timezone,daylight_active);
strftime(buf,sizeof(buf),"%d %b %H:%M:%S",&tm);
printf("[timezone: %d, dl: %d] %s\n", (int)timezone, (int)daylight_active, buf);
}
#endif

View File

@ -86,6 +86,8 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
#ifdef lzf_movsb
lzf_movsb (op, ip, ctrl);
#else
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
switch (ctrl)
{
case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++;
@ -97,6 +99,7 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
case 8: *op++ = *ip++; case 7: *op++ = *ip++; case 6: *op++ = *ip++; case 5: *op++ = *ip++;
case 4: *op++ = *ip++; case 3: *op++ = *ip++; case 2: *op++ = *ip++; case 1: *op++ = *ip++;
}
#pragma GCC diagnostic pop
#endif
}
else /* back reference */
@ -163,17 +166,17 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
break;
case 9: *op++ = *ref++;
case 8: *op++ = *ref++;
case 7: *op++ = *ref++;
case 6: *op++ = *ref++;
case 5: *op++ = *ref++;
case 4: *op++ = *ref++;
case 3: *op++ = *ref++;
case 2: *op++ = *ref++;
case 1: *op++ = *ref++;
case 9: *op++ = *ref++; /* fall-thru */
case 8: *op++ = *ref++; /* fall-thru */
case 7: *op++ = *ref++; /* fall-thru */
case 6: *op++ = *ref++; /* fall-thru */
case 5: *op++ = *ref++; /* fall-thru */
case 4: *op++ = *ref++; /* fall-thru */
case 3: *op++ = *ref++; /* fall-thru */
case 2: *op++ = *ref++; /* fall-thru */
case 1: *op++ = *ref++; /* fall-thru */
case 0: *op++ = *ref++; /* two octets more */
*op++ = *ref++;
*op++ = *ref++; /* fall-thru */
}
#endif
}

View File

@ -2239,6 +2239,9 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) {
* to avoid a useless copy. */
if (flags & REDISMODULE_HASH_CFIELDS)
low_flags |= HASH_SET_TAKE_FIELD;
robj *argv[2] = {field,value};
hashTypeTryConversion(key->value,argv,0,1);
updated += hashTypeSet(key->value, field->ptr, value->ptr, low_flags);
/* If CFIELDS is active, SDS string ownership is now of hashTypeSet(),
@ -2709,9 +2712,9 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch
sds proto = sdsnewlen(c->buf,c->bufpos);
c->bufpos = 0;
while(listLength(c->reply)) {
sds o = listNodeValue(listFirst(c->reply));
clientReplyBlock *o = listNodeValue(listFirst(c->reply));
proto = sdscatsds(proto,o);
proto = sdscatlen(proto,o->buf,o->used);
listDelNode(c->reply,listFirst(c->reply));
}
reply = moduleCreateCallReplyFromProto(ctx,proto);
@ -3396,7 +3399,7 @@ void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_li
*
* If the specified log level is invalid, verbose is used by default.
* There is a fixed limit to the length of the log line this function is able
* to emit, this limti is not specified but is guaranteed to be more than
* to emit, this limit is not specified but is guaranteed to be more than
* a few lines of text.
*/
void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...) {
@ -3827,7 +3830,7 @@ void moduleReleaseGIL(void) {
*
* Notification callback gets executed with a redis context that can not be
* used to send anything to the client, and has the db number where the event
* occured as its selected db number.
* occurred as its selected db number.
*
* Notice that it is not necessary to enable norifications in redis.conf for
* module notifications to work.
@ -3884,7 +3887,7 @@ void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid)
}
}
/* Unsubscribe any notification subscirbers this module has upon unloading */
/* Unsubscribe any notification subscribers this module has upon unloading */
void moduleUnsubscribeNotifications(RedisModule *module) {
listIter li;
listNode *ln;
@ -4362,7 +4365,7 @@ void moduleInitModulesSystem(void) {
* because the server must be fully initialized before loading modules.
*
* The function aborts the server on errors, since to start with missing
* modules is not considered sane: clients may rely on the existance of
* modules is not considered sane: clients may rely on the existence of
* given commands, loading AOF also may need some modules to exist, and
* if this instance is a slave, it must understand commands from master. */
void moduleLoadFromQueue(void) {
@ -4499,7 +4502,15 @@ int moduleUnload(sds name) {
* MODULE LOAD <path> [args...] */
void moduleCommand(client *c) {
char *subcmd = c->argv[1]->ptr;
if (c->argc == 2 && !strcasecmp(subcmd,"help")) {
const char *help[] = {
"LIST -- Return a list of loaded modules.",
"LOAD <path> [arg ...] -- Load a module library from <path>.",
"UNLOAD <name> -- Unload a module.",
NULL
};
addReplyHelp(c, help);
} else
if (!strcasecmp(subcmd,"load") && c->argc >= 3) {
robj **argv = NULL;
int argc = 0;
@ -4548,7 +4559,8 @@ void moduleCommand(client *c) {
}
dictReleaseIterator(di);
} else {
addReply(c,shared.syntaxerr);
addReplySubcommandSyntaxError(c);
return;
}
}

View File

@ -1,5 +1,5 @@
# gendoc.rb -- Converts the top-comments inside module.c to modules API
# reference documentaiton in markdown format.
# reference documentation in markdown format.
# Convert the C comment to markdown
def markdown(s)

View File

@ -56,11 +56,14 @@ size_t getStringObjectSdsUsedMemory(robj *o) {
/* Client.reply list dup and free methods. */
void *dupClientReplyValue(void *o) {
return sdsdup(o);
clientReplyBlock *old = o;
clientReplyBlock *buf = zmalloc(sizeof(clientReplyBlock) + old->size);
memcpy(buf, o, sizeof(clientReplyBlock) + old->size);
return buf;
}
void freeClientReplyValue(void *o) {
sdsfree(o);
zfree(o);
}
int listMatchObjects(void *a, void *b) {
@ -75,6 +78,8 @@ void linkClient(client *c) {
* this way removing the client in unlinkClient() will not require
* a linear scan, but just a constant time operation. */
c->client_list_node = listLast(server.clients);
uint64_t id = htonu64(c->id);
raxInsert(server.clients_index,(unsigned char*)&id,sizeof(id),c,NULL);
}
client *createClient(int fd) {
@ -138,6 +143,7 @@ client *createClient(int fd) {
c->bpop.target = NULL;
c->bpop.xread_group = NULL;
c->bpop.xread_consumer = NULL;
c->bpop.xread_group_noack = 0;
c->bpop.numreplicas = 0;
c->bpop.reploffset = 0;
c->woff = 0;
@ -237,25 +243,35 @@ int _addReplyToBuffer(client *c, const char *s, size_t len) {
void _addReplyStringToList(client *c, const char *s, size_t len) {
if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return;
if (listLength(c->reply) == 0) {
sds node = sdsnewlen(s,len);
listAddNodeTail(c->reply,node);
c->reply_bytes += len;
} else {
listNode *ln = listLast(c->reply);
sds tail = listNodeValue(ln);
listNode *ln = listLast(c->reply);
clientReplyBlock *tail = ln? listNodeValue(ln): NULL;
/* Append to this object when possible. If tail == NULL it was
* set via addDeferredMultiBulkLength(). */
if (tail && sdslen(tail)+len <= PROTO_REPLY_CHUNK_BYTES) {
tail = sdscatlen(tail,s,len);
listNodeValue(ln) = tail;
c->reply_bytes += len;
} else {
sds node = sdsnewlen(s,len);
listAddNodeTail(c->reply,node);
c->reply_bytes += len;
}
/* Note that 'tail' may be NULL even if we have a tail node, becuase when
* addDeferredMultiBulkLength() is used, it sets a dummy node to NULL just
* fo fill it later, when the size of the bulk length is set. */
/* Append to tail string when possible. */
if (tail) {
/* Copy the part we can fit into the tail, and leave the rest for a
* new node */
size_t avail = tail->size - tail->used;
size_t copy = avail >= len? len: avail;
memcpy(tail->buf + tail->used, s, copy);
tail->used += copy;
s += copy;
len -= copy;
}
if (len) {
/* Create a new node, make sure it is allocated to at
* least PROTO_REPLY_CHUNK_BYTES */
size_t size = len < PROTO_REPLY_CHUNK_BYTES? PROTO_REPLY_CHUNK_BYTES: len;
tail = zmalloc(size + sizeof(clientReplyBlock));
/* take over the allocation's internal fragmentation */
tail->size = zmalloc_usable(tail) - sizeof(clientReplyBlock);
tail->used = len;
memcpy(tail->buf, s, len);
listAddNodeTail(c->reply, tail);
c->reply_bytes += tail->size;
}
asyncCloseClientOnOutputBufferLimitReached(c);
}
@ -326,11 +342,30 @@ void addReplyErrorLength(client *c, const char *s, size_t len) {
if (!len || s[0] != '-') addReplyString(c,"-ERR ",5);
addReplyString(c,s,len);
addReplyString(c,"\r\n",2);
if (c->flags & CLIENT_MASTER) {
/* Sometimes it could be normal that a slave replies to a master with
* an error and this function gets called. Actually the error will never
* be sent because addReply*() against master clients has no effect...
* A notable example is:
*
* EVAL 'redis.call("incr",KEYS[1]); redis.call("nonexisting")' 1 x
*
* Where the master must propagate the first change even if the second
* will produce an error. However it is useful to log such events since
* they are rare and may hint at errors in a script or a bug in Redis. */
if (c->flags & (CLIENT_MASTER|CLIENT_SLAVE)) {
char* to = c->flags & CLIENT_MASTER? "master": "slave";
char* from = c->flags & CLIENT_MASTER? "slave": "master";
char *cmdname = c->lastcmd ? c->lastcmd->name : "<unknown>";
serverLog(LL_WARNING,"== CRITICAL == This slave is sending an error "
"to its master: '%s' after processing the command "
"'%s'", s, cmdname);
serverLog(LL_WARNING,"== CRITICAL == This %s is sending an error "
"to its %s: '%s' after processing the command "
"'%s'", from, to, s, cmdname);
/* Here we want to panic because when a master is sending an
* error to some slave in the context of replication, this can
* only create some kind of offset or data desynchronization. Better
* to catch it ASAP and crash instead of continuing. */
if (c->flags & CLIENT_SLAVE)
serverPanic("Continuing is unsafe: replication protocol violation.");
}
}
@ -387,26 +422,41 @@ void *addDeferredMultiBulkLength(client *c) {
/* Populate the length object and try gluing it to the next chunk. */
void setDeferredMultiBulkLength(client *c, void *node, long length) {
listNode *ln = (listNode*)node;
sds len, next;
clientReplyBlock *next;
char lenstr[128];
size_t lenstr_len = sprintf(lenstr, "*%ld\r\n", length);
/* Abort when *node is NULL: when the client should not accept writes
* we return NULL in addDeferredMultiBulkLength() */
if (node == NULL) return;
serverAssert(!listNodeValue(ln));
len = sdscatprintf(sdsnewlen("*",1),"%ld\r\n",length);
listNodeValue(ln) = len;
c->reply_bytes += sdslen(len);
if (ln->next != NULL) {
next = listNodeValue(ln->next);
/* Only glue when the next node is non-NULL (an sds in this case) */
if (next != NULL) {
len = sdscatsds(len,next);
listDelNode(c->reply,ln->next);
listNodeValue(ln) = len;
/* No need to update c->reply_bytes: we are just moving the same
* amount of bytes from one node to another. */
}
/* Normally we fill this dummy NULL node, added by addDeferredMultiBulkLength(),
* with a new buffer structure containing the protocol needed to specify
* the length of the array following. However sometimes when there is
* little memory to move, we may instead remove this NULL node, and prefix
* our protocol in the node immediately after to it, in order to save a
* write(2) syscall later. Conditions needed to do it:
*
* - The next node is non-NULL,
* - It has enough room already allocated
* - And not too large (avoid large memmove) */
if (ln->next != NULL && (next = listNodeValue(ln->next)) &&
next->size - next->used >= lenstr_len &&
next->used < PROTO_REPLY_CHUNK_BYTES * 4) {
memmove(next->buf + lenstr_len, next->buf, next->used);
memcpy(next->buf, lenstr, lenstr_len);
next->used += lenstr_len;
listDelNode(c->reply,ln);
} else {
/* Create a new node */
clientReplyBlock *buf = zmalloc(lenstr_len + sizeof(clientReplyBlock));
/* Take over the allocation's internal fragmentation */
buf->size = zmalloc_usable(buf) - sizeof(clientReplyBlock);
buf->used = lenstr_len;
memcpy(buf->buf, lenstr, lenstr_len);
listNodeValue(ln) = buf;
c->reply_bytes += buf->size;
}
asyncCloseClientOnOutputBufferLimitReached(c);
}
@ -560,11 +610,24 @@ void addReplyHelp(client *c, const char **help) {
setDeferredMultiBulkLength(c,blenp,blen);
}
/* Add a suggestive error reply.
* This function is typically invoked by from commands that support
* subcommands in response to an unknown subcommand or argument error. */
void addReplySubcommandSyntaxError(client *c) {
sds cmd = sdsnew((char*) c->argv[0]->ptr);
sdstoupper(cmd);
addReplyErrorFormat(c,
"Unknown subcommand or wrong number of arguments for '%s'. Try %s HELP.",
(char*)c->argv[1]->ptr,cmd);
sdsfree(cmd);
}
/* Copy 'src' client output buffers into 'dst' client output buffers.
* The function takes care of freeing the old output buffers of the
* destination client. */
void copyClientOutputBuffer(client *dst, client *src) {
listRelease(dst->reply);
dst->sentlen = 0;
dst->reply = listDup(src->reply);
memcpy(dst->buf,src->buf,src->bufpos);
dst->bufpos = src->bufpos;
@ -720,6 +783,8 @@ void unlinkClient(client *c) {
if (c->fd != -1) {
/* Remove from the list of active clients. */
if (c->client_list_node) {
uint64_t id = htonu64(c->id);
raxRemove(server.clients_index,(unsigned char*)&id,sizeof(id),NULL);
listDelNode(server.clients,c->client_list_node);
c->client_list_node = NULL;
}
@ -864,12 +929,21 @@ void freeClientsInAsyncFreeQueue(void) {
}
}
/* Return a client by ID, or NULL if the client ID is not in the set
* of registered clients. Note that "fake clients", created with -1 as FD,
* are not registered clients. */
client *lookupClientByID(uint64_t id) {
id = htonu64(id);
client *c = raxFind(server.clients_index,(unsigned char*)&id,sizeof(id));
return (c == raxNotFound) ? NULL : c;
}
/* Write data in output buffers to client. Return C_OK if the client
* is still valid after the call, C_ERR if it was freed. */
int writeToClient(int fd, client *c, int handler_installed) {
ssize_t nwritten = 0, totwritten = 0;
size_t objlen;
sds o;
clientReplyBlock *o;
while(clientHasPendingReplies(c)) {
if (c->bufpos > 0) {
@ -886,23 +960,24 @@ int writeToClient(int fd, client *c, int handler_installed) {
}
} else {
o = listNodeValue(listFirst(c->reply));
objlen = sdslen(o);
objlen = o->used;
if (objlen == 0) {
c->reply_bytes -= o->size;
listDelNode(c->reply,listFirst(c->reply));
continue;
}
nwritten = write(fd, o + c->sentlen, objlen - c->sentlen);
nwritten = write(fd, o->buf + c->sentlen, objlen - c->sentlen);
if (nwritten <= 0) break;
c->sentlen += nwritten;
totwritten += nwritten;
/* If we fully sent the object on head go to the next one */
if (c->sentlen == objlen) {
c->reply_bytes -= o->size;
listDelNode(c->reply,listFirst(c->reply));
c->sentlen = 0;
c->reply_bytes -= objlen;
/* If there are no longer objects in the list, we expect
* the count of reply bytes to be exactly zero. */
if (listLength(c->reply) == 0)
@ -1039,7 +1114,7 @@ void resetClient(client *c) {
* with the error and close the connection. */
int processInlineBuffer(client *c) {
char *newline;
int argc, j;
int argc, j, linefeed_chars = 1;
sds *argv, aux;
size_t querylen;
@ -1057,7 +1132,7 @@ int processInlineBuffer(client *c) {
/* Handle the \r\n case. */
if (newline && newline != c->querybuf && *(newline-1) == '\r')
newline--;
newline--, linefeed_chars++;
/* Split the input buffer up to the \r\n */
querylen = newline-(c->querybuf);
@ -1077,7 +1152,7 @@ int processInlineBuffer(client *c) {
c->repl_ack_time = server.unixtime;
/* Leave data after the first line of the query in the buffer */
sdsrange(c->querybuf,querylen+2,-1);
sdsrange(c->querybuf,querylen+linefeed_chars,-1);
/* Setup argv array on client structure */
if (argc) {
@ -1493,6 +1568,7 @@ sds catClientInfoString(sds s, client *client) {
*p++ = 'S';
}
if (client->flags & CLIENT_MASTER) *p++ = 'M';
if (client->flags & CLIENT_PUBSUB) *p++ = 'P';
if (client->flags & CLIENT_MULTI) *p++ = 'x';
if (client->flags & CLIENT_BLOCKED) *p++ = 'b';
if (client->flags & CLIENT_DIRTY_CAS) *p++ = 'd';
@ -1531,7 +1607,7 @@ sds catClientInfoString(sds s, client *client) {
client->lastcmd ? client->lastcmd->name : "NULL");
}
sds getAllClientsInfoString(void) {
sds getAllClientsInfoString(int type) {
listNode *ln;
listIter li;
client *client;
@ -1540,6 +1616,7 @@ sds getAllClientsInfoString(void) {
listRewind(server.clients,&li);
while ((ln = listNext(&li)) != NULL) {
client = listNodeValue(ln);
if (type != -1 && getClientType(client) != type) continue;
o = catClientInfoString(o,client);
o = sdscatlen(o,"\n",1);
}
@ -1553,22 +1630,40 @@ void clientCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"getname -- Return the name of the current connection.",
"kill <ip:port> -- Kill connection made from <ip:port>.",
"id -- Return the ID of the current connection.",
"getname -- Return the name of the current connection.",
"kill <ip:port> -- Kill connection made from <ip:port>.",
"kill <option> <value> [option value ...] -- Kill connections. Options are:",
" addr <ip:port> -- Kill connection made from <ip:port>.",
" type (normal|master|slave|pubsub) -- Kill connections by type.",
" skipme (yes|no) -- Skip killing current connection (default: yes).",
"list -- Return information about client connections.",
"pause <timeout> -- Suspend all Redis clients for <timout> milliseconds.",
"reply (on|off|skip) -- Control the replies sent to the current connection.",
"setname <name> -- Assign the name <name> to the current connection.",
" addr <ip:port> -- Kill connection made from <ip:port>",
" type (normal|master|slave|pubsub) -- Kill connections by type.",
" skipme (yes|no) -- Skip killing current connection (default: yes).",
"list [options ...] -- Return information about client connections. Options:",
" type (normal|master|slave|pubsub) -- Return clients of specified type.",
"pause <timeout> -- Suspend all Redis clients for <timout> milliseconds.",
"reply (on|off|skip) -- Control the replies sent to the current connection.",
"setname <name> -- Assign the name <name> to the current connection.",
"unblock <clientid> [TIMEOUT|ERROR] -- Unblock the specified blocked client.",
NULL
};
addReplyHelp(c, help);
} else if (!strcasecmp(c->argv[1]->ptr,"list") && c->argc == 2) {
} else if (!strcasecmp(c->argv[1]->ptr,"id") && c->argc == 2) {
/* CLIENT ID */
addReplyLongLong(c,c->id);
} else if (!strcasecmp(c->argv[1]->ptr,"list")) {
/* CLIENT LIST */
sds o = getAllClientsInfoString();
int type = -1;
if (c->argc == 4 && !strcasecmp(c->argv[2]->ptr,"type")) {
type = getClientTypeByName(c->argv[3]->ptr);
if (type == -1) {
addReplyErrorFormat(c,"Unknown client type '%s'",
(char*) c->argv[3]->ptr);
return;
}
} else if (c->argc != 2) {
addReply(c,shared.syntaxerr);
return;
}
sds o = getAllClientsInfoString(type);
addReplyBulkCBuffer(c,o,sdslen(o));
sdsfree(o);
} else if (!strcasecmp(c->argv[1]->ptr,"reply") && c->argc == 3) {
@ -1671,6 +1766,38 @@ NULL
/* If this client has to be closed, flag it as CLOSE_AFTER_REPLY
* only after we queued the reply to its output buffers. */
if (close_this_client) c->flags |= CLIENT_CLOSE_AFTER_REPLY;
} else if (!strcasecmp(c->argv[1]->ptr,"unblock") && (c->argc == 3 ||
c->argc == 4))
{
/* CLIENT UNBLOCK <id> [timeout|error] */
long long id;
int unblock_error = 0;
if (c->argc == 4) {
if (!strcasecmp(c->argv[3]->ptr,"timeout")) {
unblock_error = 0;
} else if (!strcasecmp(c->argv[3]->ptr,"error")) {
unblock_error = 1;
} else {
addReplyError(c,
"CLIENT UNBLOCK reason should be TIMEOUT or ERROR");
return;
}
}
if (getLongLongFromObjectOrReply(c,c->argv[2],&id,NULL)
!= C_OK) return;
struct client *target = lookupClientByID(id);
if (target && target->flags & CLIENT_BLOCKED) {
if (unblock_error)
addReplyError(target,
"-UNBLOCKED client unblocked via CLIENT UNBLOCK");
else
replyToBlockedClientTimedOut(target);
unblockClient(target);
addReply(c,shared.cone);
} else {
addReply(c,shared.czero);
}
} else if (!strcasecmp(c->argv[1]->ptr,"setname") && c->argc == 3) {
int j, len = sdslen(c->argv[2]->ptr);
char *p = c->argv[2]->ptr;
@ -1821,10 +1948,7 @@ void rewriteClientCommandArgument(client *c, int i, robj *newval) {
* the caller wishes. The main usage of this function currently is
* enforcing the client output length limits. */
unsigned long getClientOutputBufferMemoryUsage(client *c) {
unsigned long list_item_size = sizeof(listNode)+5;
/* The +5 above means we assume an sds16 hdr, may not be true
* but is not going to be a problem. */
unsigned long list_item_size = sizeof(listNode) + sizeof(clientReplyBlock);
return c->reply_bytes + (list_item_size*listLength(c->reply));
}

View File

@ -29,8 +29,8 @@
#include "server.h"
/* This file implements keyspace events notification via Pub/Sub ad
* described at http://redis.io/topics/keyspace-events. */
/* This file implements keyspace events notification via Pub/Sub and
* described at https://redis.io/topics/notifications. */
/* Turn a string representing notification classes into an integer
* representing notification classes flags xored.

View File

@ -123,9 +123,25 @@ robj *createStringObject(const char *ptr, size_t len) {
return createRawStringObject(ptr,len);
}
robj *createStringObjectFromLongLong(long long value) {
/* Create a string object from a long long value. When possible returns a
* shared integer object, or at least an integer encoded one.
*
* If valueobj is non zero, the function avoids returning a a shared
* integer, because the object is going to be used as value in the Redis key
* space (for instance when the INCR command is used), so we want LFU/LRU
* values specific for each key. */
robj *createStringObjectFromLongLongWithOptions(long long value, int valueobj) {
robj *o;
if (value >= 0 && value < OBJ_SHARED_INTEGERS) {
if (server.maxmemory == 0 ||
!(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS))
{
/* If the maxmemory policy permits, we can still return shared integers
* even if valueobj is true. */
valueobj = 0;
}
if (value >= 0 && value < OBJ_SHARED_INTEGERS && valueobj == 0) {
incrRefCount(shared.integers[value]);
o = shared.integers[value];
} else {
@ -140,6 +156,20 @@ robj *createStringObjectFromLongLong(long long value) {
return o;
}
/* Wrapper for createStringObjectFromLongLongWithOptions() always demanding
* to create a shared object if possible. */
robj *createStringObjectFromLongLong(long long value) {
return createStringObjectFromLongLongWithOptions(value,0);
}
/* Wrapper for createStringObjectFromLongLongWithOptions() avoiding a shared
* object when LFU/LRU info are needed, that is, when the object is used
* as a value in the key space, and Redis is configured to evict based on
* LFU/LRU. */
robj *createStringObjectFromLongLongForValue(long long value) {
return createStringObjectFromLongLongWithOptions(value,1);
}
/* Create a string object from a long double. If humanfriendly is non-zero
* it does not use exponential format and trims trailing zeroes at the end,
* however this results in loss of precision. Otherwise exp format is used
@ -715,7 +745,7 @@ char *strEncoding(int encoding) {
* size of a radix tree that is used to store Stream IDs.
*
* Note: to guess the size of the radix tree is not trivial, so we
* approximate it considering 128 bytes of data overhead for each
* approximate it considering 16 bytes of data overhead for each
* key (the ID), and then adding the number of bare nodes, plus some
* overhead due by the data and child pointers. This secret recipe
* was obtained by checking the average radix tree created by real
@ -874,6 +904,7 @@ size_t objectComputeSize(robj *o, size_t sample_size) {
* structures and the PEL memory usage. */
raxIterator cri;
raxStart(&cri,cg->consumers);
raxSeek(&cri,"^",NULL,0);
while(raxNext(&cri)) {
streamConsumer *consumer = cri.data;
asize += sizeof(*consumer);
@ -968,7 +999,7 @@ struct redisMemOverhead *getMemoryOverheadData(void) {
listRewind(server.clients,&li);
while((ln = listNext(&li))) {
client *c = listNodeValue(ln);
if (c->flags & CLIENT_SLAVE)
if (c->flags & CLIENT_SLAVE && !(c->flags & CLIENT_MONITOR))
continue;
mem += getClientOutputBufferMemoryUsage(c);
mem += sdsAllocSize(c->querybuf);
@ -1136,6 +1167,32 @@ sds getMemoryDoctorReport(void) {
return s;
}
/* Set the object LRU/LFU depending on server.maxmemory_policy.
* The lfu_freq arg is only relevant if policy is MAXMEMORY_FLAG_LFU.
* The lru_idle and lru_clock args are only relevant if policy
* is MAXMEMORY_FLAG_LRU.
* Either or both of them may be <0, in that case, nothing is set. */
void objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
long long lru_clock) {
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
if (lfu_freq >= 0) {
serverAssert(lfu_freq <= 255);
val->lru = (LFUGetTimeInMinutes()<<8) | lfu_freq;
}
} else if (lru_idle >= 0) {
/* Serialized LRU idle time is in seconds. Scale
* according to the LRU clock resolution this Redis
* instance was compiled with (normally 1000 ms, so the
* below statement will expand to lru_idle*1000/1000. */
lru_idle = lru_idle*1000/LRU_CLOCK_RESOLUTION;
val->lru = lru_clock - lru_idle;
/* If the lru field overflows (since LRU it is a wrapping
* clock), the best we can do is to provide the maximum
* representable idle time. */
if (val->lru < 0) val->lru = lru_clock+1;
}
}
/* ======================= The OBJECT and MEMORY commands =================== */
/* This is a helper function for the OBJECT command. We need to lookup keys
@ -1161,10 +1218,10 @@ void objectCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"encoding <key> -- Return the kind of internal representation used in order to store the value associated with a key.",
"freq <key> -- Return the access frequency index of the key. The returned integer is proportional to the logarithm of the recent access frequency of the key.",
"idletime <key> -- Return the idle time of the key, that is the approximated number of seconds elapsed since the last access to the key.",
"refcount <key> -- Return the number of references of the value associated with the specified key.",
"ENCODING <key> -- Return the kind of internal representation used in order to store the value associated with a key.",
"FREQ <key> -- Return the access frequency index of the key. The returned integer is proportional to the logarithm of the recent access frequency of the key.",
"IDLETIME <key> -- Return the idle time of the key, that is the approximated number of seconds elapsed since the last access to the key.",
"REFCOUNT <key> -- Return the number of references of the value associated with the specified key.",
NULL
};
addReplyHelp(c, help);
@ -1197,7 +1254,7 @@ NULL
* when the key is read or overwritten. */
addReplyLongLong(c,LFUDecrAndReturn(o));
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try OBJECT help", (char *)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
}
}

View File

@ -327,9 +327,9 @@ void publishCommand(client *c) {
void pubsubCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"channels [<pattern>] -- Return the currently active channels matching a pattern (default: all).",
"numpat -- Return number of subscriptions to patterns.",
"numsub [channel-1 .. channel-N] -- Returns the number of subscribers for the specified channels (excluding patterns, default: none).",
"CHANNELS [<pattern>] -- Return the currently active channels matching a pattern (default: all).",
"NUMPAT -- Return number of subscriptions to patterns.",
"NUMSUB [channel-1 .. channel-N] -- Returns the number of subscribers for the specified channels (excluding patterns, default: none).",
NULL
};
addReplyHelp(c, help);
@ -372,7 +372,6 @@ NULL
/* PUBSUB NUMPAT */
addReplyLongLong(c,listLength(server.pubsub_patterns));
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try PUBSUB HELP",
(char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
}
}

View File

@ -1636,7 +1636,7 @@ int quicklistTest(int argc, char *argv[]) {
TEST("add to tail of empty list") {
quicklist *ql = quicklistNew(-2, options[_i]);
quicklistPushTail(ql, "hello", 6);
/* 1 for head and 1 for tail beacuse 1 node = head = tail */
/* 1 for head and 1 for tail because 1 node = head = tail */
ql_verify(ql, 1, 1, 1, 1);
quicklistRelease(ql);
}
@ -1644,7 +1644,7 @@ int quicklistTest(int argc, char *argv[]) {
TEST("add to head of empty list") {
quicklist *ql = quicklistNew(-2, options[_i]);
quicklistPushHead(ql, "hello", 6);
/* 1 for head and 1 for tail beacuse 1 node = head = tail */
/* 1 for head and 1 for tail because 1 node = head = tail */
ql_verify(ql, 1, 1, 1, 1);
quicklistRelease(ql);
}

View File

@ -359,7 +359,18 @@ raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **chi
* parent's node is returned as '*plink' if not NULL. Finally, if the
* search stopped in a compressed node, '*splitpos' returns the index
* inside the compressed node where the search ended. This is useful to
* know where to split the node for insertion. */
* know where to split the node for insertion.
*
* Note that when we stop in the middle of a compressed node with
* a perfect match, this function will return a length equal to the
* 'len' argument (all the key matched), and will return a *splitpos which is
* always positive (that will represent the index of the character immediately
* *after* the last match in the current compressed node).
*
* When instead we stop at a compressed node and *splitpos is zero, it
* means that the current node represents the key (that is, none of the
* compressed node characters are needed to represent the key, just all
* its parents nodes). */
static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) {
raxNode *h = rax->head;
raxNode **parentlink = &rax->head;
@ -405,10 +416,12 @@ static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode
/* Insert the element 's' of size 'len', setting as auxiliary data
* the pointer 'data'. If the element is already present, the associated
* data is updated, and 0 is returned, otherwise the element is inserted
* and 1 is returned. On out of memory the function returns 0 as well but
* sets errno to ENOMEM, otherwise errno will be set to 0. */
int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
* data is updated (only if 'overwrite' is set to 1), and 0 is returned,
* otherwise the element is inserted and 1 is returned. On out of memory the
* function returns 0 as well but sets errno to ENOMEM, otherwise errno will
* be set to 0.
*/
int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) {
size_t i;
int j = 0; /* Split position. If raxLowWalk() stops in a compressed
node, the index 'j' represents the char we stopped within the
@ -426,7 +439,8 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
* data pointer. */
if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {
debugf("### Insert: node representing key exists\n");
if (!h->iskey || h->isnull) {
/* Make space for the value pointer if needed. */
if (!h->iskey || (h->isnull && overwrite)) {
h = raxReallocForData(h,data);
if (h) memcpy(parentlink,&h,sizeof(h));
}
@ -434,12 +448,17 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
errno = ENOMEM;
return 0;
}
/* Update the existing key if there is already one. */
if (h->iskey) {
if (old) *old = raxGetData(h);
raxSetData(h,data);
if (overwrite) raxSetData(h,data);
errno = 0;
return 0; /* Element already exists. */
}
/* Otherwise set the node as a key. Note that raxSetData()
* will set h->iskey. */
raxSetData(h,data);
rax->numele++;
return 1; /* Element inserted. */
@ -448,7 +467,7 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
/* If the node we stopped at is a compressed node, we need to
* split it before to continue.
*
* Splitting a compressed node have a few possibile cases.
* Splitting a compressed node have a few possible cases.
* Imagine that the node 'h' we are currently at is a compressed
* node contaning the string "ANNIBALE" (it means that it represents
* nodes A -> N -> N -> I -> B -> A -> L -> E with the only child
@ -730,7 +749,7 @@ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
cp = raxNodeLastChildPtr(trimmed);
memcpy(cp,&postfix,sizeof(postfix));
/* Finish! We don't need to contine with the insertion
/* Finish! We don't need to continue with the insertion
* algorithm for ALGO 2. The key is already inserted. */
rax->numele++;
rax_free(h);
@ -793,6 +812,19 @@ oom:
return 0;
}
/* Overwriting insert. Just a wrapper for raxGenericInsert() that will
* update the element if there is already one for the same key. */
int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
return raxGenericInsert(rax,s,len,data,old,1);
}
/* Non overwriting insert function: this if an element with the same key
* exists, the value is not updated and the function returns 0.
* This is a just a wrapper for raxGenericInsert(). */
int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
return raxGenericInsert(rax,s,len,data,old,0);
}
/* Find a key in the rax, returns raxNotFound special void pointer value
* if the item was not found, otherwise the value associated with the
* item is returned. */
@ -1135,6 +1167,7 @@ void raxStart(raxIterator *it, rax *rt) {
it->key = it->key_static_string;
it->key_max = RAX_ITER_STATIC_LEN;
it->data = NULL;
it->node_cb = NULL;
raxStackInit(&it->stack);
}
@ -1208,6 +1241,10 @@ int raxIteratorNextStep(raxIterator *it, int noup) {
if (!raxIteratorAddChars(it,it->node->data,
it->node->iscompr ? it->node->size : 1)) return 0;
memcpy(&it->node,cp,sizeof(it->node));
/* Call the node callback if any, and replace the node pointer
* if the callback returns true. */
if (it->node_cb && it->node_cb(&it->node))
memcpy(cp,&it->node,sizeof(it->node));
/* For "next" step, stop every time we find a key along the
* way, since the key is lexicograhically smaller compared to
* what follows in the sub-children. */
@ -1260,6 +1297,10 @@ int raxIteratorNextStep(raxIterator *it, int noup) {
raxIteratorAddChars(it,it->node->data+i,1);
if (!raxStackPush(&it->stack,it->node)) return 0;
memcpy(&it->node,cp,sizeof(it->node));
/* Call the node callback if any, and replace the node
* pointer if the callback returns true. */
if (it->node_cb && it->node_cb(&it->node))
memcpy(cp,&it->node,sizeof(it->node));
if (it->node->iskey) {
it->data = raxGetData(it->node);
return 1;
@ -1293,7 +1334,7 @@ int raxSeekGreatest(raxIterator *it) {
/* Like raxIteratorNextStep() but implements an iteration step moving
* to the lexicographically previous element. The 'noup' option has a similar
* effect to the one of raxIteratorPrevSte(). */
* effect to the one of raxIteratorNextStep(). */
int raxIteratorPrevStep(raxIterator *it, int noup) {
if (it->flags & RAX_ITER_EOF) {
return 1;
@ -1523,11 +1564,26 @@ int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) {
/* If there was no mismatch we are into a node representing the
* key, (but which is not a key or the seek operator does not
* include 'eq'), or we stopped in the middle of a compressed node
* after processing all the key. Cotinue iterating as this was
* after processing all the key. Continue iterating as this was
* a legitimate key we stopped at. */
it->flags &= ~RAX_ITER_JUST_SEEKED;
if (gt && !raxIteratorNextStep(it,0)) return 0;
if (lt && !raxIteratorPrevStep(it,0)) return 0;
if (it->node->iscompr && it->node->iskey && splitpos && lt) {
/* If we stopped in the middle of a compressed node with
* perfect match, and the condition is to seek a key "<" than
* the specified one, then if this node is a key it already
* represents our match. For instance we may have nodes:
*
* "f" -> "oobar" = 1 -> "" = 2
*
* Representing keys "f" = 1, "foobar" = 2. A seek for
* the key < "foo" will stop in the middle of the "oobar"
* node, but will be our match, representing the key "f".
*
* So in that case, we don't seek backward. */
} else {
if (gt && !raxIteratorNextStep(it,0)) return 0;
if (lt && !raxIteratorPrevStep(it,0)) return 0;
}
it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
}
} else {

View File

@ -94,7 +94,7 @@ typedef struct raxNode {
*
* If the node has an associated key (iskey=1) and is not NULL
* (isnull=0), then after the raxNode pointers poiting to the
* childen, an additional value pointer is present (as you can see
* children, an additional value pointer is present (as you can see
* in the representation above as "value-ptr" field).
*/
unsigned char data[];
@ -119,6 +119,21 @@ typedef struct raxStack {
int oom; /* True if pushing into this stack failed for OOM at some point. */
} raxStack;
/* Optional callback used for iterators and be notified on each rax node,
* including nodes not representing keys. If the callback returns true
* the callback changed the node pointer in the iterator structure, and the
* iterator implementation will have to replace the pointer in the radix tree
* internals. This allows the callback to reallocate the node to perform
* very special operations, normally not needed by normal applications.
*
* This callback is used to perform very low level analysis of the radix tree
* structure, scanning each possible node (but the root node), or in order to
* reallocate the nodes to reduce the allocation fragmentation (this is the
* Redis application for this callback).
*
* This is currently only supported in forward iterations (raxNext) */
typedef int (*raxNodeCallback)(raxNode **noderef);
/* Radix tree iterator state is encapsulated into this data structure. */
#define RAX_ITER_STATIC_LEN 128
#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current
@ -137,6 +152,7 @@ typedef struct raxIterator {
unsigned char key_static_string[RAX_ITER_STATIC_LEN];
raxNode *node; /* Current node. Only for unsafe iteration. */
raxStack stack; /* Stack used for unsafe iteration. */
raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */
} raxIterator;
/* A special pointer returned for not found items. */
@ -145,6 +161,7 @@ extern void *raxNotFound;
/* Exported API. */
rax *raxNew(void);
int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
int raxRemove(rax *rax, unsigned char *s, size_t len, void **old);
void *raxFind(rax *rax, unsigned char *s, size_t len);
void raxFree(rax *rax);
@ -160,4 +177,8 @@ int raxEOF(raxIterator *it);
void raxShow(rax *rax);
uint64_t raxSize(rax *rax);
/* Internal API. May be used by the node callback in order to access rax nodes
* in a low level way, so this function is exported as well. */
void raxSetData(raxNode *n, void *data);
#endif

View File

@ -100,6 +100,9 @@ int rdbLoadType(rio *rdb) {
return type;
}
/* This is only used to load old databases stored with the RDB_OPCODE_EXPIRETIME
* opcode. New versions of Redis store using the RDB_OPCODE_EXPIRETIME_MS
* opcode. */
time_t rdbLoadTime(rio *rdb) {
int32_t t32;
rdbLoadRaw(rdb,&t32,4);
@ -108,12 +111,26 @@ time_t rdbLoadTime(rio *rdb) {
int rdbSaveMillisecondTime(rio *rdb, long long t) {
int64_t t64 = (int64_t) t;
memrev64ifbe(&t64); /* Store in little endian. */
return rdbWriteRaw(rdb,&t64,8);
}
long long rdbLoadMillisecondTime(rio *rdb) {
/* This function loads a time from the RDB file. It gets the version of the
* RDB because, unfortunately, before Redis 5 (RDB version 9), the function
* failed to convert data to/from little endian, so RDB files with keys having
* expires could not be shared between big endian and little endian systems
* (because the expire time will be totally wrong). The fix for this is just
* to call memrev64ifbe(), however if we fix this for all the RDB versions,
* this call will introduce an incompatibility for big endian systems:
* after upgrading to Redis version 5 they will no longer be able to load their
* own old RDB files. Because of that, we instead fix the function only for new
* RDB versions, and load older RDB versions as we used to do in the past,
* allowing big endian systems to load their own old RDB files. */
long long rdbLoadMillisecondTime(rio *rdb, int rdbver) {
int64_t t64;
rdbLoadRaw(rdb,&t64,8);
if (rdbver >= 9) /* Check the top comment of this function. */
memrev64ifbe(&t64); /* Convert in big endian if the system is BE. */
return (long long)t64;
}
@ -271,7 +288,7 @@ void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) {
memcpy(p,buf,len);
return p;
} else if (encode) {
return createStringObjectFromLongLong(val);
return createStringObjectFromLongLongForValue(val);
} else {
return createObject(OBJ_STRING,sdsfromlonglong(val));
}
@ -988,8 +1005,7 @@ size_t rdbSavedObjectLen(robj *o) {
* On error -1 is returned.
* On success if the key was actually saved 1 is returned, otherwise 0
* is returned (the key was already expired). */
int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime)
{
int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime) {
int savelru = server.maxmemory_policy & MAXMEMORY_FLAG_LRU;
int savelfu = server.maxmemory_policy & MAXMEMORY_FLAG_LFU;
@ -1001,7 +1017,7 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime)
/* Save the LRU info. */
if (savelru) {
int idletime = estimateObjectIdleTime(val);
uint64_t idletime = estimateObjectIdleTime(val);
idletime /= 1000; /* Using seconds is enough and requires less space.*/
if (rdbSaveType(rdb,RDB_OPCODE_IDLE) == -1) return -1;
if (rdbSaveLen(rdb,idletime) == -1) return -1;
@ -1111,13 +1127,9 @@ int rdbSaveRio(rio *rdb, int *error, int flags, rdbSaveInfo *rsi) {
* is currently the largest type we are able to represent in RDB sizes.
* However this does not limit the actual size of the DB to load since
* these sizes are just hints to resize the hash tables. */
uint32_t db_size, expires_size;
db_size = (dictSize(db->dict) <= UINT32_MAX) ?
dictSize(db->dict) :
UINT32_MAX;
expires_size = (dictSize(db->expires) <= UINT32_MAX) ?
dictSize(db->expires) :
UINT32_MAX;
uint64_t db_size, expires_size;
db_size = dictSize(db->dict);
expires_size = dictSize(db->expires);
if (rdbSaveType(rdb,RDB_OPCODE_RESIZEDB) == -1) goto werr;
if (rdbSaveLen(rdb,db_size) == -1) goto werr;
if (rdbSaveLen(rdb,expires_size) == -1) goto werr;
@ -1225,6 +1237,10 @@ int rdbSave(char *filename, rdbSaveInfo *rsi) {
}
rioInitWithFile(&rdb,fp);
if (server.rdb_save_incremental_fsync)
rioSetAutoSync(&rdb,REDIS_AUTOSYNC_BYTES);
if (rdbSaveRio(&rdb,&error,RDB_SAVE_NONE,rsi) == C_ERR) {
errno = error;
goto werr;
@ -1441,6 +1457,9 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
o = createZsetObject();
zs = o->ptr;
if (zsetlen > DICT_HT_INITIAL_SIZE)
dictExpand(zs->dict,zsetlen);
/* Load every single element of the sorted set. */
while(zsetlen--) {
sds sdsele;
@ -1509,6 +1528,9 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
sdsfree(value);
}
if (o->encoding == OBJ_ENCODING_HT && len > DICT_HT_INITIAL_SIZE)
dictExpand(o->ptr,len);
/* Load remaining fields and values into the hash table */
while (o->encoding == OBJ_ENCODING_HT && len > 0) {
len--;
@ -1636,7 +1658,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
if (first == NULL) {
/* Serialized listpacks should never be empty, since on
* deletion we should remove the radix tree key if the
* resulting listpack is emtpy. */
* resulting listpack is empty. */
rdbExitReportCorruptRDB("Empty listpack inside stream");
}
@ -1683,7 +1705,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
unsigned char rawid[sizeof(streamID)];
rdbLoadRaw(rdb,rawid,sizeof(rawid));
streamNACK *nack = streamCreateNACK(NULL);
nack->delivery_time = rdbLoadMillisecondTime(rdb);
nack->delivery_time = rdbLoadMillisecondTime(rdb,RDB_VERSION);
nack->delivery_count = rdbLoadLen(rdb,NULL);
if (!raxInsert(cgroup->pel,rawid,sizeof(rawid),nack,NULL))
rdbExitReportCorruptRDB("Duplicated gobal PEL entry "
@ -1702,7 +1724,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
streamConsumer *consumer = streamLookupConsumer(cgroup,cname,
1);
sdsfree(cname);
consumer->seen_time = rdbLoadMillisecondTime(rdb);
consumer->seen_time = rdbLoadMillisecondTime(rdb,RDB_VERSION);
/* Load the PEL about entries owned by this specific
* consumer. */
@ -1845,11 +1867,9 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof) {
}
/* Key-specific attributes, set by opcodes before the key type. */
long long expiretime = -1, now = mstime();
long long lru_idle = -1, lfu_freq = -1, expiretime = -1, now = mstime();
long long lru_clock = LRU_CLOCK();
uint64_t lru_idle = -1;
int lfu_freq = -1;
while(1) {
robj *key, *val;
@ -1867,7 +1887,7 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof) {
} else if (type == RDB_OPCODE_EXPIRETIME_MS) {
/* EXPIRETIME_MS: milliseconds precision expire times introduced
* with RDB v3. Like EXPIRETIME but no with more precision. */
expiretime = rdbLoadMillisecondTime(rdb);
expiretime = rdbLoadMillisecondTime(rdb,rdbver);
continue; /* Read next opcode. */
} else if (type == RDB_OPCODE_FREQ) {
/* FREQ: LFU frequency. */
@ -1877,7 +1897,9 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof) {
continue; /* Read next opcode. */
} else if (type == RDB_OPCODE_IDLE) {
/* IDLE: LRU idle time. */
if ((lru_idle = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
uint64_t qword;
if ((qword = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
lru_idle = qword;
continue; /* Read next opcode. */
} else if (type == RDB_OPCODE_EOF) {
/* EOF: End of file, exit the main loop. */
@ -1996,20 +2018,9 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof) {
/* Set the expire time if needed */
if (expiretime != -1) setExpire(NULL,db,key,expiretime);
if (lfu_freq != -1) {
val->lru = (LFUGetTimeInMinutes()<<8) | lfu_freq;
} else {
/* LRU idle time loaded from RDB is in seconds. Scale
* according to the LRU clock resolution this Redis
* instance was compiled with (normaly 1000 ms, so the
* below statement will expand to lru_idle*1000/1000. */
lru_idle = lru_idle*1000/LRU_CLOCK_RESOLUTION;
val->lru = lru_clock - lru_idle;
/* If the lru field overflows (since LRU it is a wrapping
* clock), the best we can do is to provide the maxium
* representable idle time. */
if (val->lru < 0) val->lru = lru_clock+1;
}
/* Set usage information (for eviction). */
objectSetLRUOrLFU(val,lfu_freq,lru_idle,lru_clock);
/* Decrement the key refcount since dbAdd() will take its
* own reference. */
@ -2088,7 +2099,7 @@ void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency);
/* SIGUSR1 is whitelisted, so we have a way to kill a child without
* tirggering an error conditon. */
* tirggering an error condition. */
if (bysignal != SIGUSR1)
server.lastbgsave_status = C_ERR;
}
@ -2125,7 +2136,7 @@ void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
* in error state.
*
* If the process returned an error, consider the list of slaves that
* can continue to be emtpy, so that it's just a special case of the
* can continue to be empty, so that it's just a special case of the
* normal code path. */
ok_slaves = zmalloc(sizeof(uint64_t)); /* Make space for the count. */
ok_slaves[0] = 0;

View File

@ -129,6 +129,8 @@ int rdbLoadType(rio *rdb);
int rdbSaveTime(rio *rdb, time_t t);
time_t rdbLoadTime(rio *rdb);
int rdbSaveLen(rio *rdb, uint64_t len);
int rdbSaveMillisecondTime(rio *rdb, long long t);
long long rdbLoadMillisecondTime(rio *rdb, int rdbver);
uint64_t rdbLoadLen(rio *rdb, int *isencoded);
int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr);
int rdbSaveObjectType(rio *rdb, robj *o);

View File

@ -34,7 +34,6 @@
void createSharedObjects(void);
void rdbLoadProgressCallback(rio *r, const void *buf, size_t len);
long long rdbLoadMillisecondTime(rio *rdb);
int rdbCheckMode = 0;
struct {
@ -224,7 +223,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) {
/* EXPIRETIME_MS: milliseconds precision expire times introduced
* with RDB v3. Like EXPIRETIME but no with more precision. */
rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE;
if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr;
if ((expiretime = rdbLoadMillisecondTime(&rdb, rdbver)) == -1) goto eoferr;
continue; /* Read next opcode. */
} else if (type == RDB_OPCODE_FREQ) {
/* FREQ: LFU frequency. */
@ -287,12 +286,8 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) {
/* Read value */
rdbstate.doing = RDB_CHECK_DOING_READ_OBJECT_VALUE;
if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr;
/* Check if the key already expired. This function is used when loading
* an RDB file from disk, either at startup, or when an RDB was
* received from the master. In the latter case, the master is
* responsible for key expiry. If we would expire keys here, the
* snapshot taken by the master may not be reflected on the slave. */
if (server.masterhost == NULL && expiretime != -1 && expiretime < now)
/* Check if the key already expired. */
if (expiretime != -1 && expiretime < now)
rdbstate.already_expired++;
if (expiretime != -1) rdbstate.expires++;
rdbstate.key = NULL;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
/* redisassert.h -- Drop in replacemnet assert.h that prints the stack trace
/* redisassert.h -- Drop in replacements assert.h that prints the stack trace
* in the Redis logs.
*
* This file should be included instead of "assert.h" inside libraries used by

View File

@ -553,7 +553,7 @@ need_full_resync:
* Side effects, other than starting a BGSAVE:
*
* 1) Handle the slaves in WAIT_START state, by preparing them for a full
* sync if the BGSAVE was succesfully started, or sending them an error
* sync if the BGSAVE was successfully started, or sending them an error
* and dropping them from the list of slaves.
*
* 2) Flush the Lua scripting script cache if the BGSAVE was actually
@ -896,7 +896,7 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
}
}
/* If the preamble was already transfered, send the RDB bulk data. */
/* If the preamble was already transferred, send the RDB bulk data. */
lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
buflen = read(slave->repldbfd,buf,PROTO_IOBUF_LEN);
if (buflen <= 0) {
@ -965,7 +965,7 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
replicationGetSlaveName(slave));
/* Note: we wait for a REPLCONF ACK message from slave in
* order to really put it online (install the write handler
* so that the accumulated data can be transfered). However
* so that the accumulated data can be transferred). However
* we change the replication state ASAP, since our slave
* is technically online now. */
slave->replstate = SLAVE_STATE_ONLINE;
@ -1048,7 +1048,7 @@ int slaveIsInHandshakeState(void) {
/* Avoid the master to detect the slave is timing out while loading the
* RDB file in initial synchronization. We send a single newline character
* that is valid protocol but is guaranteed to either be sent entierly or
* that is valid protocol but is guaranteed to either be sent entirely or
* not, since the byte is indivisible.
*
* The function is called in two contexts: while we flush the current
@ -1105,7 +1105,7 @@ void restartAOF() {
#define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */
void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
char buf[4096];
ssize_t nread, readlen;
ssize_t nread, readlen, nwritten;
off_t left;
UNUSED(el);
UNUSED(privdata);
@ -1206,8 +1206,9 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
}
server.repl_transfer_lastio = server.unixtime;
if (write(server.repl_transfer_fd,buf,nread) != nread) {
serverLog(LL_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> SLAVE synchronization: %s", strerror(errno));
if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) {
serverLog(LL_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> SLAVE synchronization: %s",
(nwritten == -1) ? strerror(errno) : "short write");
goto error;
}
server.repl_transfer_read += nread;
@ -1278,6 +1279,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
close(server.repl_transfer_fd);
replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db);
server.repl_state = REPL_STATE_CONNECTED;
server.repl_down_since = 0;
/* After a full resynchroniziation we use the replication ID and
* offset of the master. The secondary ID / offset are cleared since
* we are starting a new history. */
@ -1314,24 +1316,31 @@ error:
#define SYNC_CMD_FULL (SYNC_CMD_READ|SYNC_CMD_WRITE)
char *sendSynchronousCommand(int flags, int fd, ...) {
/* Create the command to send to the master, we use simple inline
* protocol for simplicity as currently we only send simple strings. */
/* Create the command to send to the master, we use redis binary
* protocol to make sure correct arguments are sent. This function
* is not safe for all binary data. */
if (flags & SYNC_CMD_WRITE) {
char *arg;
va_list ap;
sds cmd = sdsempty();
sds cmdargs = sdsempty();
size_t argslen = 0;
va_start(ap,fd);
while(1) {
arg = va_arg(ap, char*);
if (arg == NULL) break;
if (sdslen(cmd) != 0) cmd = sdscatlen(cmd," ",1);
cmd = sdscat(cmd,arg);
cmdargs = sdscatprintf(cmdargs,"$%zu\r\n%s\r\n",strlen(arg),arg);
argslen++;
}
cmd = sdscatlen(cmd,"\r\n",2);
va_end(ap);
cmd = sdscatprintf(cmd,"*%zu\r\n",argslen);
cmd = sdscatsds(cmd,cmdargs);
sdsfree(cmdargs);
/* Transfer command to the server. */
if (syncWrite(fd,cmd,sdslen(cmd),server.repl_syncio_timeout*1000)
== -1)
@ -1388,7 +1397,7 @@ char *sendSynchronousCommand(int flags, int fd, ...) {
*
* The function returns:
*
* PSYNC_CONTINUE: If the PSYNC command succeded and we can continue.
* PSYNC_CONTINUE: If the PSYNC command succeeded and we can continue.
* PSYNC_FULLRESYNC: If PSYNC is supported but a full resync is needed.
* In this case the master run_id and global replication
* offset is saved.
@ -1942,7 +1951,6 @@ void replicationSetMaster(char *ip, int port) {
* our own parameters, to later PSYNC with the new master. */
if (was_master) replicationCacheMasterUsingMyself();
server.repl_state = REPL_STATE_CONNECT;
server.repl_down_since = 0;
}
/* Cancel replication, setting the instance as a master itself. */
@ -2112,7 +2120,7 @@ void replicationSendAck(void) {
* functions. */
/* This function is called by freeClient() in order to cache the master
* client structure instead of destryoing it. freeClient() will return
* client structure instead of destroying it. freeClient() will return
* ASAP after this function returns, so every action needed to avoid problems
* with a client that is really "suspended" has to be done by this function.
*
@ -2140,6 +2148,8 @@ void replicationCacheMaster(client *c) {
server.master->read_reploff = server.master->reploff;
if (c->flags & CLIENT_MULTI) discardTransaction(c);
listEmpty(c->reply);
c->sentlen = 0;
c->reply_bytes = 0;
c->bufpos = 0;
resetClient(c);
@ -2209,6 +2219,7 @@ void replicationResurrectCachedMaster(int newfd) {
server.master->authenticated = 1;
server.master->lastinteraction = server.unixtime;
server.repl_state = REPL_STATE_CONNECTED;
server.repl_down_since = 0;
/* Re-add to the list of clients. */
linkClient(server.master);

View File

@ -116,7 +116,7 @@ static size_t rioFileWrite(rio *r, const void *buf, size_t len) {
r->io.file.buffered >= r->io.file.autosync)
{
fflush(r->io.file.fp);
aof_fsync(fileno(r->io.file.fp));
redis_fsync(fileno(r->io.file.fp));
r->io.file.buffered = 0;
}
return retval;

View File

@ -575,9 +575,9 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
reply = sdsnewlen(c->buf,c->bufpos);
c->bufpos = 0;
while(listLength(c->reply)) {
sds o = listNodeValue(listFirst(c->reply));
clientReplyBlock *o = listNodeValue(listFirst(c->reply));
reply = sdscatsds(reply,o);
reply = sdscatlen(reply,o->buf,o->used);
listDelNode(c->reply,listFirst(c->reply));
}
}
@ -1457,11 +1457,11 @@ void evalShaCommand(client *c) {
void scriptCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"debug (yes|sync|no) -- Set the debug mode for subsequent scripts executed.",
"exists <sha1> [<sha1> ...] -- Return information about the existence of the scripts in the script cache.",
"flush -- Flush the Lua scripts cache. Very dangerous on slaves.",
"kill -- Kill the currently executing Lua script.",
"load <script> -- Load a script into the scripts cache, without executing it.",
"DEBUG (yes|sync|no) -- Set the debug mode for subsequent scripts executed.",
"EXISTS <sha1> [<sha1> ...] -- Return information about the existence of the scripts in the script cache.",
"FLUSH -- Flush the Lua scripts cache. Very dangerous on slaves.",
"KILL -- Kill the currently executing Lua script.",
"LOAD <script> -- Load a script into the scripts cache, without executing it.",
NULL
};
addReplyHelp(c, help);
@ -1514,7 +1514,7 @@ NULL
return;
}
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try SCRIPT HELP", (char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
}
}

View File

@ -67,8 +67,10 @@ static inline char sdsReqType(size_t string_size) {
#if (LONG_MAX == LLONG_MAX)
if (string_size < 1ll<<32)
return SDS_TYPE_32;
#endif
return SDS_TYPE_64;
#else
return SDS_TYPE_32;
#endif
}
/* Create a new sds string with the content specified by the 'init' pointer
@ -283,7 +285,7 @@ sds sdsRemoveFreeSpace(sds s) {
return s;
}
/* Return the total size of the allocation of the specifed sds string,
/* Return the total size of the allocation of the specified sds string,
* including:
* 1) The sds header before the pointer.
* 2) The string.

View File

@ -84,6 +84,7 @@ typedef struct sentinelAddr {
#define SENTINEL_MAX_PENDING_COMMANDS 100
#define SENTINEL_ELECTION_TIMEOUT 10000
#define SENTINEL_MAX_DESYNC 1000
#define SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG 1
/* Failover machine different states. */
#define SENTINEL_FAILOVER_STATE_NONE 0 /* No failover in progress. */
@ -177,6 +178,10 @@ typedef struct sentinelRedisInstance {
mstime_t o_down_since_time; /* Objectively down since time. */
mstime_t down_after_period; /* Consider it down after that period. */
mstime_t info_refresh; /* Time at which we received INFO output from it. */
dict *renamed_commands; /* Commands renamed in this instance:
Sentinel will use the alternative commands
mapped on this table to send things like
SLAVEOF, CONFING, INFO, ... */
/* Role and the first time we observed it.
* This is useful in order to delay replacing what the instance reports
@ -241,6 +246,8 @@ struct sentinelState {
int announce_port; /* Port that is gossiped to other sentinels if
non zero. */
unsigned long simfailure_flags; /* Failures simulation. */
int deny_scripts_reconfig; /* Allow SENTINEL SET ... to change script
paths at runtime? */
} sentinel;
/* A script execution job. */
@ -380,7 +387,9 @@ void sentinelSimFailureCrash(void);
/* ========================= Dictionary types =============================== */
uint64_t dictSdsHash(const void *key);
uint64_t dictSdsCaseHash(const void *key);
int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
int dictSdsKeyCaseCompare(void *privdata, const void *key1, const void *key2);
void releaseSentinelRedisInstance(sentinelRedisInstance *ri);
void dictInstancesValDestructor (void *privdata, void *obj) {
@ -414,6 +423,16 @@ dictType leaderVotesDictType = {
NULL /* val destructor */
};
/* Instance renamed commands table. */
dictType renamedCommandsDictType = {
dictSdsCaseHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
dictSdsKeyCaseCompare, /* key compare */
dictSdsDestructor, /* key destructor */
dictSdsDestructor /* val destructor */
};
/* =========================== Initialization =============================== */
void sentinelCommand(client *c);
@ -468,6 +487,7 @@ void initSentinel(void) {
sentinel.announce_ip = NULL;
sentinel.announce_port = 0;
sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE;
sentinel.deny_scripts_reconfig = SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG;
memset(sentinel.myid,0,sizeof(sentinel.myid));
}
@ -494,7 +514,7 @@ void sentinelIsRunning(void) {
if (sentinel.myid[j] != 0) break;
if (j == CONFIG_RUN_ID_SIZE) {
/* Pick ID and presist the config. */
/* Pick ID and persist the config. */
getRandomHexChars(sentinel.myid,CONFIG_RUN_ID_SIZE);
sentinelFlushConfig();
}
@ -1207,6 +1227,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
ri->master = master;
ri->slaves = dictCreate(&instancesDictType,NULL);
ri->info_refresh = 0;
ri->renamed_commands = dictCreate(&renamedCommandsDictType,NULL);
/* Failover state. */
ri->leader = NULL;
@ -1254,6 +1275,7 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
sdsfree(ri->auth_pass);
sdsfree(ri->info);
releaseSentinelAddr(ri->addr);
dictRelease(ri->renamed_commands);
/* Clear state into the master if needed. */
if ((ri->flags & SRI_SLAVE) && (ri->flags & SRI_PROMOTED) && ri->master)
@ -1568,6 +1590,21 @@ char *sentinelGetInstanceTypeString(sentinelRedisInstance *ri) {
else return "unknown";
}
/* This function is used in order to send commands to Redis instances: the
* commands we send from Sentinel may be renamed, a common case is a master
* with CONFIG and SLAVEOF commands renamed for security concerns. In that
* case we check the ri->renamed_command table (or if the instance is a slave,
* we check the one of the master), and map the command that we should send
* to the set of renamed commads. However, if the command was not renamed,
* we just return "command" itself. */
char *sentinelInstanceMapCommand(sentinelRedisInstance *ri, char *command) {
sds sc = sdsnew(command);
if (ri->master) ri = ri->master;
char *retval = dictFetchValue(ri->renamed_commands, sc);
sdsfree(sc);
return retval ? retval : command;
}
/* ============================ Config handling ============================= */
char *sentinelHandleConfiguration(char **argv, int argc) {
sentinelRedisInstance *ri;
@ -1677,6 +1714,17 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
si->runid = sdsnew(argv[4]);
sentinelTryConnectionSharing(si);
}
} else if (!strcasecmp(argv[0],"rename-command") && argc == 4) {
/* rename-command <name> <command> <renamed-command> */
ri = sentinelGetMasterByName(argv[1]);
if (!ri) return "No such master with specified name.";
sds oldcmd = sdsnew(argv[2]);
sds newcmd = sdsnew(argv[3]);
if (dictAdd(ri->renamed_commands,oldcmd,newcmd) != DICT_OK) {
sdsfree(oldcmd);
sdsfree(newcmd);
return "Same command renamed multiple times with rename-command.";
}
} else if (!strcasecmp(argv[0],"announce-ip") && argc == 2) {
/* announce-ip <ip-address> */
if (strlen(argv[1]))
@ -1684,6 +1732,12 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
} else if (!strcasecmp(argv[0],"announce-port") && argc == 2) {
/* announce-port <port> */
sentinel.announce_port = atoi(argv[1]);
} else if (!strcasecmp(argv[0],"deny-scripts-reconfig") && argc == 2) {
/* deny-scripts-reconfig <yes|no> */
if ((sentinel.deny_scripts_reconfig = yesnotoi(argv[1])) == -1) {
return "Please specify yes or no for the "
"deny-scripts-reconfig options.";
}
} else {
return "Unrecognized sentinel configuration statement.";
}
@ -1704,6 +1758,12 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
line = sdscatprintf(sdsempty(), "sentinel myid %s", sentinel.myid);
rewriteConfigRewriteLine(state,"sentinel",line,1);
/* sentinel deny-scripts-reconfig. */
line = sdscatprintf(sdsempty(), "sentinel deny-scripts-reconfig %s",
sentinel.deny_scripts_reconfig ? "yes" : "no");
rewriteConfigRewriteLine(state,"sentinel",line,
sentinel.deny_scripts_reconfig != SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG);
/* For every master emit a "sentinel monitor" config entry. */
di = dictGetIterator(sentinel.masters);
while((de = dictNext(di)) != NULL) {
@ -1811,6 +1871,18 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
rewriteConfigRewriteLine(state,"sentinel",line,1);
}
dictReleaseIterator(di2);
/* sentinel rename-command */
di2 = dictGetIterator(master->renamed_commands);
while((de = dictNext(di2)) != NULL) {
sds oldname = dictGetKey(de);
sds newname = dictGetVal(de);
line = sdscatprintf(sdsempty(),
"sentinel rename-command %s %s %s",
master->name, oldname, newname);
rewriteConfigRewriteLine(state,"sentinel",line,1);
}
dictReleaseIterator(di2);
}
/* sentinel current-epoch is a global state valid for all the masters. */
@ -1875,7 +1947,8 @@ void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
ri->master->auth_pass;
if (auth_pass) {
if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri, "AUTH %s",
if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri, "%s %s",
sentinelInstanceMapCommand(ri,"AUTH"),
auth_pass) == C_OK) ri->link->pending_commands++;
}
}
@ -1891,7 +1964,9 @@ void sentinelSetClientName(sentinelRedisInstance *ri, redisAsyncContext *c, char
snprintf(name,sizeof(name),"sentinel-%.8s-%s",sentinel.myid,type);
if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri,
"CLIENT SETNAME %s", name) == C_OK)
"%s SETNAME %s",
sentinelInstanceMapCommand(ri,"CLIENT"),
name) == C_OK)
{
ri->link->pending_commands++;
}
@ -1953,8 +2028,9 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
sentinelSetClientName(ri,link->pc,"pubsub");
/* Now we subscribe to the Sentinels "Hello" channel. */
retval = redisAsyncCommand(link->pc,
sentinelReceiveHelloMessages, ri, "SUBSCRIBE %s",
SENTINEL_HELLO_CHANNEL);
sentinelReceiveHelloMessages, ri, "%s %s",
sentinelInstanceMapCommand(ri,"SUBSCRIBE"),
SENTINEL_HELLO_CHANNEL);
if (retval != C_OK) {
/* If we can't subscribe, the Pub/Sub connection is useless
* and we can simply disconnect it and try again. */
@ -2288,8 +2364,11 @@ void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata
{
if (redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri,
"SCRIPT KILL") == C_OK)
"%s KILL",
sentinelInstanceMapCommand(ri,"SCRIPT")) == C_OK)
{
ri->link->pending_commands++;
}
ri->flags |= SRI_SCRIPT_KILL_SENT;
}
}
@ -2452,7 +2531,7 @@ void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privd
}
/* Send an "Hello" message via Pub/Sub to the specified 'ri' Redis
* instance in order to broadcast the current configuraiton for this
* instance in order to broadcast the current configuration for this
* master, and to advertise the existence of this Sentinel at the same time.
*
* The message has the following format:
@ -2495,8 +2574,9 @@ int sentinelSendHello(sentinelRedisInstance *ri) {
master->name,master_addr->ip,master_addr->port,
(unsigned long long) master->config_epoch);
retval = redisAsyncCommand(ri->link->cc,
sentinelPublishReplyCallback, ri, "PUBLISH %s %s",
SENTINEL_HELLO_CHANNEL,payload);
sentinelPublishReplyCallback, ri, "%s %s %s",
sentinelInstanceMapCommand(ri,"PUBLISH"),
SENTINEL_HELLO_CHANNEL,payload);
if (retval != C_OK) return C_ERR;
ri->link->pending_commands++;
return C_OK;
@ -2541,7 +2621,8 @@ int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master) {
* queued in the connection. */
int sentinelSendPing(sentinelRedisInstance *ri) {
int retval = redisAsyncCommand(ri->link->cc,
sentinelPingReplyCallback, ri, "PING");
sentinelPingReplyCallback, ri, "%s",
sentinelInstanceMapCommand(ri,"PING"));
if (retval == C_OK) {
ri->link->pending_commands++;
ri->link->last_ping_time = mstime();
@ -2605,7 +2686,8 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
(now - ri->info_refresh) > info_period))
{
retval = redisAsyncCommand(ri->link->cc,
sentinelInfoReplyCallback, ri, "INFO");
sentinelInfoReplyCallback, ri, "%s",
sentinelInstanceMapCommand(ri,"INFO"));
if (retval == C_OK) ri->link->pending_commands++;
}
@ -3099,7 +3181,7 @@ void sentinelCommand(client *c) {
addReplySds(c,e);
}
} else if (!strcasecmp(c->argv[1]->ptr,"set")) {
if (c->argc < 3 || c->argc % 2 == 0) goto numargserr;
if (c->argc < 3) goto numargserr;
sentinelSetCommand(c);
} else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) {
/* SENTINEL INFO-CACHE <name> */
@ -3275,7 +3357,7 @@ void sentinelInfoCommand(client *c) {
addReplyBulkSds(c, info);
}
/* Implements Sentinel verison of the ROLE command. The output is
/* Implements Sentinel version of the ROLE command. The output is
* "sentinel" and the list of currently monitored master names. */
void sentinelRoleCommand(client *c) {
dictIterator *di;
@ -3298,39 +3380,58 @@ void sentinelRoleCommand(client *c) {
void sentinelSetCommand(client *c) {
sentinelRedisInstance *ri;
int j, changes = 0;
char *option, *value;
int badarg = 0; /* Bad argument position for error reporting. */
char *option;
if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
== NULL) return;
/* Process option - value pairs. */
for (j = 3; j < c->argc; j += 2) {
for (j = 3; j < c->argc; j++) {
int moreargs = (c->argc-1) - j;
option = c->argv[j]->ptr;
value = c->argv[j+1]->ptr;
robj *o = c->argv[j+1];
long long ll;
int old_j = j; /* Used to know what to log as an event. */
if (!strcasecmp(option,"down-after-milliseconds")) {
if (!strcasecmp(option,"down-after-milliseconds") && moreargs > 0) {
/* down-after-millisecodns <milliseconds> */
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
robj *o = c->argv[++j];
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
badarg = j;
goto badfmt;
}
ri->down_after_period = ll;
sentinelPropagateDownAfterPeriod(ri);
changes++;
} else if (!strcasecmp(option,"failover-timeout")) {
} else if (!strcasecmp(option,"failover-timeout") && moreargs > 0) {
/* failover-timeout <milliseconds> */
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
robj *o = c->argv[++j];
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
badarg = j;
goto badfmt;
}
ri->failover_timeout = ll;
changes++;
} else if (!strcasecmp(option,"parallel-syncs")) {
} else if (!strcasecmp(option,"parallel-syncs") && moreargs > 0) {
/* parallel-syncs <milliseconds> */
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
robj *o = c->argv[++j];
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
badarg = j;
goto badfmt;
}
ri->parallel_syncs = ll;
changes++;
} else if (!strcasecmp(option,"notification-script")) {
} else if (!strcasecmp(option,"notification-script") && moreargs > 0) {
/* notification-script <path> */
char *value = c->argv[++j]->ptr;
if (sentinel.deny_scripts_reconfig) {
addReplyError(c,
"Reconfiguration of scripts path is denied for "
"security reasons. Check the deny-scripts-reconfig "
"configuration directive in your Sentinel configuration");
return;
}
if (strlen(value) && access(value,X_OK) == -1) {
addReplyError(c,
"Notification script seems non existing or non executable");
@ -3340,8 +3441,17 @@ void sentinelSetCommand(client *c) {
sdsfree(ri->notification_script);
ri->notification_script = strlen(value) ? sdsnew(value) : NULL;
changes++;
} else if (!strcasecmp(option,"client-reconfig-script")) {
} else if (!strcasecmp(option,"client-reconfig-script") && moreargs > 0) {
/* client-reconfig-script <path> */
char *value = c->argv[++j]->ptr;
if (sentinel.deny_scripts_reconfig) {
addReplyError(c,
"Reconfiguration of scripts path is denied for "
"security reasons. Check the deny-scripts-reconfig "
"configuration directive in your Sentinel configuration");
return;
}
if (strlen(value) && access(value,X_OK) == -1) {
addReplyError(c,
"Client reconfiguration script seems non existing or "
@ -3352,24 +3462,65 @@ void sentinelSetCommand(client *c) {
sdsfree(ri->client_reconfig_script);
ri->client_reconfig_script = strlen(value) ? sdsnew(value) : NULL;
changes++;
} else if (!strcasecmp(option,"auth-pass")) {
} else if (!strcasecmp(option,"auth-pass") && moreargs > 0) {
/* auth-pass <password> */
char *value = c->argv[++j]->ptr;
sdsfree(ri->auth_pass);
ri->auth_pass = strlen(value) ? sdsnew(value) : NULL;
changes++;
} else if (!strcasecmp(option,"quorum")) {
} else if (!strcasecmp(option,"quorum") && moreargs > 0) {
/* quorum <count> */
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
robj *o = c->argv[++j];
if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
badarg = j;
goto badfmt;
}
ri->quorum = ll;
changes++;
} else if (!strcasecmp(option,"rename-command") && moreargs > 1) {
/* rename-command <oldname> <newname> */
sds oldname = c->argv[++j]->ptr;
sds newname = c->argv[++j]->ptr;
if ((sdslen(oldname) == 0) || (sdslen(newname) == 0)) {
badarg = sdslen(newname) ? j-1 : j;
goto badfmt;
}
/* Remove any older renaming for this command. */
dictDelete(ri->renamed_commands,oldname);
/* If the target name is the same as the source name there
* is no need to add an entry mapping to itself. */
if (!dictSdsKeyCaseCompare(NULL,oldname,newname)) {
oldname = sdsdup(oldname);
newname = sdsdup(newname);
dictAdd(ri->renamed_commands,oldname,newname);
}
changes++;
} else {
addReplyErrorFormat(c,"Unknown option '%s' for SENTINEL SET",
option);
addReplyErrorFormat(c,"Unknown option or number of arguments for "
"SENTINEL SET '%s'", option);
if (changes) sentinelFlushConfig();
return;
}
sentinelEvent(LL_WARNING,"+set",ri,"%@ %s %s",option,value);
/* Log the event. */
int numargs = j-old_j+1;
switch(numargs) {
case 2:
sentinelEvent(LL_WARNING,"+set",ri,"%@ %s %s",c->argv[old_j]->ptr,
c->argv[old_j+1]->ptr);
break;
case 3:
sentinelEvent(LL_WARNING,"+set",ri,"%@ %s %s %s",c->argv[old_j]->ptr,
c->argv[old_j+1]->ptr,
c->argv[old_j+2]->ptr);
break;
default:
sentinelEvent(LL_WARNING,"+set",ri,"%@ %s",c->argv[old_j]->ptr);
break;
}
}
if (changes) sentinelFlushConfig();
@ -3379,7 +3530,7 @@ void sentinelSetCommand(client *c) {
badfmt: /* Bad format errors */
if (changes) sentinelFlushConfig();
addReplyErrorFormat(c,"Invalid argument '%s' for SENTINEL SET '%s'",
value, option);
(char*)c->argv[badarg]->ptr,option);
}
/* Our fake PUBLISH command: it is actually useful only to receive hello messages
@ -3417,7 +3568,7 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
if (ri->link->cc &&
(mstime() - ri->link->cc_conn_time) >
SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
ri->link->act_ping_time != 0 && /* Ther is a pending ping... */
ri->link->act_ping_time != 0 && /* There is a pending ping... */
/* The pending ping is delayed, and we did not received
* error replies as well. */
(mstime() - ri->link->act_ping_time) > (ri->down_after_period/2) &&
@ -3585,7 +3736,8 @@ void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int f
ll2string(port,sizeof(port),master->addr->port);
retval = redisAsyncCommand(ri->link->cc,
sentinelReceiveIsMasterDownReply, ri,
"SENTINEL is-master-down-by-addr %s %s %llu %s",
"%s is-master-down-by-addr %s %s %llu %s",
sentinelInstanceMapCommand(ri,"SENTINEL"),
master->addr->ip, port,
sentinel.current_epoch,
(master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
@ -3605,7 +3757,7 @@ void sentinelSimFailureCrash(void) {
}
/* Vote for the sentinel with 'req_runid' or return the old vote if already
* voted for the specifed 'req_epoch' or one greater.
* voted for the specified 'req_epoch' or one greater.
*
* If a vote is not available returns NULL, otherwise return the Sentinel
* runid and populate the leader_epoch with the epoch of the vote. */
@ -3756,7 +3908,7 @@ int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
/* In order to send SLAVEOF in a safe way, we send a transaction performing
* the following tasks:
* 1) Reconfigure the instance according to the specified host/port params.
* 2) Rewrite the configuraiton.
* 2) Rewrite the configuration.
* 3) Disconnect all clients (but this one sending the commnad) in order
* to trigger the ask-master-on-reconnection protocol for connected
* clients.
@ -3764,17 +3916,21 @@ int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
* Note that we don't check the replies returned by commands, since we
* will observe instead the effects in the next INFO output. */
retval = redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri, "MULTI");
sentinelDiscardReplyCallback, ri, "%s",
sentinelInstanceMapCommand(ri,"MULTI"));
if (retval == C_ERR) return retval;
ri->link->pending_commands++;
retval = redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri, "SLAVEOF %s %s", host, portstr);
sentinelDiscardReplyCallback, ri, "%s %s %s",
sentinelInstanceMapCommand(ri,"SLAVEOF"),
host, portstr);
if (retval == C_ERR) return retval;
ri->link->pending_commands++;
retval = redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri, "CONFIG REWRITE");
sentinelDiscardReplyCallback, ri, "%s REWRITE",
sentinelInstanceMapCommand(ri,"CONFIG"));
if (retval == C_ERR) return retval;
ri->link->pending_commands++;
@ -3784,12 +3940,14 @@ int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
* recognized as a syntax error, and the transaction will not fail (but
* only the unsupported command will fail). */
retval = redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri, "CLIENT KILL TYPE normal");
sentinelDiscardReplyCallback, ri, "%s KILL TYPE normal",
sentinelInstanceMapCommand(ri,"CLIENT"));
if (retval == C_ERR) return retval;
ri->link->pending_commands++;
retval = redisAsyncCommand(ri->link->cc,
sentinelDiscardReplyCallback, ri, "EXEC");
sentinelDiscardReplyCallback, ri, "%s",
sentinelInstanceMapCommand(ri,"EXEC"));
if (retval == C_ERR) return retval;
ri->link->pending_commands++;

View File

@ -198,8 +198,8 @@ struct redisCommand redisCommandTable[] = {
{"zrank",zrankCommand,3,"rF",0,NULL,1,1,1,0,0},
{"zrevrank",zrevrankCommand,3,"rF",0,NULL,1,1,1,0,0},
{"zscan",zscanCommand,-3,"rR",0,NULL,1,1,1,0,0},
{"zpopmin",zpopminCommand,-2,"wF",0,NULL,1,-1,1,0,0},
{"zpopmax",zpopmaxCommand,-2,"wF",0,NULL,1,-1,1,0,0},
{"zpopmin",zpopminCommand,-2,"wF",0,NULL,1,1,1,0,0},
{"zpopmax",zpopmaxCommand,-2,"wF",0,NULL,1,1,1,0,0},
{"bzpopmin",bzpopminCommand,-2,"wsF",0,NULL,1,-2,1,0,0},
{"bzpopmax",bzpopmaxCommand,-2,"wsF",0,NULL,1,-2,1,0,0},
{"hset",hsetCommand,-4,"wmF",0,NULL,1,1,1,0,0},
@ -326,6 +326,10 @@ struct redisCommand redisCommandTable[] = {
/*============================ Utility functions ============================ */
/* We use a private localtime implementation which is fork-safe. The logging
* function of Redis may be called from other threads. */
void nolocks_localtime(struct tm *tmp, time_t t, time_t tz, int dst);
/* Low level logging. To use only for very big messages, otherwise
* serverLog() is to prefer. */
void serverLogRaw(int level, const char *msg) {
@ -351,7 +355,9 @@ void serverLogRaw(int level, const char *msg) {
pid_t pid = getpid();
gettimeofday(&tv,NULL);
off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",localtime(&tv.tv_sec));
struct tm tm;
nolocks_localtime(&tm,tv.tv_sec,server.timezone,server.daylight_active);
off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",&tm);
snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
if (server.sentinel_mode) {
role_char = 'X'; /* Sentinel. */
@ -845,19 +851,37 @@ int clientsCronResizeQueryBuffer(client *c) {
/* There are two conditions to resize the query buffer:
* 1) Query buffer is > BIG_ARG and too big for latest peak.
* 2) Client is inactive and the buffer is bigger than 1k. */
if (((querybuf_size > PROTO_MBULK_BIG_ARG) &&
(querybuf_size/(c->querybuf_peak+1)) > 2) ||
(querybuf_size > 1024 && idletime > 2))
* 2) Query buffer is > BIG_ARG and client is idle. */
if (querybuf_size > PROTO_MBULK_BIG_ARG &&
((querybuf_size/(c->querybuf_peak+1)) > 2 ||
idletime > 2))
{
/* Only resize the query buffer if it is actually wasting space. */
if (sdsavail(c->querybuf) > 1024) {
/* Only resize the query buffer if it is actually wasting
* at least a few kbytes. */
if (sdsavail(c->querybuf) > 1024*4) {
c->querybuf = sdsRemoveFreeSpace(c->querybuf);
}
}
/* Reset the peak again to capture the peak memory usage in the next
* cycle. */
c->querybuf_peak = 0;
/* Clients representing masters also use a "pending query buffer" that
* is the yet not applied part of the stream we are reading. Such buffer
* also needs resizing from time to time, otherwise after a very large
* transfer (a huge value or a big MIGRATE operation) it will keep using
* a lot of memory. */
if (c->flags & CLIENT_MASTER) {
/* There are two conditions to resize the pending query buffer:
* 1) Pending Query buffer is > LIMIT_PENDING_QUERYBUF.
* 2) Used length is smaller than pending_querybuf_size/2 */
size_t pending_querybuf_size = sdsAllocSize(c->pending_querybuf);
if(pending_querybuf_size > LIMIT_PENDING_QUERYBUF &&
sdslen(c->pending_querybuf) < (pending_querybuf_size/2))
{
c->pending_querybuf = sdsRemoveFreeSpace(c->pending_querybuf);
}
}
return 0;
}
@ -959,6 +983,14 @@ void updateCachedTime(void) {
time_t unixtime = time(NULL);
atomicSet(server.unixtime,unixtime);
server.mstime = mstime();
/* To get information about daylight saving time, we need to call localtime_r
* and cache the result. However calling localtime_r in this context is safe
* since we will never fork() while here, in the main thread. The logging
* function will call a thread safe version of localtime that has no locks. */
struct tm tm;
localtime_r(&server.unixtime,&tm);
server.daylight_active = tm.tm_isdst;
}
/* This is our timer interrupt, called server.hz times per second.
@ -1133,7 +1165,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
} else {
/* If there is not a background saving/rewrite in progress check if
* we have to save/rewrite now. */
for (j = 0; j < server.saveparamslen; j++) {
for (j = 0; j < server.saveparamslen; j++) {
struct saveparam *sp = server.saveparams+j;
/* Save if we reached the given amount of changes,
@ -1153,23 +1185,23 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
rdbSaveBackground(server.rdb_filename,rsiptr);
break;
}
}
}
/* Trigger an AOF rewrite if needed. */
if (server.aof_state == AOF_ON &&
server.rdb_child_pid == -1 &&
server.aof_child_pid == -1 &&
server.aof_rewrite_perc &&
server.aof_current_size > server.aof_rewrite_min_size)
{
/* Trigger an AOF rewrite if needed. */
if (server.aof_state == AOF_ON &&
server.rdb_child_pid == -1 &&
server.aof_child_pid == -1 &&
server.aof_rewrite_perc &&
server.aof_current_size > server.aof_rewrite_min_size)
{
long long base = server.aof_rewrite_base_size ?
server.aof_rewrite_base_size : 1;
server.aof_rewrite_base_size : 1;
long long growth = (server.aof_current_size*100/base) - 100;
if (growth >= server.aof_rewrite_perc) {
serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
rewriteAppendOnlyFileBackground();
}
}
}
}
@ -1401,10 +1433,12 @@ void initServerConfig(void) {
pthread_mutex_init(&server.lruclock_mutex,NULL);
pthread_mutex_init(&server.unixtime_mutex,NULL);
updateCachedTime();
getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE);
server.runid[CONFIG_RUN_ID_SIZE] = '\0';
changeReplicationId();
clearReplicationId2();
server.timezone = timezone; /* Initialized by tzset(). */
server.configfile = NULL;
server.executable = NULL;
server.hz = CONFIG_DEFAULT_HZ;
@ -1456,6 +1490,7 @@ void initServerConfig(void) {
server.aof_selected_db = -1; /* Make sure the first time will not match */
server.aof_flush_postponed_start = 0;
server.aof_rewrite_incremental_fsync = CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC;
server.rdb_save_incremental_fsync = CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC;
server.aof_load_truncated = CONFIG_DEFAULT_AOF_LOAD_TRUNCATED;
server.aof_use_rdb_preamble = CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE;
server.pidfile = NULL;
@ -1485,6 +1520,8 @@ void initServerConfig(void) {
server.zset_max_ziplist_entries = OBJ_ZSET_MAX_ZIPLIST_ENTRIES;
server.zset_max_ziplist_value = OBJ_ZSET_MAX_ZIPLIST_VALUE;
server.hll_sparse_max_bytes = CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES;
server.stream_node_max_bytes = OBJ_STREAM_NODE_MAX_BYTES;
server.stream_node_max_entries = OBJ_STREAM_NODE_MAX_ENTRIES;
server.shutdown_asap = 0;
server.cluster_enabled = 0;
server.cluster_node_timeout = CLUSTER_DEFAULT_NODE_TIMEOUT;
@ -1886,6 +1923,7 @@ void initServer(void) {
server.pid = getpid();
server.current_client = NULL;
server.clients = listCreate();
server.clients_index = raxNew();
server.clients_to_close = listCreate();
server.slaves = listCreate();
server.monitors = listCreate();
@ -1978,7 +2016,6 @@ void initServer(void) {
server.aof_last_write_status = C_OK;
server.aof_last_write_errno = 0;
server.repl_good_slaves_count = 0;
updateCachedTime();
/* Create the timer callback, this is our way to process many background
* operations incrementally, like clients timeout, eviction of unaccessed
@ -2342,7 +2379,7 @@ void call(client *c, int flags) {
if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;
/* However prevent AOF / replication propagation if the command
* implementatino called preventCommandPropagation() or similar,
* implementations called preventCommandPropagation() or similar,
* or if we don't have the call() flags to do so. */
if (c->flags & CLIENT_PREVENT_REPL_PROP ||
!(flags & CMD_CALL_PROPAGATE_REPL))
@ -2412,8 +2449,13 @@ int processCommand(client *c) {
c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
if (!c->cmd) {
flagTransaction(c);
addReplyErrorFormat(c,"unknown command '%s'",
(char*)c->argv[0]->ptr);
sds args = sdsempty();
int i;
for (i=1; i < c->argc && sdslen(args) < 128; i++)
args = sdscatprintf(args, "`%.*s`, ", 128-(int)sdslen(args), (char*)c->argv[i]->ptr);
addReplyErrorFormat(c,"unknown command `%s`, with args beginning with: %s",
(char*)c->argv[0]->ptr, args);
sdsfree(args);
return C_OK;
} else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
(c->argc < -c->cmd->arity)) {
@ -2482,7 +2524,8 @@ int processCommand(client *c) {
if (((server.stop_writes_on_bgsave_err &&
server.saveparamslen > 0 &&
server.lastbgsave_status == C_ERR) ||
server.aof_last_write_status == C_ERR) &&
(server.aof_state != AOF_OFF &&
server.aof_last_write_status == C_ERR)) &&
server.masterhost == NULL &&
(c->cmd->flags & CMD_WRITE ||
c->cmd->proc == pingCommand))
@ -2635,7 +2678,7 @@ int prepareForShutdown(int flags) {
/* Append only file: flush buffers and fsync() the AOF at exit */
serverLog(LL_NOTICE,"Calling fsync() on the AOF file.");
flushAppendOnlyFile(1);
aof_fsync(server.aof_fd);
redis_fsync(server.aof_fd);
}
/* Create a new RDB file before exiting. */
@ -2824,9 +2867,9 @@ void commandCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"(no subcommand) -- Return details about all Redis commands.",
"count -- Return the total number of commands in this Redis server.",
"getkeys <full-command> -- Return the keys from a full Redis command.",
"info [command-name ...] -- Return details about multiple Redis commands.",
"COUNT -- Return the total number of commands in this Redis server.",
"GETKEYS <full-command> -- Return the keys from a full Redis command.",
"INFO [command-name ...] -- Return details about multiple Redis commands.",
NULL
};
addReplyHelp(c, help);
@ -2850,7 +2893,10 @@ NULL
int *keys, numkeys, j;
if (!cmd) {
addReplyErrorFormat(c,"Invalid command specified");
addReplyError(c,"Invalid command specified");
return;
} else if (cmd->getkeys_proc == NULL && cmd->firstkey == 0) {
addReplyError(c,"The command has no key arguments");
return;
} else if ((cmd->arity > 0 && cmd->arity != c->argc-2) ||
((c->argc-2) < -cmd->arity))
@ -2860,11 +2906,15 @@ NULL
}
keys = getKeysFromCommand(cmd,c->argv+2,c->argc-2,&numkeys);
addReplyMultiBulkLen(c,numkeys);
for (j = 0; j < numkeys; j++) addReplyBulk(c,c->argv[keys[j]+2]);
getKeysFreeResult(keys);
if (!keys) {
addReplyError(c,"Invalid arguments specified for command");
} else {
addReplyMultiBulkLen(c,numkeys);
for (j = 0; j < numkeys; j++) addReplyBulk(c,c->argv[keys[j]+2]);
getKeysFreeResult(keys);
}
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try COMMAND HELP", (char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
}
}
@ -2876,7 +2926,6 @@ void bytesToHuman(char *s, unsigned long long n) {
if (n < 1024) {
/* Bytes */
sprintf(s,"%lluB",n);
return;
} else if (n < (1024*1024)) {
d = (double)n/(1024);
sprintf(s,"%.2fK",d);
@ -2906,7 +2955,6 @@ sds genRedisInfoString(char *section) {
time_t uptime = server.unixtime-server.stat_starttime;
int j;
struct rusage self_ru, c_ru;
unsigned long lol, bib;
int allsections = 0, defsections = 0;
int sections = 0;
@ -2916,7 +2964,6 @@ sds genRedisInfoString(char *section) {
getrusage(RUSAGE_SELF, &self_ru);
getrusage(RUSAGE_CHILDREN, &c_ru);
getClientsMaxBuffers(&lol,&bib);
/* Server */
if (allsections || defsections || !strcasecmp(section,"server")) {
@ -2986,6 +3033,8 @@ sds genRedisInfoString(char *section) {
/* Clients */
if (allsections || defsections || !strcasecmp(section,"clients")) {
unsigned long lol, bib;
getClientsMaxBuffers(&lol,&bib);
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# Clients\r\n"
@ -3058,6 +3107,11 @@ sds genRedisInfoString(char *section) {
"rss_overhead_bytes:%zu\r\n"
"mem_fragmentation_ratio:%.2f\r\n"
"mem_fragmentation_bytes:%zu\r\n"
"mem_not_counted_for_evict:%zu\r\n"
"mem_replication_backlog:%zu\r\n"
"mem_clients_slaves:%zu\r\n"
"mem_clients_normal:%zu\r\n"
"mem_aof_buffer:%zu\r\n"
"mem_allocator:%s\r\n"
"active_defrag_running:%d\r\n"
"lazyfree_pending_objects:%zu\r\n",
@ -3090,6 +3144,11 @@ sds genRedisInfoString(char *section) {
mh->rss_extra_bytes,
mh->total_frag, /* this is the total RSS overhead, including fragmentation, */
mh->total_frag_bytes, /* named so for backwards compatibility */
freeMemoryGetNotCountedMemory(),
mh->repl_backlog,
mh->clients_slaves,
mh->clients_normal,
mh->aof_buffer,
ZMALLOC_LIB,
server.active_defrag_running,
lazyfreeGetPendingObjectsCount()
@ -3832,9 +3891,11 @@ int main(int argc, char **argv) {
spt_init(argc, argv);
#endif
setlocale(LC_COLLATE,"");
tzset(); /* Populates 'timezone' global. */
zmalloc_set_oom_handler(redisOutOfMemoryHandler);
srand(time(NULL)^getpid());
gettimeofday(&tv,NULL);
char hashseed[16];
getRandomHexChars(hashseed,sizeof(hashseed));
dictSetHashFunctionSeed((uint8_t*)hashseed);
@ -3891,7 +3952,7 @@ int main(int argc, char **argv) {
configfile = argv[j];
server.configfile = getAbsolutePath(configfile);
/* Replace the config file in server.exec_argv with
* its absoulte path. */
* its absolute path. */
zfree(server.exec_argv[j]);
server.exec_argv[j] = zstrdup(server.configfile);
j++;

View File

@ -142,6 +142,7 @@ typedef long long mstime_t; /* millisecond time type. */
#define CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE 1
#define CONFIG_DEFAULT_ACTIVE_REHASHING 1
#define CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC 1
#define CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC 1
#define CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE 0
#define CONFIG_DEFAULT_MIN_SLAVES_MAX_LAG 10
#define NET_IP_STR_LEN 46 /* INET6_ADDRSTRLEN is 46, but we need to be sure */
@ -183,7 +184,9 @@ typedef long long mstime_t; /* millisecond time type. */
#define PROTO_INLINE_MAX_SIZE (1024*64) /* Max size of inline reads */
#define PROTO_MBULK_BIG_ARG (1024*32)
#define LONG_STR_SIZE 21 /* Bytes needed for long -> str + '\0' */
#define AOF_AUTOSYNC_BYTES (1024*1024*32) /* fdatasync every 32MB */
#define REDIS_AUTOSYNC_BYTES (1024*1024*32) /* fdatasync every 32MB */
#define LIMIT_PENDING_QUERYBUF (4*1024*1024) /* 4mb */
/* When configuring the server eventloop, we setup it so that the total number
* of file descriptors we can handle are server.maxclients + RESERVED_FDS +
@ -339,7 +342,7 @@ typedef long long mstime_t; /* millisecond time type. */
/* Anti-warning macro... */
#define UNUSED(V) ((void) V)
#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
#define ZSKIPLIST_MAXLEVEL 64 /* Should be enough for 2^64 elements */
#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
/* Append only defines */
@ -348,12 +351,14 @@ typedef long long mstime_t; /* millisecond time type. */
#define AOF_FSYNC_EVERYSEC 2
#define CONFIG_DEFAULT_AOF_FSYNC AOF_FSYNC_EVERYSEC
/* Zip structure related defaults */
/* Zipped structures related defaults */
#define OBJ_HASH_MAX_ZIPLIST_ENTRIES 512
#define OBJ_HASH_MAX_ZIPLIST_VALUE 64
#define OBJ_SET_MAX_INTSET_ENTRIES 512
#define OBJ_ZSET_MAX_ZIPLIST_ENTRIES 128
#define OBJ_ZSET_MAX_ZIPLIST_VALUE 64
#define OBJ_STREAM_NODE_MAX_BYTES 4096
#define OBJ_STREAM_NODE_MAX_ENTRIES 100
/* List defaults */
#define OBJ_LIST_MAX_ZIPLIST_SIZE -2
@ -614,6 +619,13 @@ typedef struct redisObject {
struct evictionPoolEntry; /* Defined in evict.c */
/* This structure is used in order to represent the output buffer of a client,
* which is actually a linked list of blocks like that, that is: client->reply. */
typedef struct clientReplyBlock {
size_t size, used;
char buf[];
} clientReplyBlock;
/* Redis database representation. There are multiple databases identified
* by integers from 0 (the default database) up to the max configured
* database. The database number is the 'id' field in the structure. */
@ -660,6 +672,7 @@ typedef struct blockingState {
robj *xread_group; /* XREADGROUP group name. */
robj *xread_consumer; /* XREADGROUP consumer name. */
mstime_t xread_retry_time, xread_retry_ttl;
int xread_group_noack;
/* BLOCKED_WAIT */
int numreplicas; /* Number of replicas we are waiting for ACK. */
@ -695,9 +708,10 @@ typedef struct client {
redisDb *db; /* Pointer to currently SELECTed DB. */
robj *name; /* As set by CLIENT SETNAME. */
sds querybuf; /* Buffer we use to accumulate client queries. */
sds pending_querybuf; /* If this is a master, this buffer represents the
yet not applied replication stream that we
are receiving from the master. */
sds pending_querybuf; /* If this client is flagged as master, this buffer
represents the yet not applied portion of the
replication stream that we are receiving from
the master. */
size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size. */
int argc; /* Num of arguments of current command. */
robj **argv; /* Arguments of current command. */
@ -780,7 +794,7 @@ typedef struct zskiplistNode {
struct zskiplistNode *backward;
struct zskiplistLevel {
struct zskiplistNode *forward;
unsigned int span;
unsigned long span;
} level[];
} zskiplistNode;
@ -879,13 +893,13 @@ typedef struct rdbSaveInfo {
#define RDB_SAVE_INFO_INIT {-1,0,"000000000000000000000000000000",-1}
typedef struct malloc_stats {
struct malloc_stats {
size_t zmalloc_used;
size_t process_rss;
size_t allocator_allocated;
size_t allocator_active;
size_t allocator_resident;
} malloc_stats;
};
/*-----------------------------------------------------------------------------
* Global server state
@ -949,6 +963,7 @@ struct redisServer {
list *clients_pending_write; /* There is to write or install handler. */
list *slaves, *monitors; /* List of slaves and MONITORs */
client *current_client; /* Current client, only used on crash report */
rax *clients_index; /* Active clients dictionary by client ID. */
int clients_paused; /* True if clients are currently paused */
mstime_t clients_pause_end_time; /* Time when we undo clients_paused */
char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
@ -992,7 +1007,7 @@ struct redisServer {
long long slowlog_entry_id; /* SLOWLOG current entry ID */
long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
malloc_stats cron_malloc_stats; /* sampled in serverCron(). */
struct malloc_stats cron_malloc_stats; /* sampled in serverCron(). */
long long stat_net_input_bytes; /* Bytes read from network. */
long long stat_net_output_bytes; /* Bytes written to network. */
size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */
@ -1044,7 +1059,8 @@ struct redisServer {
time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
int aof_lastbgrewrite_status; /* C_OK or C_ERR */
unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */
int aof_rewrite_incremental_fsync;/* fsync incrementally while rewriting? */
int aof_rewrite_incremental_fsync;/* fsync incrementally while aof rewriting? */
int rdb_save_incremental_fsync; /* fsync incrementally while rdb saving? */
int aof_last_write_status; /* C_OK or C_ERR */
int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */
int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */
@ -1177,11 +1193,15 @@ struct redisServer {
size_t zset_max_ziplist_entries;
size_t zset_max_ziplist_value;
size_t hll_sparse_max_bytes;
size_t stream_node_max_bytes;
int64_t stream_node_max_entries;
/* List parameters */
int list_max_ziplist_size;
int list_compress_depth;
/* time cache */
time_t unixtime; /* Unix time sampled every cron cycle. */
time_t timezone; /* Cached timezone. As set by tzset(). */
int daylight_active; /* Currently in daylight saving time. */
long long mstime; /* Like 'unixtime' but with milliseconds resolution. */
/* Pubsub */
dict *pubsub_channels; /* Map channels to list of subscribed clients */
@ -1406,15 +1426,17 @@ void addReplyHumanLongDouble(client *c, long double d);
void addReplyLongLong(client *c, long long ll);
void addReplyMultiBulkLen(client *c, long length);
void addReplyHelp(client *c, const char **help);
void addReplySubcommandSyntaxError(client *c);
void copyClientOutputBuffer(client *dst, client *src);
size_t sdsZmallocSize(sds s);
size_t getStringObjectSdsUsedMemory(robj *o);
void freeClientReplyValue(void *o);
void *dupClientReplyValue(void *o);
void getClientsMaxBuffers(unsigned long *longest_output_list,
unsigned long *biggest_input_buffer);
char *getClientPeerId(client *client);
sds catClientInfoString(sds s, client *client);
sds getAllClientsInfoString(void);
sds getAllClientsInfoString(int type);
void rewriteClientCommandVector(client *c, int argc, ...);
void rewriteClientCommandArgument(client *c, int i, robj *newval);
void replaceClientCommandVector(client *c, int argc, robj **argv);
@ -1495,6 +1517,7 @@ robj *tryObjectEncoding(robj *o);
robj *getDecodedObject(robj *o);
size_t stringObjectLen(robj *o);
robj *createStringObjectFromLongLong(long long value);
robj *createStringObjectFromLongLongForValue(long long value);
robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
robj *createQuicklistObject(void);
robj *createZiplistObject(void);
@ -1589,11 +1612,11 @@ void receiveChildInfo(void);
#define ZADD_NONE 0
#define ZADD_INCR (1<<0) /* Increment the score instead of setting it. */
#define ZADD_NX (1<<1) /* Don't touch elements not already existing. */
#define ZADD_XX (1<<2) /* Only touch elements already exisitng. */
#define ZADD_XX (1<<2) /* Only touch elements already existing. */
/* Output flags. */
#define ZADD_NOP (1<<3) /* Operation not performed because of conditionals.*/
#define ZADD_NAN (1<<4) /* Only touch elements already exisitng. */
#define ZADD_NAN (1<<4) /* Only touch elements already existing. */
#define ZADD_ADDED (1<<5) /* The element was new and was added. */
#define ZADD_UPDATED (1<<6) /* The element already existed, score updated. */
@ -1624,7 +1647,7 @@ void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range);
unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range);
unsigned int zsetLength(const robj *zobj);
unsigned long zsetLength(const robj *zobj);
void zsetConvert(robj *zobj, int encoding);
void zsetConvertToZiplistIfNeeded(robj *zobj, size_t maxelelen);
int zsetScore(robj *zobj, sds member, double *score);
@ -1649,6 +1672,7 @@ int zslLexValueLteMax(sds value, zlexrangespec *spec);
/* Core functions */
int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level);
size_t freeMemoryGetNotCountedMemory();
int freeMemoryIfNeeded(void);
int processCommand(client *c);
void setupSignalHandlers(void);
@ -1765,6 +1789,8 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply);
robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags);
robj *objectCommandLookup(client *c, robj *key);
robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply);
void objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
long long lru_clock);
#define LOOKUP_NONE 0
#define LOOKUP_NOTOUCH (1<<0)
void dbAdd(redisDb *db, robj *key, robj *val);

View File

@ -142,12 +142,12 @@ uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) {
}
switch (left) {
case 7: b |= ((uint64_t)in[6]) << 48;
case 6: b |= ((uint64_t)in[5]) << 40;
case 5: b |= ((uint64_t)in[4]) << 32;
case 4: b |= ((uint64_t)in[3]) << 24;
case 3: b |= ((uint64_t)in[2]) << 16;
case 2: b |= ((uint64_t)in[1]) << 8;
case 7: b |= ((uint64_t)in[6]) << 48; /* fall-thru */
case 6: b |= ((uint64_t)in[5]) << 40; /* fall-thru */
case 5: b |= ((uint64_t)in[4]) << 32; /* fall-thru */
case 4: b |= ((uint64_t)in[3]) << 24; /* fall-thru */
case 3: b |= ((uint64_t)in[2]) << 16; /* fall-thru */
case 2: b |= ((uint64_t)in[1]) << 8; /* fall-thru */
case 1: b |= ((uint64_t)in[0]); break;
case 0: break;
}
@ -202,12 +202,12 @@ uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k)
}
switch (left) {
case 7: b |= ((uint64_t)siptlw(in[6])) << 48;
case 6: b |= ((uint64_t)siptlw(in[5])) << 40;
case 5: b |= ((uint64_t)siptlw(in[4])) << 32;
case 4: b |= ((uint64_t)siptlw(in[3])) << 24;
case 3: b |= ((uint64_t)siptlw(in[2])) << 16;
case 2: b |= ((uint64_t)siptlw(in[1])) << 8;
case 7: b |= ((uint64_t)siptlw(in[6])) << 48; /* fall-thru */
case 6: b |= ((uint64_t)siptlw(in[5])) << 40; /* fall-thru */
case 5: b |= ((uint64_t)siptlw(in[4])) << 32; /* fall-thru */
case 4: b |= ((uint64_t)siptlw(in[3])) << 24; /* fall-thru */
case 3: b |= ((uint64_t)siptlw(in[2])) << 16; /* fall-thru */
case 2: b |= ((uint64_t)siptlw(in[1])) << 8; /* fall-thru */
case 1: b |= ((uint64_t)siptlw(in[0])); break;
case 0: break;
}

View File

@ -142,11 +142,11 @@ void slowlogReset(void) {
void slowlogCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
const char *help[] = {
"get [count] -- Return top entries from the slowlog (default: 10)."
"GET [count] -- Return top entries from the slowlog (default: 10)."
" Entries are made of:",
" id, timestamp, time in microseconds, arguments array, client IP and port, client name",
"len -- Return the length of the slowlog.",
"reset -- Reset the slowlog.",
"LEN -- Return the length of the slowlog.",
"RESET -- Reset the slowlog.",
NULL
};
addReplyHelp(c, help);
@ -187,6 +187,6 @@ NULL
}
setDeferredMultiBulkLength(c,totentries,sent);
} else {
addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try SLOWLOG HELP", (char*)c->argv[1]->ptr);
addReplySubcommandSyntaxError(c);
}
}

View File

@ -193,7 +193,7 @@ void sortCommand(client *c) {
long limit_start = 0, limit_count = -1, start, end;
int j, dontsort = 0, vectorlen;
int getop = 0; /* GET operation counter */
int int_convertion_error = 0;
int int_conversion_error = 0;
int syntax_error = 0;
robj *sortval, *sortby = NULL, *storekey = NULL;
redisSortObject *vector; /* Resulting vector to sort */
@ -447,7 +447,7 @@ void sortCommand(client *c) {
serverAssertWithInfo(c,sortval,j == vectorlen);
/* Now it's time to load the right scores in the sorting vector */
if (dontsort == 0) {
if (!dontsort) {
for (j = 0; j < vectorlen; j++) {
robj *byval;
if (sortby) {
@ -469,7 +469,7 @@ void sortCommand(client *c) {
if (eptr[0] != '\0' || errno == ERANGE ||
isnan(vector[j].u.score))
{
int_convertion_error = 1;
int_conversion_error = 1;
}
} else if (byval->encoding == OBJ_ENCODING_INT) {
/* Don't need to decode the object if it's
@ -487,9 +487,7 @@ void sortCommand(client *c) {
decrRefCount(byval);
}
}
}
if (dontsort == 0) {
server.sort_desc = desc;
server.sort_alpha = alpha;
server.sort_bypattern = sortby ? 1 : 0;
@ -503,7 +501,7 @@ void sortCommand(client *c) {
/* Send command output to the output buffer, performing the specified
* GET/DEL/INCR/DECR operations if any. */
outputlen = getop ? getop*(end-start+1) : end-start+1;
if (int_convertion_error) {
if (int_conversion_error) {
addReplyError(c,"One or more scores can't be converted into double");
} else if (storekey == NULL) {
/* STORE option not specified, sent the sorting result to client */

View File

@ -108,5 +108,6 @@ streamConsumer *streamLookupConsumer(streamCG *cg, sds name, int create);
streamCG *streamCreateCG(stream *s, char *name, size_t namelen, streamID *id);
streamNACK *streamCreateNACK(streamConsumer *consumer);
void streamDecodeID(void *buf, streamID *id);
int streamCompareID(streamID *a, streamID *b);
#endif

View File

@ -41,6 +41,7 @@
#define STREAM_ITEM_FLAG_SAMEFIELDS (1<<1) /* Same fields as master entry. */
void streamFreeCG(streamCG *cg);
void streamFreeNACK(streamNACK *na);
size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start, streamID *end, size_t count, streamConsumer *consumer);
/* -----------------------------------------------------------------------
@ -171,7 +172,7 @@ int streamCompareID(streamID *a, streamID *b) {
* if the ID was generated by the function. However the function may return
* C_ERR if an ID was given via 'use_id', but adding it failed since the
* current top ID is greater or equal. */
int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id, streamID *use_id) {
int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_id, streamID *use_id) {
/* If an ID was given, check that it's greater than the last entry ID
* or return an error. */
if (use_id && streamCompareID(use_id,&s->last_id) <= 0) return C_ERR;
@ -206,7 +207,7 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id,
/* Create a new listpack and radix tree node if needed. Note that when
* a new listpack is created, we populate it with a "master entry". This
* is just a set of fields that is taken as refernce in order to compress
* is just a set of fields that is taken as references in order to compress
* the stream entries that we'll add inside the listpack.
*
* Note that while we use the first added entry fields to create
@ -221,7 +222,7 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id,
* +-------+---------+------------+---------+--/--+---------+---------+-+
*
* count and deleted just represent respectively the total number of
* entires inside the listpack that are valid, and marked as deleted
* entries inside the listpack that are valid, and marked as deleted
* (delted flag in the entry flags set). So the total number of items
* actually inside the listpack (both deleted and not) is count+deleted.
*
@ -234,10 +235,24 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id,
*
* The "0" entry at the end is the same as the 'lp-count' entry in the
* regular stream entries (see below), and marks the fact that there are
* no more entires, when we scan the stream from right to left. */
* no more entries, when we scan the stream from right to left. */
/* First of all, check if we can append to the current macro node or
* if we need to switch to the next one. 'lp' will be set to NULL if
* the current node is full. */
if (lp != NULL) {
if (server.stream_node_max_bytes &&
lp_bytes > server.stream_node_max_bytes)
{
lp = NULL;
} else if (server.stream_node_max_entries) {
int64_t count = lpGetInteger(lpFirst(lp));
if (count > server.stream_node_max_entries) lp = NULL;
}
}
int flags = STREAM_ITEM_FLAG_NONE;
if (lp == NULL || lp_bytes > STREAM_BYTES_PER_LISTPACK) {
if (lp == NULL || lp_bytes > server.stream_node_max_bytes) {
master_id = id;
streamEncodeID(rax_key,&id);
/* Create the listpack having the master entry ID and fields. */
@ -245,7 +260,7 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id,
lp = lpAppendInteger(lp,1); /* One item, the one we are adding. */
lp = lpAppendInteger(lp,0); /* Zero deleted so far. */
lp = lpAppendInteger(lp,numfields);
for (int i = 0; i < numfields; i++) {
for (int64_t i = 0; i < numfields; i++) {
sds field = argv[i*2]->ptr;
lp = lpAppend(lp,(unsigned char*)field,sdslen(field));
}
@ -270,10 +285,10 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id,
/* Check if the entry we are adding, have the same fields
* as the master entry. */
int master_fields_count = lpGetInteger(lp_ele);
int64_t master_fields_count = lpGetInteger(lp_ele);
lp_ele = lpNext(lp,lp_ele);
if (numfields == master_fields_count) {
int i;
int64_t i;
for (i = 0; i < master_fields_count; i++) {
sds field = argv[i*2]->ptr;
int64_t e_len;
@ -317,14 +332,14 @@ int streamAppendItem(stream *s, robj **argv, int numfields, streamID *added_id,
lp = lpAppendInteger(lp,id.seq - master_id.seq);
if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS))
lp = lpAppendInteger(lp,numfields);
for (int i = 0; i < numfields; i++) {
for (int64_t i = 0; i < numfields; i++) {
sds field = argv[i*2]->ptr, value = argv[i*2+1]->ptr;
if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS))
lp = lpAppend(lp,(unsigned char*)field,sdslen(field));
lp = lpAppend(lp,(unsigned char*)value,sdslen(value));
}
/* Compute and store the lp-count field. */
int lp_count = numfields;
int64_t lp_count = numfields;
lp_count += 3; /* Add the 3 fixed fields flags + ms-diff + seq-diff. */
if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) {
/* If the item is not compressed, it also has the fields other than
@ -454,7 +469,7 @@ int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) {
* iteration is from the start to the end element (inclusive), otherwise
* if rev is non-zero, the iteration is reversed.
*
* Once the iterator is initalized, we iterate like this:
* Once the iterator is initialized, we iterate like this:
*
* streamIterator myiterator;
* streamIteratorStart(&myiterator,...);
@ -564,7 +579,7 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {
/* If we are going backward, read the number of elements this
* entry is composed of, and jump backward N times to seek
* its start. */
int lp_count = lpGetInteger(si->lp_ele);
int64_t lp_count = lpGetInteger(si->lp_ele);
if (lp_count == 0) { /* We reached the master entry. */
si->lp = NULL;
si->lp_ele = NULL;
@ -627,12 +642,17 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {
* forward, or seek the previous entry if we are going
* backward. */
if (!si->rev) {
int to_discard = (flags & STREAM_ITEM_FLAG_SAMEFIELDS) ?
*numfields : *numfields*2;
int64_t to_discard = (flags & STREAM_ITEM_FLAG_SAMEFIELDS) ?
*numfields : *numfields*2;
for (int64_t i = 0; i < to_discard; i++)
si->lp_ele = lpNext(si->lp,si->lp_ele);
} else {
int prev_times = 4; /* flag + id ms/seq diff + numfields. */
int64_t prev_times = 4; /* flag + id ms + id seq + one more to
go back to the previous entry "count"
field. */
/* If the entry was not flagged SAMEFIELD we also read the
* number of fields, so go back one more. */
if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) prev_times++;
while(prev_times--) si->lp_ele = lpPrev(si->lp,si->lp_ele);
}
}
@ -685,10 +705,22 @@ void streamIteratorRemoveEntry(streamIterator *si, streamID *current) {
/* Change the valid/deleted entries count in the master entry. */
unsigned char *p = lpFirst(lp);
aux = lpGetInteger(p);
lp = lpReplaceInteger(lp,&p,aux-1);
p = lpNext(lp,p); /* Seek deleted field. */
aux = lpGetInteger(p);
lp = lpReplaceInteger(lp,&p,aux+1);
if (aux == 1) {
/* If this is the last element in the listpack, we can remove the whole
* node. */
lpFree(lp);
raxRemove(si->stream->rax,si->ri.key,si->ri.key_len,NULL);
} else {
/* In the base case we alter the counters of valid/deleted entries. */
lp = lpReplaceInteger(lp,&p,aux-1);
p = lpNext(lp,p); /* Seek deleted field. */
aux = lpGetInteger(p);
lp = lpReplaceInteger(lp,&p,aux+1);
}
/* Update the number of entries counter. */
si->stream->length--;
/* Re-seek the iterator to fix the now messed up state. */
streamID start, end;
@ -811,10 +843,10 @@ void streamPropagateXCLAIM(client *c, robj *key, robj *group, robj *id, streamNA
* given, but currently such a feature is never used by the code base that
* will always pass 'spi' and propagate when a group is passed.
*
* Note that this function is recursive in certian cases. When it's called
* Note that this function is recursive in certain cases. When it's called
* with a non NULL group and consumer argument, it may call
* streamReplyWithRangeFromConsumerPEL() in order to get entries from the
* consumer pending entires list. However such a function will then call
* consumer pending entries list. However such a function will then call
* streamReplyWithRange() in order to emit single entries (found in the
* PEL by ID) to the client. This is the use case for the STREAM_RWR_RAWENTRIES
* flag.
@ -867,18 +899,43 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end
/* If a group is passed, we need to create an entry in the
* PEL (pending entries list) of this group *and* this consumer.
* Note that we are sure about the fact the message is not already
* associated with some other consumer, because if we reached this
* loop the IDs the user is requesting are greater than any message
* delivered for this group. */
*
* Note that we cannot be sure about the fact the message is not
* already owned by another consumer, because the admin is able
* to change the consumer group last delivered ID using the
* XGROUP SETID command. So if we find that there is already
* a NACK for the entry, we need to associate it to the new
* consumer. */
if (group && !(flags & STREAM_RWR_NOACK)) {
unsigned char buf[sizeof(streamID)];
streamEncodeID(buf,&id);
/* Try to add a new NACK. Most of the time this will work and
* will not require extra lookups. We'll fix the problem later
* if we find that there is already a entry for this ID. */
streamNACK *nack = streamCreateNACK(consumer);
int retval = 0;
retval += raxInsert(group->pel,buf,sizeof(buf),nack,NULL);
retval += raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
serverAssert(retval == 2); /* Make sure entry was inserted. */
int group_inserted =
raxTryInsert(group->pel,buf,sizeof(buf),nack,NULL);
int consumer_inserted =
raxTryInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
/* Now we can check if the entry was already busy, and
* in that case reassign the entry to the new consumer,
* or update it if the consumer is the same as before. */
if (group_inserted == 0) {
streamFreeNACK(nack);
nack = raxFind(group->pel,buf,sizeof(buf));
serverAssert(nack != raxNotFound);
raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL);
/* Update the consumer and NACK metadata. */
nack->consumer = consumer;
nack->delivery_time = mstime();
nack->delivery_count = 1;
/* Add the entry in the new consumer local PEL. */
raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
} else if (group_inserted == 1 && consumer_inserted == 0) {
serverPanic("NACK half-created. Should not be possible.");
}
/* Propagate as XCLAIM. */
if (spi) {
@ -899,7 +956,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end
/* This is an helper function for streamReplyWithRange() when called with
* group and consumer arguments, but with a range that is referring to already
* delivered messages. In this case we just emit messages that are already
* in the history of the conusmer, fetching the IDs from its PEL.
* in the history of the consumer, fetching the IDs from its PEL.
*
* Note that this function does not have a 'rev' argument because it's not
* possible to iterate in reverse using a group. Basically this function
@ -1035,7 +1092,7 @@ invalid:
void xaddCommand(client *c) {
streamID id;
int id_given = 0; /* Was an ID different than "*" specified? */
long long maxlen = 0; /* 0 means no maximum length. */
long long maxlen = -1; /* If left to -1 no trimming is performed. */
int approx_maxlen = 0; /* If 1 only delete whole radix tree nodes, so
the maxium length is not applied verbatim. */
int maxlen_arg_idx = 0; /* Index of the count in MAXLEN, for rewriting. */
@ -1059,6 +1116,11 @@ void xaddCommand(client *c) {
}
if (getLongLongFromObjectOrReply(c,c->argv[i+1],&maxlen,NULL)
!= C_OK) return;
if (maxlen < 0) {
addReplyError(c,"The MAXLEN argument must be >= 0.");
return;
}
i++;
maxlen_arg_idx = i;
} else {
@ -1098,7 +1160,7 @@ void xaddCommand(client *c) {
server.dirty++;
/* Remove older elements if MAXLEN was specified. */
if (maxlen) {
if (maxlen >= 0) {
if (!streamTrimByLength(s,maxlen,approx_maxlen)) {
/* If no trimming was performed, for instance because approximated
* trimming length was specified, rewrite the MAXLEN argument
@ -1269,14 +1331,13 @@ void xreadCommand(client *c) {
* starting from now. */
int id_idx = i - streams_arg - streams_count;
robj *key = c->argv[i-streams_count];
robj *o;
robj *o = lookupKeyRead(c->db,key);
if (o && checkType(c,o,OBJ_STREAM)) goto cleanup;
streamCG *group = NULL;
/* If a group was specified, than we need to be sure that the
* key and group actually exist. */
if (groupname) {
o = lookupKeyRead(c->db,key);
if (o && checkType(c,o,OBJ_STREAM)) goto cleanup;
if (o == NULL ||
(group = streamLookupCG(o->ptr,groupname->ptr)) == NULL)
{
@ -1290,8 +1351,14 @@ void xreadCommand(client *c) {
}
if (strcmp(c->argv[i]->ptr,"$") == 0) {
o = lookupKeyRead(c->db,key);
if (o && checkType(c,o,OBJ_STREAM)) goto cleanup;
if (xreadgroup) {
addReplyError(c,"The $ ID is meaningless in the context of "
"XREADGROUP: you want to read the history of "
"this consumer by specifying a proper ID, or "
"use the > ID to get new messages. The $ ID would "
"just return an empty result set.");
goto cleanup;
}
if (o) {
stream *s = o->ptr;
ids[id_idx] = s->last_id;
@ -1301,13 +1368,17 @@ void xreadCommand(client *c) {
}
continue;
} else if (strcmp(c->argv[i]->ptr,">") == 0) {
if (!xreadgroup || groupname == NULL) {
if (!xreadgroup) {
addReplyError(c,"The > ID can be specified only when calling "
"XREADGROUP using the GROUP <group> "
"<consumer> option.");
goto cleanup;
}
ids[id_idx] = group->last_id;
/* We use just the maximum ID to signal this is a ">" ID, anyway
* the code handling the blocking clients will have to update the
* ID later in order to match the changing consumer group last ID. */
ids[id_idx].ms = UINT64_MAX;
ids[id_idx].seq = UINT64_MAX;
continue;
}
if (streamParseIDOrReply(c,c->argv[i],ids+id_idx,0) != C_OK)
@ -1322,9 +1393,36 @@ void xreadCommand(client *c) {
if (o == NULL) continue;
stream *s = o->ptr;
streamID *gt = ids+i; /* ID must be greater than this. */
if (s->last_id.ms > gt->ms ||
(s->last_id.ms == gt->ms && s->last_id.seq > gt->seq))
{
int serve_synchronously = 0;
/* Check if there are the conditions to serve the client synchronously. */
if (groups) {
/* If the consumer is blocked on a group, we always serve it
* synchronously (serving its local history) if the ID specified
* was not the special ">" ID. */
if (gt->ms != UINT64_MAX ||
gt->seq != UINT64_MAX)
{
serve_synchronously = 1;
} else {
/* We also want to serve a consumer in a consumer group
* synchronously in case the group top item delivered is smaller
* than what the stream has inside. */
streamID *last = &groups[i]->last_id;
if (streamCompareID(&s->last_id, last) > 0) {
serve_synchronously = 1;
*gt = *last;
}
}
} else {
/* For consumers without a group, we serve synchronously if we can
* actually provide at least one item from the stream. */
if (streamCompareID(&s->last_id, gt) > 0) {
serve_synchronously = 1;
}
}
if (serve_synchronously) {
arraylen++;
if (arraylen == 1) arraylen_ptr = addDeferredMultiBulkLength(c);
/* streamReplyWithRange() handles the 'start' ID as inclusive,
@ -1336,7 +1434,7 @@ void xreadCommand(client *c) {
/* Emit the two elements sub-array consisting of the name
* of the stream and the data we extracted from it. */
addReplyMultiBulkLen(c,2);
addReplyBulk(c,c->argv[i+streams_arg]);
addReplyBulk(c,c->argv[streams_arg+i]);
streamConsumer *consumer = NULL;
if (groups) consumer = streamLookupConsumer(groups[i],
consumername->ptr,1);
@ -1378,6 +1476,7 @@ void xreadCommand(client *c) {
incrRefCount(consumername);
c->bpop.xread_group = groupname;
c->bpop.xread_consumer = consumername;
c->bpop.xread_group_noack = noack;
} else {
c->bpop.xread_group = NULL;
c->bpop.xread_consumer = NULL;
@ -1516,14 +1615,14 @@ uint64_t streamDelConsumer(streamCG *cg, sds name) {
/* XGROUP CREATE <key> <groupname> <id or $>
* XGROUP SETID <key> <id or $>
* XGROUP DELGROUP <key> <groupname>
* XGROUP DESTROY <key> <groupname>
* XGROUP DELCONSUMER <key> <groupname> <consumername> */
void xgroupCommand(client *c) {
const char *help[] = {
"CREATE <key> <groupname> <id or $> -- Create a new consumer group.",
"SETID <key> <groupname> <id or $> -- Set the current group ID.",
"DELGROUP <key> <groupname> -- Remove the specified group.",
"DELCONSUMER <key> <groupname> <consumer> -- Remove the specified conusmer.",
"DESTROY <key> <groupname> -- Remove the specified group.",
"DELCONSUMER <key> <groupname> <consumer> -- Remove the specified consumer.",
"HELP -- Prints this help.",
NULL
};
@ -1535,14 +1634,13 @@ NULL
/* Lookup the key now, this is common for all the subcommands but HELP. */
if (c->argc >= 4) {
robj *o = lookupKeyWriteOrReply(c,c->argv[2],shared.nokeyerr);
if (o == NULL) return;
if (o == NULL || checkType(c,o,OBJ_STREAM)) return;
s = o->ptr;
grpname = c->argv[3]->ptr;
/* Certain subcommands require the group to exist. */
if ((cg = streamLookupCG(s,grpname)) == NULL &&
(!strcasecmp(opt,"SETID") ||
!strcasecmp(opt,"DELGROUP") ||
!strcasecmp(opt,"DELCONSUMER")))
{
addReplyErrorFormat(c, "-NOGROUP No such consumer group '%s' "
@ -1564,22 +1662,46 @@ NULL
if (cg) {
addReply(c,shared.ok);
server.dirty++;
notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-create",
c->argv[2],c->db->id);
} else {
addReplySds(c,
sdsnew("-BUSYGROUP Consumer Group name already exists\r\n"));
}
} else if (!strcasecmp(opt,"SETID") && c->argc == 5) {
} else if (!strcasecmp(opt,"DELGROUP") && c->argc == 4) {
streamID id;
if (!strcmp(c->argv[4]->ptr,"$")) {
id = s->last_id;
} else if (streamParseIDOrReply(c,c->argv[4],&id,0) != C_OK) {
return;
}
cg->last_id = id;
addReply(c,shared.ok);
server.dirty++;
notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-setid",c->argv[2],c->db->id);
} else if (!strcasecmp(opt,"DESTROY") && c->argc == 4) {
if (cg) {
raxRemove(s->cgroups,(unsigned char*)grpname,sdslen(grpname),NULL);
streamFreeCG(cg);
addReply(c,shared.cone);
server.dirty++;
notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-destroy",
c->argv[2],c->db->id);
} else {
addReply(c,shared.czero);
}
} else if (!strcasecmp(opt,"DELCONSUMER") && c->argc == 5) {
/* Delete the consumer and returns the number of pending messages
* that were yet associated with such a consumer. */
long long pending = streamDelConsumer(cg,c->argv[4]->ptr);
addReplyLongLong(c,pending);
server.dirty++;
notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-delconsumer",
c->argv[2],c->db->id);
} else if (!strcasecmp(opt,"HELP")) {
addReplyHelp(c, help);
} else {
addReply(c,shared.syntaxerr);
addReplySubcommandSyntaxError(c);
}
}
@ -1728,8 +1850,10 @@ void xpendingCommand(client *c) {
/* If a consumer name was mentioned but it does not exist, we can
* just return an empty array. */
if (consumername && consumer == NULL)
if (consumername && consumer == NULL) {
addReplyMultiBulkLen(c,0);
return;
}
rax *pel = consumer ? consumer->pel : group->pel;
unsigned char startkey[sizeof(streamID)];
@ -1785,7 +1909,7 @@ void xpendingCommand(client *c) {
* becomes the specified <consumer>. If the minimum idle time specified
* is zero, messages are claimed regardless of their idle time.
*
* All the messages that cannot be found inside the pending entires list
* All the messages that cannot be found inside the pending entries list
* are ignored, but in case the FORCE option is used. In that case we
* create the NACK (representing a not yet acknowledged message) entry in
* the consumer group PEL.
@ -1970,7 +2094,7 @@ void xclaimCommand(client *c) {
nack->delivery_time = deliverytime;
/* Set the delivery attempts counter if given. */
if (retrycount >= 0) nack->delivery_count = retrycount;
/* Add the entry in the new cosnumer local PEL. */
/* Add the entry in the new consumer local PEL. */
raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
/* Send the reply for this entry. */
if (justid) {
@ -1994,32 +2118,36 @@ void xclaimCommand(client *c) {
/* XDEL <key> [<ID1> <ID2> ... <IDN>]
*
* Removes the specified entries from the stream. Returns the number
* of items actaully deleted, that may be different from the number
* of items actually deleted, that may be different from the number
* of IDs passed in case certain IDs do not exist. */
void xdelCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL
|| checkType(c,o,OBJ_STREAM)) return;
stream *s = o->ptr;
/* We need to sanity check the IDs passed to start. Even if not
* a big issue, it is not great that the command is only partially
* executed becuase at some point an invalid ID is parsed. */
* executed because at some point an invalid ID is parsed. */
streamID id;
for (int j = 2; j < c->argc; j++) {
if (streamParseIDOrReply(c,c->argv[j],&id,0) != C_OK) return;
}
/* Actaully apply the command. */
/* Actually apply the command. */
int deleted = 0;
for (int j = 2; j < c->argc; j++) {
streamParseIDOrReply(c,c->argv[j],&id,0); /* Retval already checked. */
deleted += streamDeleteItem(s,&id);
}
signalModifiedKey(c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STREAM,"xdel",c->argv[1],c->db->id);
server.dirty += deleted;
/* Propagate the write if needed. */
if (deleted) {
signalModifiedKey(c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STREAM,"xdel",c->argv[1],c->db->id);
server.dirty += deleted;
}
addReplyLongLong(c,deleted);
}
@ -2040,7 +2168,7 @@ void xtrimCommand(client *c) {
/* If the key does not exist, we are ok returning zero, that is, the
* number of elements removed from the stream. */
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL
|| checkType(c,o,OBJ_STREAM)) return;
stream *s = o->ptr;
@ -2093,14 +2221,12 @@ void xtrimCommand(client *c) {
/* XINFO CONSUMERS key group
* XINFO GROUPS <key>
* XINFO STREAM <key>
* XINFO <key> (alias of XINFO STREAM key)
* XINFO HELP. */
void xinfoCommand(client *c) {
const char *help[] = {
"CONSUMERS <key> <groupname> -- Show consumer groups of group <groupname>.",
"GROUPS <key> -- Show the stream consumer groups.",
"STREAM <key> -- Show information about the stream.",
"<key> -- Alias for STREAM <key>.",
"HELP -- Print this help.",
NULL
};
@ -2112,20 +2238,19 @@ NULL
if (!strcasecmp(c->argv[1]->ptr,"HELP")) {
addReplyHelp(c, help);
return;
} else if (c->argc < 3) {
addReplyError(c,"syntax error, try 'XINFO HELP'");
return;
}
/* Handle the fact that no subcommand means "STREAM". */
if (c->argc == 2) {
opt = "STREAM";
key = c->argv[1];
} else {
opt = c->argv[1]->ptr;
key = c->argv[2];
}
/* With the exception of HELP handled before any other sub commands, all
* the ones are in the form of "<subcommand> <key>". */
opt = c->argv[1]->ptr;
key = c->argv[2];
/* Lookup the key now, this is common for all the subcommands but HELP. */
robj *o = lookupKeyWriteOrReply(c,key,shared.nokeyerr);
if (o == NULL) return;
if (o == NULL || checkType(c,o,OBJ_STREAM)) return;
s = o->ptr;
/* Dispatch the different subcommands. */
@ -2171,20 +2296,20 @@ NULL
raxSeek(&ri,"^",NULL,0);
while(raxNext(&ri)) {
streamCG *cg = ri.data;
addReplyMultiBulkLen(c,6);
addReplyMultiBulkLen(c,8);
addReplyStatus(c,"name");
addReplyBulkCBuffer(c,ri.key,ri.key_len);
addReplyStatus(c,"consumers");
addReplyLongLong(c,raxSize(cg->consumers));
addReplyStatus(c,"pending");
addReplyLongLong(c,raxSize(cg->pel));
addReplyStatus(c,"last-delivered-id");
addReplyStreamID(c,&cg->last_id);
}
raxStop(&ri);
} else if (c->argc == 2 ||
(!strcasecmp(opt,"STREAM") && c->argc == 3))
{
} else if (!strcasecmp(opt,"STREAM") && c->argc == 3) {
/* XINFO STREAM <key> (or the alias XINFO <key>). */
addReplyMultiBulkLen(c,12);
addReplyMultiBulkLen(c,14);
addReplyStatus(c,"length");
addReplyLongLong(c,s->length);
addReplyStatus(c,"radix-tree-keys");
@ -2193,6 +2318,8 @@ NULL
addReplyLongLong(c,s->rax->numnodes);
addReplyStatus(c,"groups");
addReplyLongLong(c,s->cgroups ? raxSize(s->cgroups) : 0);
addReplyStatus(c,"last-generated-id");
addReplyStreamID(c,&s->last_id);
/* To emit the first/last entry we us the streamReplyWithRange()
* API. */
@ -2209,7 +2336,7 @@ NULL
STREAM_RWR_RAWENTRIES,NULL);
if (!count) addReply(c,shared.nullbulk);
} else {
addReplyError(c,"syntax error, try 'XINFO HELP'");
addReplySubcommandSyntaxError(c);
}
}

View File

@ -361,7 +361,7 @@ void incrDecrCommand(client *c, long long incr) {
new = o;
o->ptr = (void*)((long)value);
} else {
new = createStringObjectFromLongLong(value);
new = createStringObjectFromLongLongForValue(value);
if (o) {
dbOverwrite(c->db,c->argv[1],new);
} else {

View File

@ -507,7 +507,7 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) {
* + means the max string possible
*
* If the string is valid the *dest pointer is set to the redis object
* that will be used for the comparision, and ex will be set to 0 or 1
* that will be used for the comparison, and ex will be set to 0 or 1
* respectively if the item is exclusive or inclusive. C_OK will be
* returned.
*
@ -1100,8 +1100,8 @@ unsigned char *zzlDeleteRangeByRank(unsigned char *zl, unsigned int start, unsig
* Common sorted set API
*----------------------------------------------------------------------------*/
unsigned int zsetLength(const robj *zobj) {
int length = -1;
unsigned long zsetLength(const robj *zobj) {
unsigned long length = 0;
if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
length = zzlLength(zobj->ptr);
} else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
@ -1878,7 +1878,7 @@ void zuiClearIterator(zsetopsrc *op) {
}
}
int zuiLength(zsetopsrc *op) {
unsigned long zuiLength(zsetopsrc *op) {
if (op->subject == NULL)
return 0;
@ -2085,7 +2085,11 @@ int zuiFind(zsetopsrc *op, zsetopval *val, double *score) {
}
int zuiCompareByCardinality(const void *s1, const void *s2) {
return zuiLength((zsetopsrc*)s1) - zuiLength((zsetopsrc*)s2);
unsigned long first = zuiLength((zsetopsrc*)s1);
unsigned long second = zuiLength((zsetopsrc*)s2);
if (first > second) return 1;
if (first < second) return -1;
return 0;
}
#define REDIS_AGGR_SUM 1
@ -2129,7 +2133,7 @@ void zunionInterGenericCommand(client *c, robj *dstkey, int op) {
zsetopsrc *src;
zsetopval zval;
sds tmp;
unsigned int maxelelen = 0;
size_t maxelelen = 0;
robj *dstobj;
zset *dstzset;
zskiplistNode *znode;
@ -2363,8 +2367,8 @@ void zrangeGenericCommand(client *c, int reverse) {
int withscores = 0;
long start;
long end;
int llen;
int rangelen;
long llen;
long rangelen;
if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
(getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
@ -2671,7 +2675,7 @@ void zcountCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
zrangespec range;
int count = 0;
unsigned long count = 0;
/* Parse the range arguments */
if (zslParseRange(c->argv[2],c->argv[3],&range) != C_OK) {
@ -2748,7 +2752,7 @@ void zlexcountCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
zlexrangespec range;
int count = 0;
unsigned long count = 0;
/* Parse the range arguments */
if (zslParseLexRange(c->argv[2],c->argv[3],&range) != C_OK) {
@ -3163,8 +3167,8 @@ void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey
signalModifiedKey(c->db,key);
}
addReplyDouble(c,score);
addReplyBulkCBuffer(c,ele,sdslen(ele));
addReplyDouble(c,score);
sdsfree(ele);
arraylen += 2;
@ -3216,9 +3220,9 @@ void blockingGenericZpopCommand(client *c, int where) {
return;
} else {
if (zsetLength(o) != 0) {
/* Non empty zset, this is like a normal Z[REV]POP. */
/* Non empty zset, this is like a normal ZPOP[MIN|MAX]. */
genericZpopCommand(c,&c->argv[j],1,where,1,NULL);
/* Replicate it as an Z[REV]POP instead of BZ[REV]POP. */
/* Replicate it as an ZPOP[MIN|MAX] instead of BZPOP[MIN|MAX]. */
rewriteClientCommandVector(c,2,
where == ZSET_MAX ? shared.zpopmax : shared.zpopmin,
c->argv[j]);

View File

@ -451,7 +451,7 @@ int string2ld(const char *s, size_t slen, long double *dp) {
/* Convert a double to a string representation. Returns the number of bytes
* required. The representation should always be parsable by strtod(3).
* This function does not support human-friendly formatting like ld2string
* does. It is intented mainly to be used inside t_zset.c when writing scores
* does. It is intended mainly to be used inside t_zset.c when writing scores
* into a ziplist representing a sorted set. */
int d2string(char *buf, size_t len, double value) {
if (isnan(value)) {

View File

@ -27,7 +27,7 @@
* traversal.
*
* <uint16_t zllen> is the number of entries. When there are more than
* 2^16-2 entires, this value is set to 2^16-1 and we need to traverse the
* 2^16-2 entries, this value is set to 2^16-1 and we need to traverse the
* entire list to know how many items it holds.
*
* <uint8_t zlend> is a special entry representing the end of the ziplist.
@ -256,7 +256,7 @@
#define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-1)
/* Increment the number of items field in the ziplist header. Note that this
* macro should never overflow the unsigned 16 bit integer, since entires are
* macro should never overflow the unsigned 16 bit integer, since entries are
* always pushed one at a time. When UINT16_MAX is reached we want the count
* to stay there to signal that a full scan is needed to get the number of
* items inside the ziplist. */
@ -269,7 +269,7 @@
* Note that this is not how the data is actually encoded, is just what we
* get filled by a function in order to operate more easily. */
typedef struct zlentry {
unsigned int prevrawlensize; /* Bytes used to encode the previos entry len*/
unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
unsigned int prevrawlen; /* Previous entry len. */
unsigned int lensize; /* Bytes used to encode this entry type/len.
For example strings have a 1, 2 or 5 bytes
@ -431,7 +431,7 @@ unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {
/* Return the length of the previous element, and the number of bytes that
* are used in order to encode the previous element length.
* 'ptr' must point to the prevlen prefix of an entry (that encodes the
* length of the previos entry in order to navigate the elements backward).
* length of the previous entry in order to navigate the elements backward).
* The length of the previous entry is stored in 'prevlen', the number of
* bytes needed to encode the previous entry length are stored in
* 'prevlensize'. */

View File

@ -30,6 +30,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
/* This function provide us access to the original libc free(). This is useful
* for instance to free results obtained by backtrace_symbols(). We need
@ -164,7 +165,7 @@ void *zrealloc(void *ptr, size_t size) {
*((size_t*)newptr) = size;
update_zmalloc_stat_free(oldsize);
update_zmalloc_stat_alloc(size);
update_zmalloc_stat_alloc(size+PREFIX_SIZE);
return (char*)newptr+PREFIX_SIZE;
#endif
}
@ -181,6 +182,9 @@ size_t zmalloc_size(void *ptr) {
if (size&(sizeof(long)-1)) size += sizeof(long)-(size&(sizeof(long)-1));
return size+PREFIX_SIZE;
}
size_t zmalloc_usable(void *ptr) {
return zmalloc_usable(ptr)-PREFIX_SIZE;
}
#endif
void zfree(void *ptr) {
@ -379,7 +383,7 @@ size_t zmalloc_get_private_dirty(long pid) {
}
/* Returns the size of physical memory (RAM) in bytes.
* It looks ugly, but this is the cleanest way to achive cross platform results.
* It looks ugly, but this is the cleanest way to achieve cross platform results.
* Cleaned up from:
*
* http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system
@ -418,7 +422,7 @@ size_t zmalloc_get_memory_size(void) {
mib[0] = CTL_HW;
#if defined(HW_REALMEM)
mib[1] = HW_REALMEM; /* FreeBSD. ----------------- */
#elif defined(HW_PYSMEM)
#elif defined(HW_PHYSMEM)
mib[1] = HW_PHYSMEM; /* Others. ------------------ */
#endif
unsigned int size = 0; /* 32-bit */

View File

@ -63,6 +63,11 @@
#ifndef ZMALLOC_LIB
#define ZMALLOC_LIB "libc"
#ifdef __GLIBC__
#include <malloc.h>
#define HAVE_MALLOC_SIZE 1
#define zmalloc_size(p) malloc_usable_size(p)
#endif
#endif
/* We can enable the Redis defrag capabilities only if we are using Jemalloc
@ -93,6 +98,9 @@ void *zmalloc_no_tcache(size_t size);
#ifndef HAVE_MALLOC_SIZE
size_t zmalloc_size(void *ptr);
size_t zmalloc_usable(void *ptr);
#else
#define zmalloc_usable(p) zmalloc_size(p)
#endif
#endif /* __ZMALLOC_H */

View File

@ -92,3 +92,80 @@ test "Node #10 should eventually replicate node #5" {
fail "#10 didn't became slave of #5"
}
}
source "../tests/includes/init-tests.tcl"
# Create a cluster with 3 master and 15 slaves, so that we have 5
# slaves for eatch master.
test "Create a 3 nodes cluster" {
create_cluster 3 15
}
test "Cluster is up" {
assert_cluster_state ok
}
test "The first master has actually 5 slaves" {
assert {[llength [lindex [R 0 role] 2]] == 5}
}
test {Slaves of #0 are instance #3, #6, #9, #12 and #15 as expected} {
set port0 [get_instance_attrib redis 0 port]
assert {[lindex [R 3 role] 2] == $port0}
assert {[lindex [R 6 role] 2] == $port0}
assert {[lindex [R 9 role] 2] == $port0}
assert {[lindex [R 12 role] 2] == $port0}
assert {[lindex [R 15 role] 2] == $port0}
}
test {Instance #3, #6, #9, #12 and #15 synced with the master} {
wait_for_condition 1000 50 {
[RI 3 master_link_status] eq {up} &&
[RI 6 master_link_status] eq {up} &&
[RI 9 master_link_status] eq {up} &&
[RI 12 master_link_status] eq {up} &&
[RI 15 master_link_status] eq {up}
} else {
fail "Instance #3 or #6 or #9 or #12 or #15 master link status is not up"
}
}
proc master_detected {instances} {
foreach instance [dict keys $instances] {
if {[RI $instance role] eq {master}} {
return true
}
}
return false
}
test "New Master down consecutively" {
set instances [dict create 0 1 3 1 6 1 9 1 12 1 15 1]
set loops [expr {[dict size $instances]-1}]
for {set i 0} {$i < $loops} {incr i} {
set master_id -1
foreach instance [dict keys $instances] {
if {[RI $instance role] eq {master}} {
set master_id $instance
break;
}
}
if {$master_id eq -1} {
fail "no master detected, #loop $i"
}
set instances [dict remove $instances $master_id]
kill_instance redis $master_id
wait_for_condition 1000 50 {
[master_detected $instances]
} else {
failover "No failover detected when master $master_id fails"
}
assert_cluster_state ok
}
}

View File

@ -39,6 +39,25 @@ start_server [list overrides [list "dir" $server_path]] {
} {0000000000000000000000000000000000000000}
}
start_server [list overrides [list "dir" $server_path]] {
test {Test RDB stream encoding} {
for {set j 0} {$j < 1000} {incr j} {
if {rand() < 0.9} {
r xadd stream * foo $j
} else {
r xadd stream * bar $j
}
}
r xgroup create stream mygroup 0
r xreadgroup GROUP mygroup Alice COUNT 1 STREAMS stream >
set digest [r debug digest]
r debug reload
set newdigest [r debug digest]
assert {$digest eq $newdigest}
r del stream
}
}
# Helper function to start a server and kill it, just to check the error
# logged.
set defaults {}

View File

@ -11,7 +11,7 @@ proc stop_bg_complex_data {handle} {
# partial resyncs attempts, all this while flooding the master with
# write queries.
#
# You can specifiy backlog size, ttl, delay before reconnection, test duration
# You can specify backlog size, ttl, delay before reconnection, test duration
# in seconds, and an additional condition to verify at the end.
#
# If reconnect is > 0, the test actually try to break the connection and

View File

@ -66,3 +66,13 @@ test "SDOWN is triggered by misconfigured instance repling with errors" {
R 0 bgsave
ensure_master_up
}
# We use this test setup to also test command renaming, as a side
# effect of the master going down if we send PONG instead of PING
test "SDOWN is triggered if we rename PING to PONG" {
ensure_master_up
S 4 SENTINEL SET mymaster rename-command PING PONG
ensure_master_down
S 4 SENTINEL SET mymaster rename-command PING PING
ensure_master_up
}

View File

@ -276,6 +276,12 @@ proc start_server {options {code undefined}} {
error_and_quit $config_file $line
}
if {$::wait_server} {
set msg "server started PID: [dict get $srv "pid"]. press any key to continue..."
puts $msg
read stdin 1
}
while 1 {
# check that the server actually started and is ready for connections
if {[exec grep -i "Ready to accept" | wc -l < $stdout] > 0} {

View File

@ -375,3 +375,19 @@ proc start_write_load {host port seconds} {
proc stop_write_load {handle} {
catch {exec /bin/kill -9 $handle}
}
proc K { x y } { set x }
# Shuffle a list. From Tcl wiki. Originally from Steve Cohen that improved
# other versions. Code should be under public domain.
proc lshuffle {list} {
set n [llength $list]
while {$n>0} {
set j [expr {int(rand()*$n)}]
lappend slist [lindex $list $j]
incr n -1
set temp [lindex $list $n]
set list [lreplace [K $list [set list {}]] $j $j $temp]
}
return $slist
}

View File

@ -61,6 +61,7 @@ set ::all_tests {
unit/hyperloglog
unit/lazyfree
unit/wait
unit/pendingquerybuf
}
# Index to the next test to run in the ::all_tests list.
set ::next_test 0
@ -82,6 +83,8 @@ set ::force_failure 0
set ::timeout 600; # 10 minutes without progresses will quit the test.
set ::last_progress [clock seconds]
set ::active_servers {} ; # Pids of active Redis instances.
set ::dont_clean 0
set ::wait_server 0
# Set to 1 when we are running in client mode. The Redis test uses a
# server-client model to run tests simultaneously. The server instance
@ -175,6 +178,9 @@ proc s {args} {
}
proc cleanup {} {
if {$::dont_clean} {
return
}
if {!$::quiet} {puts -nonewline "Cleanup: may take some time... "}
flush stdout
catch {exec rm -rf {*}[glob tests/tmp/redis.conf.*]}
@ -224,6 +230,7 @@ proc test_server_cron {} {
if {$elapsed > $::timeout} {
set err "\[[colorstr red TIMEOUT]\]: clients state report follows."
puts $err
lappend ::failed_tests $err
show_clients_state
kill_clients
force_kill_all_servers
@ -410,6 +417,8 @@ proc print_help_screen {} {
"--clients <num> Number of test clients (default 16)."
"--timeout <sec> Test timeout in seconds (default 10 min)."
"--force-failure Force the execution of a test that always fails."
"--dont-clean don't delete redis log files after the run"
"--wait-server wait after server is started (so that you can attach a debugger)"
"--help Print this help screen."
} "\n"]
}
@ -463,6 +472,10 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
} elseif {$opt eq {--clients}} {
set ::numclients $arg
incr j
} elseif {$opt eq {--dont-clean}} {
set ::dont_clean 1
} elseif {$opt eq {--wait-server}} {
set ::wait_server 1
} elseif {$opt eq {--timeout}} {
set ::timeout $arg
incr j

View File

@ -25,6 +25,39 @@ start_server {tags {"dump"}} {
assert {$ttl >= (2569591501-3000) && $ttl <= 2569591501}
r get foo
} {bar}
test {RESTORE can set an absolute expire} {
r set foo bar
set encoded [r dump foo]
r del foo
set now [clock milliseconds]
r restore foo [expr $now+3000] $encoded absttl
set ttl [r pttl foo]
assert {$ttl >= 2998 && $ttl <= 3000}
r get foo
} {bar}
test {RESTORE can set LRU} {
r set foo bar
set encoded [r dump foo]
r del foo
r config set maxmemory-policy allkeys-lru
r restore foo 0 $encoded idletime 1000
set idle [r object idletime foo]
assert {$idle >= 1000 && $idle <= 1002}
r get foo
} {bar}
test {RESTORE can set LFU} {
r set foo bar
set encoded [r dump foo]
r del foo
r config set maxmemory-policy allkeys-lfu
r restore foo 0 $encoded freq 100
set freq [r object freq foo]
assert {$freq == 100}
r get foo
} {bar}
test {RESTORE returns an error of the key already exists} {
r set foo bar
@ -246,7 +279,7 @@ start_server {tags {"dump"}} {
set e
} {*empty string*}
test {MIGRATE with mutliple keys migrate just existing ones} {
test {MIGRATE with multiple keys migrate just existing ones} {
set first [srv 0 client]
r set key1 "v1"
r set key2 "v2"

View File

@ -121,7 +121,7 @@ start_server {tags {"expire"}} {
list $a $b
} {somevalue {}}
test {TTL returns tiem to live in seconds} {
test {TTL returns time to live in seconds} {
r del x
r setex x 10 somevalue
set ttl [r ttl x]

View File

@ -142,3 +142,95 @@ start_server {tags {"maxmemory"}} {
}
}
}
proc test_slave_buffers {cmd_count payload_len limit_memory pipeline} {
start_server {tags {"maxmemory"}} {
start_server {} {
set slave [srv 0 client]
set slave_host [srv 0 host]
set slave_port [srv 0 port]
set master [srv -1 client]
set master_host [srv -1 host]
set master_port [srv -1 port]
# add 100 keys of 100k (10MB total)
for {set j 0} {$j < 100} {incr j} {
$master setrange "key:$j" 100000 asdf
}
$master config set maxmemory-policy allkeys-random
$master config set client-output-buffer-limit "slave 100000000 100000000 60"
$master config set repl-backlog-size [expr {10*1024}]
$slave slaveof $master_host $master_port
wait_for_condition 50 100 {
[s 0 master_link_status] eq {up}
} else {
fail "Replication not started."
}
# measure used memory after the slave connected and set maxmemory
set orig_used [s -1 used_memory]
set orig_client_buf [s -1 mem_clients_normal]
set orig_mem_not_counted_for_evict [s -1 mem_not_counted_for_evict]
set orig_used_no_repl [expr {$orig_used - $orig_mem_not_counted_for_evict}]
set limit [expr {$orig_used - $orig_mem_not_counted_for_evict + 20*1024}]
if {$limit_memory==1} {
$master config set maxmemory $limit
}
# put the slave to sleep
set rd_slave [redis_deferring_client]
$rd_slave debug sleep 60
# send some 10mb woth of commands that don't increase the memory usage
if {$pipeline == 1} {
set rd_master [redis_deferring_client -1]
for {set k 0} {$k < $cmd_count} {incr k} {
$rd_master setrange key:0 0 [string repeat A $payload_len]
}
for {set k 0} {$k < $cmd_count} {incr k} {
#$rd_master read
}
} else {
for {set k 0} {$k < $cmd_count} {incr k} {
$master setrange key:0 0 [string repeat A $payload_len]
}
}
set new_used [s -1 used_memory]
set slave_buf [s -1 mem_clients_slaves]
set client_buf [s -1 mem_clients_normal]
set mem_not_counted_for_evict [s -1 mem_not_counted_for_evict]
set used_no_repl [expr {$new_used - $mem_not_counted_for_evict}]
set delta [expr {($used_no_repl - $client_buf) - ($orig_used_no_repl - $orig_client_buf)}]
assert {[$master dbsize] == 100}
assert {$slave_buf > 2*1024*1024} ;# some of the data may have been pushed to the OS buffers
assert {$delta < 50*1024 && $delta > -50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB
$master client kill type slave
set killed_used [s -1 used_memory]
set killed_slave_buf [s -1 mem_clients_slaves]
set killed_mem_not_counted_for_evict [s -1 mem_not_counted_for_evict]
set killed_used_no_repl [expr {$killed_used - $killed_mem_not_counted_for_evict}]
set delta_no_repl [expr {$killed_used_no_repl - $used_no_repl}]
assert {$killed_slave_buf == 0}
assert {$delta_no_repl > -50*1024 && $delta_no_repl < 50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB
}
}
}
test {slave buffer are counted correctly} {
# we wanna use many small commands, and we don't wanna wait long
# so we need to use a pipeline (redis_deferring_client)
# that may cause query buffer to fill and induce eviction, so we disable it
test_slave_buffers 1000000 10 0 1
}
test {slave buffer don't induce eviction} {
# test again with fewer (and bigger) commands without pipeline, but with eviction
test_slave_buffers 100000 100 1 0
}

View File

@ -41,7 +41,7 @@ start_server {tags {"defrag"}} {
test "Active defrag" {
r config set activedefrag no
r config set active-defrag-threshold-lower 5
r config set active-defrag-cycle-min 25
r config set active-defrag-cycle-min 65
r config set active-defrag-cycle-max 75
r config set active-defrag-ignore-bytes 2mb
r config set maxmemory 100mb
@ -66,9 +66,10 @@ start_server {tags {"defrag"}} {
}
# Wait for the active defrag to stop working.
wait_for_condition 100 100 {
wait_for_condition 150 100 {
[s active_defrag_running] eq 0
} else {
after 120 ;# serverCron only updates the info once in 100ms
puts [r info memory]
puts [r memory malloc-stats]
fail "defrag didn't stop."
@ -97,10 +98,15 @@ start_server {tags {"defrag"}} {
r config set active-defrag-ignore-bytes 2mb
r config set maxmemory 0
r config set list-max-ziplist-size 5 ;# list of 10k items will have 2000 quicklist nodes
r config set stream-node-max-entries 5
r hmset hash h1 v1 h2 v2 h3 v3
r lpush list a b c d
r zadd zset 0 a 1 b 2 c 3 d
r sadd set a b c d
r xadd stream * item 1 value a
r xadd stream * item 2 value b
r xgroup create stream mygroup 0
r xreadgroup GROUP mygroup Alice COUNT 1 STREAMS stream >
# create big keys with 10k items
set rd [redis_deferring_client]
@ -109,8 +115,9 @@ start_server {tags {"defrag"}} {
$rd lpush biglist [concat "asdfasdfasdf" $j]
$rd zadd bigzset $j [concat "asdfasdfasdf" $j]
$rd sadd bigset [concat "asdfasdfasdf" $j]
$rd xadd bigstream * item 1 value a
}
for {set j 0} {$j < 40000} {incr j} {
for {set j 0} {$j < 50000} {incr j} {
$rd read ; # Discard replies
}
@ -134,7 +141,7 @@ start_server {tags {"defrag"}} {
for {set j 0} {$j < 500000} {incr j} {
$rd read ; # Discard replies
}
assert {[r dbsize] == 500008}
assert {[r dbsize] == 500010}
# create some fragmentation
for {set j 0} {$j < 500000} {incr j 2} {
@ -143,7 +150,7 @@ start_server {tags {"defrag"}} {
for {set j 0} {$j < 500000} {incr j 2} {
$rd read ; # Discard replies
}
assert {[r dbsize] == 250008}
assert {[r dbsize] == 250010}
# start defrag
after 120 ;# serverCron only updates the info once in 100ms
@ -155,6 +162,7 @@ start_server {tags {"defrag"}} {
r config set latency-monitor-threshold 5
r latency reset
set digest [r debug digest]
catch {r config set activedefrag yes} e
if {![string match {DISABLED*} $e]} {
# wait for the active defrag to start working (decision once a second)
@ -168,6 +176,7 @@ start_server {tags {"defrag"}} {
wait_for_condition 500 100 {
[s active_defrag_running] eq 0
} else {
after 120 ;# serverCron only updates the info once in 100ms
puts [r info memory]
puts [r memory malloc-stats]
fail "defrag didn't stop."
@ -193,9 +202,11 @@ start_server {tags {"defrag"}} {
# due to high fragmentation, 10hz, and active-defrag-cycle-max set to 75,
# we expect max latency to be not much higher than 75ms
assert {$max_latency <= 80}
} else {
set _ ""
}
} {}
# verify the data isn't corrupted or changed
set newdigest [r debug digest]
assert {$digest eq $newdigest}
r save ;# saving an rdb iterates over all the data / pointers
} {OK}
}
}

View File

@ -0,0 +1,35 @@
proc info_memory {r property} {
if {[regexp "\r\n$property:(.*?)\r\n" [{*}$r info memory] _ value]} {
set _ $value
}
}
proc prepare_value {size} {
set _v "c"
for {set i 1} {$i < $size} {incr i} {
append _v 0
}
return $_v
}
start_server {tags {"wait"}} {
start_server {} {
set slave [srv 0 client]
set slave_host [srv 0 host]
set slave_port [srv 0 port]
set master [srv -1 client]
set master_host [srv -1 host]
set master_port [srv -1 port]
test "pending querybuf: check size of pending_querybuf after set a big value" {
$slave slaveof $master_host $master_port
set _v [prepare_value [expr 32*1024*1024]]
$master set key $_v
after 2000
set m_usedmemory [info_memory $master used_memory]
set s_usedmemory [info_memory $slave used_memory]
if { $s_usedmemory > $m_usedmemory + 10*1024*1024 } {
fail "the used_memory of slave is too larger than master.Master:$m_usedmemory Slave:$s_usedmemory"
}
}
}}

View File

@ -236,4 +236,50 @@ start_server {tags {"scan"}} {
set first_score [lindex $res 1]
assert {$first_score != 0}
}
test "SCAN regression test for issue #4906" {
for {set k 0} {$k < 100} {incr k} {
r del set
r sadd set x; # Make sure it's not intset encoded
set toremove {}
unset -nocomplain found
array set found {}
# Populate the set
set numele [expr {101+[randomInt 1000]}]
for {set j 0} {$j < $numele} {incr j} {
r sadd set $j
if {$j >= 100} {
lappend toremove $j
}
}
# Start scanning
set cursor 0
set iteration 0
set del_iteration [randomInt 10]
while {!($cursor == 0 && $iteration != 0)} {
lassign [r sscan set $cursor] cursor items
# Mark found items. We expect to find from 0 to 99 at the end
# since those elements will never be removed during the scanning.
foreach i $items {
set found($i) 1
}
incr iteration
# At some point remove most of the items to trigger the
# rehashing to a smaller hash table.
if {$iteration == $del_iteration} {
r srem set {*}$toremove
}
}
# Verify that SSCAN reported everything from 0 to 99
for {set j 0} {$j < 100} {incr j} {
if {![info exists found($j)]} {
fail "SSCAN element missing $j"
}
}
}
}
}

View File

@ -517,7 +517,7 @@ start_server {tags {"scripting"}} {
# Note: keep this test at the end of this server stanza because it
# kills the server.
test {SHUTDOWN NOSAVE can kill a timedout script anyway} {
# The server sould be still unresponding to normal commands.
# The server could be still unresponding to normal commands.
catch {r ping} e
assert_match {BUSY*} $e
catch {r shutdown nosave}

View File

@ -78,4 +78,14 @@ start_server {tags {"slowlog"} overrides {slowlog-log-slower-than 1000000}} {
set e [lindex [r slowlog get] 0]
assert_equal {lastentry_client} [lindex $e 5]
}
test {SLOWLOG - can be disabled} {
r config set slowlog-log-slower-than 1
r slowlog reset
assert_equal [r slowlog len] 1
r config set slowlog-log-slower-than -1
r slowlog reset
r debug sleep 0.2
assert_equal [r slowlog len] 0
}
}

View File

@ -81,4 +81,19 @@ start_server {
# just ID2.
assert {[r XACK mystream mygroup $id1 $id2] eq 1}
}
test {PEL NACK reassignment after XGROUP SETID event} {
r del events
r xadd events * f1 v1
r xadd events * f1 v1
r xadd events * f1 v1
r xadd events * f1 v1
r xgroup create events g1 $
r xadd events * f1 v1
set c [llength [lindex [r xreadgroup group g1 c1 streams events >] 0 1]]
assert {$c == 1}
r xgroup setid events g1 -
set c [llength [lindex [r xreadgroup group g1 c2 streams events >] 0 1]]
assert {$c == 5}
}
}

View File

@ -234,6 +234,53 @@ start_server {
assert {[lindex $res 0 1 1 1] eq {field two}}
}
test {XDEL basic test} {
r del somestream
r xadd somestream * foo value0
set id [r xadd somestream * foo value1]
r xadd somestream * foo value2
r xdel somestream $id
assert {[r xlen somestream] == 2}
set result [r xrange somestream - +]
assert {[lindex $result 0 1 1] eq {value0}}
assert {[lindex $result 1 1 1] eq {value2}}
}
# Here the idea is to check the consistency of the stream data structure
# as we remove all the elements down to zero elements.
test {XDEL fuzz test} {
r del somestream
set ids {}
set x 0; # Length of the stream
while 1 {
lappend ids [r xadd somestream * item $x]
incr x
# Add enough elements to have a few radix tree nodes inside the stream.
if {[dict get [r xinfo stream somestream] radix-tree-keys] > 20} break
}
# Now remove all the elements till we reach an empty stream
# and after every deletion, check that the stream is sane enough
# to report the right number of elements with XRANGE: this will also
# force accessing the whole data structure to check sanity.
assert {[r xlen somestream] == $x}
# We want to remove elements in random order to really test the
# implementation in a better way.
set ids [lshuffle $ids]
foreach id $ids {
assert {[r xdel somestream $id] == 1}
incr x -1
assert {[r xlen somestream] == $x}
# The test would be too slow calling XRANGE for every iteration.
# Do it every 100 removal.
if {$x % 100 == 0} {
set res [r xrange somestream - +]
assert {[llength $res] == $x}
}
}
}
test {XRANGE fuzzing} {
set low_id [lindex $items 0 0]
set high_id [lindex $items end 0]
@ -253,4 +300,20 @@ start_server {
}
}
}
test {XREVRANGE regression test for issue #5006} {
# Add non compressed entries
r xadd teststream 1234567891230 key1 value1
r xadd teststream 1234567891240 key2 value2
r xadd teststream 1234567891250 key3 value3
# Add SAMEFIELD compressed entries
r xadd teststream2 1234567891230 key1 value1
r xadd teststream2 1234567891240 key1 value2
r xadd teststream2 1234567891250 key1 value3
assert_equal [r xrevrange teststream 1234567891245 -] {{1234567891240-0 {key2 value2}} {1234567891230-0 {key1 value1}}}
assert_equal [r xrevrange teststream2 1234567891245 -] {{1234567891240-0 {key1 value2}} {1234567891230-0 {key1 value1}}}
}
}

View File

@ -84,7 +84,7 @@ start_server {tags {"zset"}} {
set err
} {ERR*}
test "ZADD NX with non exisitng key" {
test "ZADD NX with non existing key" {
r del ztmp
r zadd ztmp nx 10 x 20 y 30 z
assert {[r zcard ztmp] == 3}
@ -653,11 +653,11 @@ start_server {tags {"zset"}} {
r del zset
assert_equal {} [r zpopmin zset]
create_zset zset {-1 a 1 b 2 c 3 d 4 e}
assert_equal {-1 a} [r zpopmin zset]
assert_equal {1 b} [r zpopmin zset]
assert_equal {4 e} [r zpopmax zset]
assert_equal {3 d} [r zpopmax zset]
assert_equal {2 c} [r zpopmin zset]
assert_equal {a -1} [r zpopmin zset]
assert_equal {b 1} [r zpopmin zset]
assert_equal {e 4} [r zpopmax zset]
assert_equal {d 3} [r zpopmax zset]
assert_equal {c 2} [r zpopmin zset]
assert_equal 0 [r exists zset]
r set foo bar
assert_error "*WRONGTYPE*" {r zpopmin foo}
@ -669,8 +669,8 @@ start_server {tags {"zset"}} {
assert_equal {} [r zpopmin z1 2]
assert_error "*WRONGTYPE*" {r zpopmin foo 2}
create_zset z1 {0 a 1 b 2 c 3 d}
assert_equal {0 a 1 b} [r zpopmin z1 2]
assert_equal {3 d 2 c} [r zpopmax z1 2]
assert_equal {a 0 b 1} [r zpopmin z1 2]
assert_equal {d 3 c 2} [r zpopmax z1 2]
}
test "BZPOP with a single existing sorted set - $encoding" {
@ -678,11 +678,11 @@ start_server {tags {"zset"}} {
create_zset zset {0 a 1 b 2 c}
$rd bzpopmin zset 5
assert_equal {zset 0 a} [$rd read]
assert_equal {zset a 0} [$rd read]
$rd bzpopmin zset 5
assert_equal {zset 1 b} [$rd read]
assert_equal {zset b 1} [$rd read]
$rd bzpopmax zset 5
assert_equal {zset 2 c} [$rd read]
assert_equal {zset c 2} [$rd read]
assert_equal 0 [r exists zset]
}
@ -692,16 +692,16 @@ start_server {tags {"zset"}} {
create_zset z2 {3 d 4 e 5 f}
$rd bzpopmin z1 z2 5
assert_equal {z1 0 a} [$rd read]
assert_equal {z1 a 0} [$rd read]
$rd bzpopmax z1 z2 5
assert_equal {z1 2 c} [$rd read]
assert_equal {z1 c 2} [$rd read]
assert_equal 1 [r zcard z1]
assert_equal 3 [r zcard z2]
$rd bzpopmax z2 z1 5
assert_equal {z2 5 f} [$rd read]
assert_equal {z2 f 5} [$rd read]
$rd bzpopmin z2 z1 5
assert_equal {z2 3 d} [$rd read]
assert_equal {z2 d 3} [$rd read]
assert_equal 1 [r zcard z1]
assert_equal 1 [r zcard z2]
}
@ -711,9 +711,9 @@ start_server {tags {"zset"}} {
r del z1
create_zset z2 {3 d 4 e 5 f}
$rd bzpopmax z1 z2 5
assert_equal {z2 5 f} [$rd read]
assert_equal {z2 f 5} [$rd read]
$rd bzpopmin z2 z1 5
assert_equal {z2 3 d} [$rd read]
assert_equal {z2 d 3} [$rd read]
assert_equal 0 [r zcard z1]
assert_equal 1 [r zcard z2]
}
@ -1107,7 +1107,7 @@ start_server {tags {"zset"}} {
r del zset
r zadd zset 1 bar
$rd read
} {zset 1 bar}
} {zset bar 1}
test "BZPOPMIN, ZADD + DEL + SET should not awake blocked client" {
set rd [redis_deferring_client]
@ -1124,7 +1124,7 @@ start_server {tags {"zset"}} {
r del zset
r zadd zset 1 bar
$rd read
} {zset 1 bar}
} {zset bar 1}
test "BZPOPMIN with same key multiple times should work" {
set rd [redis_deferring_client]
@ -1133,18 +1133,18 @@ start_server {tags {"zset"}} {
# Data arriving after the BZPOPMIN.
$rd bzpopmin z1 z2 z2 z1 0
r zadd z1 0 a
assert_equal [$rd read] {z1 0 a}
assert_equal [$rd read] {z1 a 0}
$rd bzpopmin z1 z2 z2 z1 0
r zadd z2 1 b
assert_equal [$rd read] {z2 1 b}
assert_equal [$rd read] {z2 b 1}
# Data already there.
r zadd z1 0 a
r zadd z2 1 b
$rd bzpopmin z1 z2 z2 z1 0
assert_equal [$rd read] {z1 0 a}
assert_equal [$rd read] {z1 a 0}
$rd bzpopmin z1 z2 z2 z1 0
assert_equal [$rd read] {z2 1 b}
assert_equal [$rd read] {z2 b 1}
}
test "MULTI/EXEC is isolated from the point of view of BZPOPMIN" {
@ -1157,7 +1157,7 @@ start_server {tags {"zset"}} {
r zadd zset 2 c
r exec
$rd read
} {zset 0 a}
} {zset a 0}
test "BZPOPMIN with variadic ZADD" {
set rd [redis_deferring_client]
@ -1167,7 +1167,7 @@ start_server {tags {"zset"}} {
if {$::valgrind} {after 100}
assert_equal 2 [r zadd zset -1 foo 1 bar]
if {$::valgrind} {after 100}
assert_equal {zset -1 foo} [$rd read]
assert_equal {zset foo -1} [$rd read]
assert_equal {bar} [r zrange zset 0 -1]
}
@ -1177,7 +1177,7 @@ start_server {tags {"zset"}} {
$rd bzpopmin zset 0
after 1000
r zadd zset 0 foo
assert_equal {zset 0 foo} [$rd read]
assert_equal {zset foo 0} [$rd read]
}
}

View File

@ -14,7 +14,8 @@ GROUPS = [
"scripting",
"hyperloglog",
"cluster",
"geo"
"geo",
"stream"
].freeze
GROUPS_BY_NAME = Hash[*

View File

@ -5,7 +5,7 @@ rehashing.c
Visually show buckets in the two hash tables between rehashings. Also stress
test getRandomKeys() implementation, that may actually disappear from
Redis soon, however visualizaiton some code is reusable in new bugs
Redis soon, however visualization some code is reusable in new bugs
investigation.
Compile with: