/*
 * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "server.h"
#include "cluster.h"
#include "atomicvar.h"

#include <signal.h>
#include <ctype.h>

/*-----------------------------------------------------------------------------
 * C-level DB API
 *----------------------------------------------------------------------------*/

int keyIsExpired(redisDb *db, robj *key);

/* Update LFU when an object is accessed.
 * Firstly, decrement the counter if the decrement time is reached.
 * Then logarithmically increment the counter, and update the access time. */
void updateLFU(robj *val) {
    unsigned long counter = LFUDecrAndReturn(val);
    counter = LFULogIncr(counter);
    val->lru = (LFUGetTimeInMinutes()<<8) | counter;
}
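
/* For reference: when an LFU maxmemory policy is active, the 24-bit
 * robj->lru field is split in two parts: the high 16 bits store the last
 * access time in minutes (LFUGetTimeInMinutes()) and the low 8 bits store
 * the logarithmic access counter, hence the <<8 shift and bitwise OR above. */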

/* Low level key lookup API, not actually called directly from commands
 * implementations that should instead rely on lookupKeyRead(),
 * lookupKeyWrite() and lookupKeyReadWithFlags(). */
robj *lookupKey(redisDb *db, robj *key, int flags) {
    dictEntry *de = dictFind(db->dict,key->ptr);
    if (de) {
        robj *val = dictGetVal(de);

        /* Update the access time for the ageing algorithm.
         * Don't do it if we have a saving child, as this will trigger
         * a copy on write madness. */
        if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)){
            if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
                updateLFU(val);
            } else {
                val->lru = LRU_CLOCK();
            }
        }
        return val;
    } else {
        return NULL;
    }
}

/* Lookup a key for read operations, or return NULL if the key is not found
 * in the specified DB.
 *
 * As a side effect of calling this function:
 * 1. A key gets expired if it reached its TTL.
 * 2. The key's last access time is updated.
 * 3. The global keys hits/misses stats are updated (reported in INFO).
 * 4. If keyspace notifications are enabled, a "keymiss" notification is fired.
 *
 * This API should not be used when we write to the key after obtaining
 * the object linked to the key, but only for read only operations.
 *
 * Flags change the behavior of this command:
 *
 *  LOOKUP_NONE (or zero): no special flags are passed.
 *  LOOKUP_NOTOUCH: don't alter the last access time of the key.
 *
 * Note: this function also returns NULL if the key is logically expired
 * but still existing, in case this is a slave, since this API is called only
 * for read operations. Even if the key expiry is master-driven, we can
 * correctly report a key as expired on slaves even if the master is lagging
 * expiring our key via DELs in the replication link. */
robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) {
    robj *val;

    if (expireIfNeeded(db,key) == 1) {
        /* Key expired. If we are in the context of a master, expireIfNeeded()
         * returns 0 only when the key does not exist at all, so it's safe
         * to return NULL ASAP. */
        if (server.masterhost == NULL) {
            server.stat_keyspace_misses++;
            notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id);
            return NULL;
        }

        /* However if we are in the context of a slave, expireIfNeeded() will
         * not really try to expire the key, it only returns information
         * about the "logical" status of the key: key expiring is up to the
         * master in order to have a consistent view of the master's data set.
         *
         * However, if the command caller is not the master, and as an
         * additional safety measure, the command invoked is a read-only
         * command, we can safely return NULL here, and provide a more
         * consistent behavior to clients accessing expired values in a
         * read-only fashion, that will see the key as non existing.
         *
         * Notably this covers GETs when slaves are used to scale reads. */
        if (server.current_client &&
            server.current_client != server.master &&
            server.current_client->cmd &&
            server.current_client->cmd->flags & CMD_READONLY)
        {
            server.stat_keyspace_misses++;
            notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id);
            return NULL;
        }
    }
    val = lookupKey(db,key,flags);
    if (val == NULL) {
        server.stat_keyspace_misses++;
        notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id);
    } else {
        server.stat_keyspace_hits++;
    }
    return val;
}

/* Like lookupKeyReadWithFlags(), but does not use any flag, which is the
 * common case. */
robj *lookupKeyRead(redisDb *db, robj *key) {
    return lookupKeyReadWithFlags(db,key,LOOKUP_NONE);
}

/* Lookup a key for write operations, and as a side effect, if needed, expires
 * the key if its TTL is reached.
 *
 * Returns the linked value object if the key exists or NULL if the key
 * does not exist in the specified DB. */
robj *lookupKeyWriteWithFlags(redisDb *db, robj *key, int flags) {
    expireIfNeeded(db,key);
    return lookupKey(db,key,flags);
}

robj *lookupKeyWrite(redisDb *db, robj *key) {
    return lookupKeyWriteWithFlags(db, key, LOOKUP_NONE);
}

robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply) {
    robj *o = lookupKeyRead(c->db, key);
    if (!o) addReply(c,reply);
    return o;
}
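
/* Usage sketch (illustrative, loosely modeled on the GET code path): a
 * read-only command typically combines the lookup helper above with a
 * type check before replying:
 *
 *     robj *o = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp]);
 *     if (o == NULL || checkType(c, o, OBJ_STRING)) return;
 *     addReplyBulk(c, o);
 *
 * The reply object passed as third argument is what the client receives
 * when the key is missing or logically expired. */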

robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) {
    robj *o = lookupKeyWrite(c->db, key);
    if (!o) addReply(c,reply);
    return o;
}

/* Add the key to the DB. It's up to the caller to increment the reference
 * counter of the value if needed.
 *
 * The program is aborted if the key already exists. */
void dbAdd(redisDb *db, robj *key, robj *val) {
    sds copy = sdsdup(key->ptr);
    int retval = dictAdd(db->dict, copy, val);

    serverAssertWithInfo(NULL,key,retval == DICT_OK);
    /* Lists and sorted sets may have clients blocked on them (BLPOP,
     * BZPOPMIN, ...): signal that the key is now ready to serve them. */
    if (val->type == OBJ_LIST ||
        val->type == OBJ_ZSET)
        signalKeyAsReady(db, key);
    if (server.cluster_enabled) slotToKeyAdd(key);
}

/* Overwrite an existing key with a new value. Incrementing the reference
 * count of the new value is up to the caller.
 * This function does not modify the expire time of the existing key.
 *
 * The program is aborted if the key was not already present. */
void dbOverwrite(redisDb *db, robj *key, robj *val) {
    dictEntry *de = dictFind(db->dict,key->ptr);

    serverAssertWithInfo(NULL,key,de != NULL);
    dictEntry auxentry = *de;
    robj *old = dictGetVal(de);
    if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
        /* Keep the old LFU access frequency for the new value. */
        val->lru = old->lru;
    }
    dictSetVal(db->dict, de, val);

    if (server.lazyfree_lazy_server_del) {
        /* Hand the old value to the lazy free machinery and clear our saved
         * copy of the entry, so that dictFreeVal() below becomes a no-op. */
        freeObjAsync(old);
        dictSetVal(db->dict, &auxentry, NULL);
    }

    dictFreeVal(db->dict, &auxentry);
}

/* High level Set operation. This function can be used in order to set
 * a key to a new object, whether the key already exists or not.
 *
 * 1) The ref count of the value object is incremented.
 * 2) Clients WATCHing the destination key are notified.
 * 3) The expire time of the key is reset (the key is made persistent).
 *
 * All the new keys in the database should be created via this interface. */
void setKey(redisDb *db, robj *key, robj *val) {
    if (lookupKeyWrite(db,key) == NULL) {
        dbAdd(db,key,val);
    } else {
        dbOverwrite(db,key,val);
    }
    incrRefCount(val);
    removeExpire(db,key);
    signalModifiedKey(db,key);
}
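
/* Usage sketch (illustrative): commands that simply replace a key's value
 * usually call setKey() and then handle dirtiness, notifications and the
 * reply themselves, roughly like:
 *
 *     setKey(c->db, c->argv[1], c->argv[2]);
 *     server.dirty++;
 *     notifyKeyspaceEvent(NOTIFY_STRING, "set", c->argv[1], c->db->id);
 *     addReply(c, shared.ok);
 */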

int dbExists(redisDb *db, robj *key) {
    return dictFind(db->dict,key->ptr) != NULL;
}

/* Return a random key, in form of a Redis object.
 * If there are no keys, NULL is returned.
 *
 * The function makes sure to return keys not already expired. */
robj *dbRandomKey(redisDb *db) {
    dictEntry *de;
    int maxtries = 100;
    int allvolatile = dictSize(db->dict) == dictSize(db->expires);

    while(1) {
        sds key;
        robj *keyobj;

        de = dictGetFairRandomKey(db->dict);
        if (de == NULL) return NULL;

        key = dictGetKey(de);
        keyobj = createStringObject(key,sdslen(key));
        if (dictFind(db->expires,key)) {
            if (allvolatile && server.masterhost && --maxtries == 0) {
                /* If the DB is composed only of keys with an expire set,
                 * it could happen that all the keys are already logically
                 * expired in the slave, so the function cannot stop because
                 * expireIfNeeded() is false, nor can it stop because
                 * dictGetRandomKey() returns NULL (there are keys to return).
                 * To prevent an infinite loop we do a limited number of
                 * tries, and if the conditions for an infinite loop are met,
                 * we eventually return a key name that may be already
                 * expired. */
                return keyobj;
            }
            if (expireIfNeeded(db,keyobj)) {
                decrRefCount(keyobj);
                continue; /* search for another key. This expired. */
            }
        }
        return keyobj;
    }
}

/* Delete a key, value, and associated expiration entry if any, from the DB */
int dbSyncDelete(redisDb *db, robj *key) {
    /* Deleting an entry from the expires dict will not free the sds of
     * the key, because it is shared with the main dictionary. */
    if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);
    if (dictDelete(db->dict,key->ptr) == DICT_OK) {
        if (server.cluster_enabled) slotToKeyDel(key);
        return 1;
    } else {
        return 0;
    }
}

/* This is a wrapper whose behavior depends on the Redis lazy free
 * configuration. Deletes the key synchronously or asynchronously. */
int dbDelete(redisDb *db, robj *key) {
    return server.lazyfree_lazy_server_del ? dbAsyncDelete(db,key) :
                                             dbSyncDelete(db,key);
}
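
/* Note: dbAsyncDelete() does not unconditionally hand work to the
 * background thread. Small values whose deallocation is cheap are still
 * freed synchronously, since queueing them would cost more than freeing
 * them in place; only sufficiently large objects are deferred to the
 * lazy free thread. */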

/* Prepare the string object stored at 'key' to be modified destructively
 * to implement commands like SETBIT or APPEND.
 *
 * An object is usually ready to be modified unless one of the two conditions
 * below is true:
 *
 * 1) The object 'o' is shared (refcount > 1): we don't want to affect
 *    other users.
 * 2) The object encoding is not "RAW".
 *
 * If the object is found in one of the above conditions (or both) by the
 * function, an unshared / not-encoded copy of the string object is stored
 * at 'key' in the specified 'db'. Otherwise the object 'o' itself is
 * returned.
 *
 * USAGE:
 *
 * The object 'o' is what the caller already obtained by looking up 'key'
 * in 'db', the usage pattern looks like this:
 *
 * o = lookupKeyWrite(db,key);
 * if (checkType(c,o,OBJ_STRING)) return;
 * o = dbUnshareStringValue(db,key,o);
 *
 * At this point the caller is ready to modify the object, for example
 * using an sdscat() call to append some data, or anything else.
 */
robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) {
    serverAssert(o->type == OBJ_STRING);
    if (o->refcount != 1 || o->encoding != OBJ_ENCODING_RAW) {
        robj *decoded = getDecodedObject(o);
        o = createRawStringObject(decoded->ptr, sdslen(decoded->ptr));
        decrRefCount(decoded);
        dbOverwrite(db,key,o);
    }
    return o;
}

/* Remove all keys from all the databases in a Redis server.
 * If callback is given the function is called from time to time to
 * signal that work is in progress.
 *
 * The dbnum can be -1 if all the DBs should be flushed, or the specified
 * DB number if we want to flush only a single Redis database number.
 *
 * Flags can be EMPTYDB_NO_FLAGS if no special flags are specified or
 * EMPTYDB_ASYNC if we want the memory to be freed in a different thread
 * and the function to return ASAP.
 *
 * On success the function returns the number of keys removed from the
 * database(s). Otherwise -1 is returned in the specific case the
 * DB number is out of range, and errno is set to EINVAL. */
long long emptyDbGeneric(redisDb *dbarray, int dbnum, int flags, void(callback)(void*)) {
    int async = (flags & EMPTYDB_ASYNC);
    long long removed = 0;

    if (dbnum < -1 || dbnum >= server.dbnum) {
        errno = EINVAL;
        return -1;
    }

    /* Fire the flushdb modules event. */
    RedisModuleFlushInfoV1 fi = {REDISMODULE_FLUSHINFO_VERSION,!async,dbnum};
    moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB,
                          REDISMODULE_SUBEVENT_FLUSHDB_START,
                          &fi);

    /* Make sure the WATCHed keys are affected by the FLUSH* commands.
     * Note that we need to call the function while the keys are still
     * there. */
    signalFlushedDb(dbnum);

    int startdb, enddb;
    if (dbnum == -1) {
        startdb = 0;
        enddb = server.dbnum-1;
    } else {
        startdb = enddb = dbnum;
    }

    for (int j = startdb; j <= enddb; j++) {
        removed += dictSize(dbarray[j].dict);
        if (async) {
            emptyDbAsync(&dbarray[j]);
        } else {
            dictEmpty(dbarray[j].dict,callback);
            dictEmpty(dbarray[j].expires,callback);
        }
    }
    if (server.cluster_enabled) {
        if (async) {
            slotToKeyFlushAsync();
        } else {
            slotToKeyFlush();
        }
    }
    if (dbnum == -1) flushSlaveKeysWithExpireList();

    /* Also fire the end event. Note that this event will fire almost
     * immediately after the start event if the flush is asynchronous. */
    moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB,
                          REDISMODULE_SUBEVENT_FLUSHDB_END,
                          &fi);

    return removed;
}

long long emptyDb(int dbnum, int flags, void(callback)(void*)) {
    return emptyDbGeneric(server.db, dbnum, flags, callback);
}
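
/* Usage sketch (illustrative): callers such as the FLUSH* commands further
 * down in this file use the API above roughly like this:
 *
 *     long long removed = emptyDb(c->db->id, EMPTYDB_ASYNC, NULL);
 *     if (removed == -1 && errno == EINVAL) {
 *         // requested DB number out of range
 *     } else {
 *         server.dirty += removed;
 *     }
 */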

int selectDb(client *c, int id) {
    if (id < 0 || id >= server.dbnum)
        return C_ERR;
    c->db = &server.db[id];
    return C_OK;
}

long long dbTotalServerKeyCount() {
    long long total = 0;
    int j;
    for (j = 0; j < server.dbnum; j++) {
        total += dictSize(server.db[j].dict);
    }
    return total;
}

/*-----------------------------------------------------------------------------
 * Hooks for key space changes.
 *
 * Every time a key in the database is modified the function
 * signalModifiedKey() is called.
 *
 * Every time a DB is flushed the function signalFlushedDb() is called.
 *----------------------------------------------------------------------------*/

void signalModifiedKey(redisDb *db, robj *key) {
    touchWatchedKey(db,key);
    trackingInvalidateKey(key);
}

void signalFlushedDb(int dbid) {
    touchWatchedKeysOnFlush(dbid);
    trackingInvalidateKeysOnFlush(dbid);
}

/*-----------------------------------------------------------------------------
 * Type agnostic commands operating on the key space
 *----------------------------------------------------------------------------*/

/* Return the set of flags to use for the emptyDb() call for FLUSHALL
 * and FLUSHDB commands.
 *
 * Currently the command just attempts to parse the "ASYNC" option. It
 * also checks if the command arity is wrong.
 *
 * On success C_OK is returned and the flags are stored in *flags, otherwise
 * C_ERR is returned and the function sends an error to the client. */
int getFlushCommandFlags(client *c, int *flags) {
    /* Parse the optional ASYNC option. */
    if (c->argc > 1) {
        if (c->argc > 2 || strcasecmp(c->argv[1]->ptr,"async")) {
            addReply(c,shared.syntaxerr);
            return C_ERR;
        }
        *flags = EMPTYDB_ASYNC;
    } else {
        *flags = EMPTYDB_NO_FLAGS;
    }
    return C_OK;
}

/* Flushes the whole server data set. */
void flushAllDataAndResetRDB(int flags) {
    server.dirty += emptyDb(-1,flags,NULL);
    if (server.rdb_child_pid != -1) killRDBChild();
    if (server.saveparamslen > 0) {
        /* Normally rdbSave() will reset dirty, but we don't want this here
         * as otherwise FLUSHALL will not be replicated nor put into the AOF. */
        int saved_dirty = server.dirty;
        rdbSaveInfo rsi, *rsiptr;
        rsiptr = rdbPopulateSaveInfo(&rsi);
        rdbSave(server.rdb_filename,rsiptr);
        server.dirty = saved_dirty;
    }
    server.dirty++;
#if defined(USE_JEMALLOC)
    /* jemalloc 5 doesn't release pages back to the OS when there's no traffic.
     * For large databases, flushdb blocks for a long time anyway, so a bit
     * more won't harm, and this way the flush and purge will be synchronous. */
    if (!(flags & EMPTYDB_ASYNC))
        jemalloc_purge();
#endif
}

/* FLUSHDB [ASYNC]
 *
 * Flushes the currently SELECTed Redis DB. */
void flushdbCommand(client *c) {
    int flags;

    if (getFlushCommandFlags(c,&flags) == C_ERR) return;
    server.dirty += emptyDb(c->db->id,flags,NULL);
    addReply(c,shared.ok);
#if defined(USE_JEMALLOC)
    /* jemalloc 5 doesn't release pages back to the OS when there's no traffic.
     * For large databases, flushdb blocks for a long time anyway, so a bit
     * more won't harm, and this way the flush and purge will be synchronous. */
    if (!(flags & EMPTYDB_ASYNC))
        jemalloc_purge();
#endif
}

/* FLUSHALL [ASYNC]
 *
 * Flushes the whole server data set. */
void flushallCommand(client *c) {
    int flags;
    if (getFlushCommandFlags(c,&flags) == C_ERR) return;
    flushAllDataAndResetRDB(flags);
    addReply(c,shared.ok);
}

/* This command implements DEL and UNLINK. */
void delGenericCommand(client *c, int lazy) {
    int numdel = 0, j;

    for (j = 1; j < c->argc; j++) {
        expireIfNeeded(c->db,c->argv[j]);
        int deleted = lazy ? dbAsyncDelete(c->db,c->argv[j]) :
                             dbSyncDelete(c->db,c->argv[j]);
        if (deleted) {
            signalModifiedKey(c->db,c->argv[j]);
            notifyKeyspaceEvent(NOTIFY_GENERIC,
                "del",c->argv[j],c->db->id);
            server.dirty++;
            numdel++;
        }
    }
    addReplyLongLong(c,numdel);
}

void delCommand(client *c) {
    delGenericCommand(c,0);
}

void unlinkCommand(client *c) {
    delGenericCommand(c,1);
}

/* EXISTS key1 key2 ... key_N.
 * Return value is the number of keys existing. */
void existsCommand(client *c) {
    long long count = 0;
    int j;

    for (j = 1; j < c->argc; j++) {
        if (lookupKeyRead(c->db,c->argv[j])) count++;
    }
    addReplyLongLong(c,count);
}

void selectCommand(client *c) {
    long id;

    if (getLongFromObjectOrReply(c, c->argv[1], &id,
        "invalid DB index") != C_OK)
        return;

    if (server.cluster_enabled && id != 0) {
        addReplyError(c,"SELECT is not allowed in cluster mode");
        return;
    }
    if (selectDb(c,id) == C_ERR) {
        addReplyError(c,"DB index is out of range");
    } else {
        addReply(c,shared.ok);
    }
}

void randomkeyCommand(client *c) {
    robj *key;

    if ((key = dbRandomKey(c->db)) == NULL) {
        addReplyNull(c);
        return;
    }

    addReplyBulk(c,key);
    decrRefCount(key);
}

void keysCommand(client *c) {
    dictIterator *di;
    dictEntry *de;
    sds pattern = c->argv[1]->ptr;
    int plen = sdslen(pattern), allkeys;
    unsigned long numkeys = 0;
    void *replylen = addReplyDeferredLen(c);

    di = dictGetSafeIterator(c->db->dict);
    allkeys = (pattern[0] == '*' && pattern[1] == '\0');
    while((de = dictNext(di)) != NULL) {
        sds key = dictGetKey(de);
        robj *keyobj;

        if (allkeys || stringmatchlen(pattern,plen,key,sdslen(key),0)) {
            keyobj = createStringObject(key,sdslen(key));
            if (!keyIsExpired(c->db,keyobj)) {
                addReplyBulk(c,keyobj);
                numkeys++;
            }
            decrRefCount(keyobj);
        }
    }
    dictReleaseIterator(di);
    setDeferredArrayLen(c,replylen,numkeys);
}

/* This callback is used by scanGenericCommand in order to collect elements
 * returned by the dictionary iterator into a list. */
void scanCallback(void *privdata, const dictEntry *de) {
    void **pd = (void**) privdata;
    list *keys = pd[0];
    robj *o = pd[1];
    robj *key, *val = NULL;

    if (o == NULL) {
        sds sdskey = dictGetKey(de);
        key = createStringObject(sdskey, sdslen(sdskey));
    } else if (o->type == OBJ_SET) {
        sds keysds = dictGetKey(de);
        key = createStringObject(keysds,sdslen(keysds));
    } else if (o->type == OBJ_HASH) {
        sds sdskey = dictGetKey(de);
        sds sdsval = dictGetVal(de);
        key = createStringObject(sdskey,sdslen(sdskey));
        val = createStringObject(sdsval,sdslen(sdsval));
    } else if (o->type == OBJ_ZSET) {
        sds sdskey = dictGetKey(de);
        key = createStringObject(sdskey,sdslen(sdskey));
        val = createStringObjectFromLongDouble(*(double*)dictGetVal(de),0);
    } else {
        serverPanic("Type not handled in SCAN callback.");
    }

    listAddNodeTail(keys, key);
    if (val) listAddNodeTail(keys, val);
}

/* Try to parse a SCAN cursor stored at object 'o':
 * if the cursor is valid, store it as an unsigned integer into *cursor and
 * return C_OK. Otherwise return C_ERR and send an error to the
 * client. */
int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor) {
    char *eptr;

    /* Use strtoul() because we need an *unsigned* long, so
     * getLongLongFromObject() does not cover the whole cursor space. */
    errno = 0;
    *cursor = strtoul(o->ptr, &eptr, 10);
    if (isspace(((char*)o->ptr)[0]) || eptr[0] != '\0' || errno == ERANGE)
    {
        addReplyError(c, "invalid cursor");
        return C_ERR;
    }
    return C_OK;
}

/* This command implements SCAN, HSCAN and SSCAN commands.
 * If object 'o' is passed, then it must be a Hash, Set or Zset object, otherwise
 * if 'o' is NULL the command will operate on the dictionary associated with
 * the current database.
 *
 * When 'o' is not NULL the function assumes that the first argument in
 * the client arguments vector is a key so it skips it before iterating
 * in order to parse options.
 *
 * In the case of a Hash object the function returns both the field and value
 * of every element on the Hash. */
void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
    int i, j;
    list *keys = listCreate();
    listNode *node, *nextnode;
    long count = 10;
    sds pat = NULL;
    sds typename = NULL;
    int patlen = 0, use_pattern = 0;
    dict *ht;

    /* Object must be NULL (to iterate key names), or the type of the object
     * must be Set, Sorted Set, or Hash. */
    serverAssert(o == NULL || o->type == OBJ_SET || o->type == OBJ_HASH ||
                 o->type == OBJ_ZSET);

    /* Set i to the first option argument. The previous one is the cursor. */
    i = (o == NULL) ? 2 : 3; /* Skip the key argument if needed. */

    /* Step 1: Parse options. */
    while (i < c->argc) {
        j = c->argc - i;
        if (!strcasecmp(c->argv[i]->ptr, "count") && j >= 2) {
            if (getLongFromObjectOrReply(c, c->argv[i+1], &count, NULL)
                != C_OK)
            {
                goto cleanup;
            }

            if (count < 1) {
                addReply(c,shared.syntaxerr);
                goto cleanup;
            }

            i += 2;
        } else if (!strcasecmp(c->argv[i]->ptr, "match") && j >= 2) {
            pat = c->argv[i+1]->ptr;
            patlen = sdslen(pat);

            /* The pattern always matches if it is exactly "*", so it is
             * equivalent to disabling it. */
            use_pattern = !(pat[0] == '*' && patlen == 1);

            i += 2;
        } else if (!strcasecmp(c->argv[i]->ptr, "type") && o == NULL && j >= 2) {
            /* SCAN for a particular type only applies to the db dict. */
            typename = c->argv[i+1]->ptr;
            i += 2;
        } else {
            addReply(c,shared.syntaxerr);
            goto cleanup;
        }
    }

    /* Step 2: Iterate the collection.
     *
     * Note that if the object is encoded with a ziplist, intset, or any other
     * representation that is not a hash table, we are sure that it is also
     * composed of a small number of elements. So to avoid taking state we
     * just return everything inside the object in a single call, setting the
     * cursor to zero to signal the end of the iteration. */

    /* Handle the case of a hash table. */
    ht = NULL;
    if (o == NULL) {
        ht = c->db->dict;
    } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HT) {
        ht = o->ptr;
    } else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT) {
        ht = o->ptr;
        count *= 2; /* We return key / value for this type. */
    } else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = o->ptr;
        ht = zs->dict;
        count *= 2; /* We return key / value for this type. */
    }

    if (ht) {
        void *privdata[2];
        /* We set the max number of iterations to ten times the specified
         * COUNT, so if the hash table is in a pathological state (very
         * sparsely populated) we avoid blocking for too much time at the
         * cost of returning no or very few elements. */
        long maxiterations = count*10;

        /* We pass two pointers to the callback: the list to which it will
         * add new elements, and the object containing the dictionary so that
         * it is possible to fetch more data in a type-dependent way. */
        privdata[0] = keys;
        privdata[1] = o;
        do {
            cursor = dictScan(ht, cursor, scanCallback, NULL, privdata);
        } while (cursor &&
              maxiterations-- &&
              listLength(keys) < (unsigned long)count);
    } else if (o->type == OBJ_SET) {
        int pos = 0;
        int64_t ll;

        while(intsetGet(o->ptr,pos++,&ll))
            listAddNodeTail(keys,createStringObjectFromLongLong(ll));
        cursor = 0;
    } else if (o->type == OBJ_HASH || o->type == OBJ_ZSET) {
        unsigned char *p = ziplistIndex(o->ptr,0);
        unsigned char *vstr;
        unsigned int vlen;
        long long vll;

        while(p) {
            ziplistGet(p,&vstr,&vlen,&vll);
            listAddNodeTail(keys,
                (vstr != NULL) ? createStringObject((char*)vstr,vlen) :
                                 createStringObjectFromLongLong(vll));
            p = ziplistNext(o->ptr,p);
        }
        cursor = 0;
    } else {
        serverPanic("Not handled encoding in SCAN.");
    }

    /* Step 3: Filter elements. */
    node = listFirst(keys);
    while (node) {
        robj *kobj = listNodeValue(node);
        nextnode = listNextNode(node);
        int filter = 0;

        /* Filter element if it does not match the pattern. */
        if (!filter && use_pattern) {
            if (sdsEncodedObject(kobj)) {
                if (!stringmatchlen(pat, patlen, kobj->ptr, sdslen(kobj->ptr), 0))
                    filter = 1;
            } else {
                char buf[LONG_STR_SIZE];
                int len;

                serverAssert(kobj->encoding == OBJ_ENCODING_INT);
                len = ll2string(buf,sizeof(buf),(long)kobj->ptr);
                if (!stringmatchlen(pat, patlen, buf, len, 0)) filter = 1;
            }
        }

        /* Filter an element if it isn't the type we want. */
        if (!filter && o == NULL && typename){
            robj* typecheck = lookupKeyReadWithFlags(c->db, kobj, LOOKUP_NOTOUCH);
            char* type = getObjectTypeName(typecheck);
            if (strcasecmp((char*) typename, type)) filter = 1;
        }

        /* Filter element if it is an expired key. */
        if (!filter && o == NULL && expireIfNeeded(c->db, kobj)) filter = 1;

        /* Remove the element and its associated value if needed. */
        if (filter) {
            decrRefCount(kobj);
            listDelNode(keys, node);
        }

        /* If this is a hash or a sorted set, we have a flat list of
         * key-value elements, so if this element was filtered, remove the
         * value, or skip it if it was not filtered: we only match keys. */
        if (o && (o->type == OBJ_ZSET || o->type == OBJ_HASH)) {
            node = nextnode;
            nextnode = listNextNode(node);
            if (filter) {
                kobj = listNodeValue(node);
                decrRefCount(kobj);
                listDelNode(keys, node);
            }
        }
        node = nextnode;
    }

    /* Step 4: Reply to the client. */
    addReplyArrayLen(c, 2);
    addReplyBulkLongLong(c,cursor);

    addReplyArrayLen(c, listLength(keys));
    while ((node = listFirst(keys)) != NULL) {
        robj *kobj = listNodeValue(node);
        addReplyBulk(c, kobj);
        decrRefCount(kobj);
        listDelNode(keys, node);
    }

cleanup:
    listSetFreeMethod(keys,decrRefCountVoid);
    listRelease(keys);
}
|
|
|
|
|
2013-10-28 06:11:34 -04:00
|
|
|
/* The SCAN command completely relies on scanGenericCommand. */
|
2015-07-26 09:20:46 -04:00
|
|
|
void scanCommand(client *c) {
|
2013-11-05 09:47:50 -05:00
|
|
|
unsigned long cursor;
|
2015-07-26 17:17:55 -04:00
|
|
|
if (parseScanCursorOrReply(c,c->argv[1],&cursor) == C_ERR) return;
|
2013-11-05 09:47:50 -05:00
|
|
|
scanGenericCommand(c,NULL,cursor);
|
2013-10-28 06:11:34 -04:00
|
|
|
}
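/* Illustrative, standalone client-side sketch (not part of the original
 * source): how a caller consumes the two-element reply built in Step 4
 * above (new cursor + array of keys) and drives the cursor until it wraps
 * back to 0. It assumes the hiredis client library; the host, MATCH
 * pattern and COUNT hint are arbitrary examples. */
#include <hiredis/hiredis.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    redisContext *ctx = redisConnect("127.0.0.1", 6379);
    if (ctx == NULL || ctx->err) return 1;

    char cursorbuf[32];
    unsigned long long cursor = 0;
    do {
        snprintf(cursorbuf, sizeof(cursorbuf), "%llu", cursor);
        redisReply *reply = redisCommand(ctx,
            "SCAN %s MATCH user:* COUNT 100", cursorbuf);
        if (reply == NULL || reply->type != REDIS_REPLY_ARRAY) break;

        /* Element 0 is the next cursor, element 1 the array of keys. */
        cursor = strtoull(reply->element[0]->str, NULL, 10);
        for (size_t j = 0; j < reply->element[1]->elements; j++)
            printf("%s\n", reply->element[1]->element[j]->str);
        freeReplyObject(reply);
    } while (cursor != 0); /* A cursor of 0 means the iteration is done. */

    redisFree(ctx);
    return 0;
}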
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void dbsizeCommand(client *c) {
|
2010-09-02 08:30:56 -04:00
|
|
|
addReplyLongLong(c,dictSize(c->db->dict));
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void lastsaveCommand(client *c) {
|
2010-09-02 08:30:56 -04:00
|
|
|
addReplyLongLong(c,server.lastsave);
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
|
2019-07-08 06:04:37 -04:00
|
|
|
char* getObjectTypeName(robj *o) {
|
2019-06-10 12:41:44 -04:00
|
|
|
char* type;
|
2010-06-21 18:07:48 -04:00
|
|
|
if (o == NULL) {
|
2010-09-02 13:52:24 -04:00
|
|
|
type = "none";
|
2010-06-21 18:07:48 -04:00
|
|
|
} else {
|
|
|
|
switch(o->type) {
|
2015-07-26 09:28:00 -04:00
|
|
|
case OBJ_STRING: type = "string"; break;
|
|
|
|
case OBJ_LIST: type = "list"; break;
|
|
|
|
case OBJ_SET: type = "set"; break;
|
|
|
|
case OBJ_ZSET: type = "zset"; break;
|
|
|
|
case OBJ_HASH: type = "hash"; break;
|
2017-11-08 16:57:10 -05:00
|
|
|
case OBJ_STREAM: type = "stream"; break;
|
2016-05-18 05:45:40 -04:00
|
|
|
case OBJ_MODULE: {
|
|
|
|
moduleValue *mv = o->ptr;
|
|
|
|
type = mv->type->name;
|
|
|
|
}; break;
|
2010-09-02 13:52:24 -04:00
|
|
|
default: type = "unknown"; break;
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
}
|
2019-06-10 12:41:44 -04:00
|
|
|
return type;
|
|
|
|
}
|
|
|
|
|
|
|
|
void typeCommand(client *c) {
|
|
|
|
robj *o;
|
|
|
|
o = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH);
|
2019-07-08 06:04:37 -04:00
|
|
|
addReplyStatus(c, getObjectTypeName(o));
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void shutdownCommand(client *c) {
|
2011-11-18 08:10:48 -05:00
|
|
|
int flags = 0;
|
|
|
|
|
|
|
|
if (c->argc > 2) {
|
|
|
|
addReply(c,shared.syntaxerr);
|
|
|
|
return;
|
|
|
|
} else if (c->argc == 2) {
|
|
|
|
if (!strcasecmp(c->argv[1]->ptr,"nosave")) {
|
2015-07-27 03:41:48 -04:00
|
|
|
flags |= SHUTDOWN_NOSAVE;
|
2011-11-18 08:10:48 -05:00
|
|
|
} else if (!strcasecmp(c->argv[1]->ptr,"save")) {
|
2015-07-27 03:41:48 -04:00
|
|
|
flags |= SHUTDOWN_SAVE;
|
2011-11-18 08:10:48 -05:00
|
|
|
} else {
|
|
|
|
addReply(c,shared.syntaxerr);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
2014-02-07 05:22:24 -05:00
|
|
|
/* When SHUTDOWN is called while the server is loading a dataset in
|
|
|
|
* memory, we need to make sure no attempt is made to save
|
2013-06-27 06:14:23 -04:00
|
|
|
* the dataset on shutdown (otherwise it could overwrite the current DB
|
2014-02-07 05:22:24 -05:00
|
|
|
* with half-read data).
|
|
|
|
*
|
|
|
|
* Also when in Sentinel mode clear the SAVE flag and force NOSAVE. */
|
|
|
|
if (server.loading || server.sentinel_mode)
|
2015-07-27 03:41:48 -04:00
|
|
|
flags = (flags & ~SHUTDOWN_SAVE) | SHUTDOWN_NOSAVE;
|
2015-07-26 17:17:55 -04:00
|
|
|
if (prepareForShutdown(flags) == C_OK) exit(0);
|
2010-09-02 13:52:24 -04:00
|
|
|
addReplyError(c,"Errors trying to SHUTDOWN. Check logs.");
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void renameGenericCommand(client *c, int nx) {
|
2010-06-21 18:07:48 -04:00
|
|
|
robj *o;
|
2011-11-09 10:51:19 -05:00
|
|
|
long long expire;
|
2015-02-23 05:24:24 -05:00
|
|
|
int samekey = 0;
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2015-02-23 05:24:24 -05:00
|
|
|
/* When the source and destination keys are the same, no operation is performed
|
|
|
|
* if the key exists. However, we still return an error for a non-existing key. */
|
|
|
|
if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) samekey = 1;
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL)
|
|
|
|
return;
|
|
|
|
|
2015-02-23 05:24:24 -05:00
|
|
|
if (samekey) {
|
|
|
|
addReply(c,nx ? shared.czero : shared.ok);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
incrRefCount(o);
|
2011-10-10 09:21:19 -04:00
|
|
|
expire = getExpire(c->db,c->argv[1]);
|
2011-06-14 09:34:27 -04:00
|
|
|
if (lookupKeyWrite(c->db,c->argv[2]) != NULL) {
|
2010-06-21 18:07:48 -04:00
|
|
|
if (nx) {
|
|
|
|
decrRefCount(o);
|
|
|
|
addReply(c,shared.czero);
|
|
|
|
return;
|
|
|
|
}
|
2013-01-23 10:44:45 -05:00
|
|
|
/* Overwrite: delete the old key before creating the new one
|
|
|
|
* with the same name. */
|
2011-10-10 09:21:19 -04:00
|
|
|
dbDelete(c->db,c->argv[2]);
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
2011-10-10 09:21:19 -04:00
|
|
|
dbAdd(c->db,c->argv[2],o);
|
Replication: fix the infamous key leakage of writable slaves + EXPIRE.
BACKGROUND AND USE CASE
Redis slaves are normally read only, however they support a "writable"
mode which is very handy when scaling reads on slaves that actually
need write operations in order to access data. For instance imagine
having slaves replicating certain Sets keys from the master. When
accessing the data on the slave, we want to perform intersections between
such Sets values. However we don't want to intersect each time: caching
the intersection for some time is often a good idea.
To do so, it is possible to set up a slave as a writable slave, and
perform the intersection on the slave side, perhaps setting a TTL on the
resulting key so that it will expire after some time.
THE BUG
Problem: in order to have consistent replication, expiring of keys in
Redis replication is up to the master, which synthesizes DEL operations to
send in the replication stream. However slaves logically expire keys
by hiding them from read attempts from clients, so that if the master did
not promptly send a DEL, the client still sees logically expired keys
as non existing.
Because slaves don't actively expire keys by actually evicting them but
just mask them from the POV of read operations, if a key is created in a
writable slave, and an expire is set, the key will be leaked forever:
1. No DEL will be received from the master, which does not know about
such a key at all.
2. No eviction will be performed by the slave, since it needs to disable
eviction because it's up to masters, otherwise consistency of data is
lost.
THE FIX
In order to fix the problem, the slave should be able to tag keys that
were created on the slave side and have an expire set in some way.
My solution involved using a unique additional dictionary created by
the writable slave only if needed. The dictionary is obviously keyed by
the key name that we need to track: all the keys that are set with an
expire directly by a client writing to the slave are tracked.
The value in the dictionary is a bitmap of all the DBs where such a key
name needs to be tracked, so that we can use a single dictionary to track
keys in all the DBs used by the slave (actually this limits the solution
to the first 64 DBs, but the default with Redis is to use 16 DBs).
This solution pays both a small complexity and CPU penalty, which is
zero when the feature is not used. The slave-side
eviction is encapsulated in code which is not coupled with the rest of
the Redis core, if not for the hook to track the keys.
TODO
I'm doing the first smoke tests to see if the feature works as expected:
so far so good. Unit tests should be added before merging into the
4.0 branch.
2016-12-13 04:20:06 -05:00
|
|
|
if (expire != -1) setExpire(c,c->db,c->argv[2],expire);
|
2010-06-21 18:07:48 -04:00
|
|
|
dbDelete(c->db,c->argv[1]);
|
2010-12-29 13:39:42 -05:00
|
|
|
signalModifiedKey(c->db,c->argv[1]);
|
|
|
|
signalModifiedKey(c->db,c->argv[2]);
|
2015-07-27 03:41:48 -04:00
|
|
|
notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_from",
|
2013-01-25 07:19:08 -05:00
|
|
|
c->argv[1],c->db->id);
|
2015-07-27 03:41:48 -04:00
|
|
|
notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_to",
|
2013-01-25 07:19:08 -05:00
|
|
|
c->argv[2],c->db->id);
|
2010-06-21 18:07:48 -04:00
|
|
|
server.dirty++;
|
|
|
|
addReply(c,nx ? shared.cone : shared.ok);
|
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void renameCommand(client *c) {
|
2010-06-21 18:07:48 -04:00
|
|
|
renameGenericCommand(c,0);
|
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void renamenxCommand(client *c) {
|
2010-06-21 18:07:48 -04:00
|
|
|
renameGenericCommand(c,1);
|
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void moveCommand(client *c) {
|
2010-06-21 18:07:48 -04:00
|
|
|
robj *o;
|
|
|
|
redisDb *src, *dst;
|
|
|
|
int srcid;
|
2015-09-14 06:28:22 -04:00
|
|
|
long long dbid, expire;
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2011-03-29 11:51:15 -04:00
|
|
|
if (server.cluster_enabled) {
|
|
|
|
addReplyError(c,"MOVE is not allowed in cluster mode");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Obtain source and target DB pointers */
|
|
|
|
src = c->db;
|
|
|
|
srcid = c->db->id;
|
2014-08-01 14:55:24 -04:00
|
|
|
|
2015-07-26 17:17:55 -04:00
|
|
|
if (getLongLongFromObject(c->argv[2],&dbid) == C_ERR ||
|
2014-08-01 14:55:24 -04:00
|
|
|
dbid < INT_MIN || dbid > INT_MAX ||
|
2015-07-26 17:17:55 -04:00
|
|
|
selectDb(c,dbid) == C_ERR)
|
2014-08-01 14:55:24 -04:00
|
|
|
{
|
2010-06-21 18:07:48 -04:00
|
|
|
addReply(c,shared.outofrangeerr);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
dst = c->db;
|
|
|
|
selectDb(c,srcid); /* Back to the source DB */
|
|
|
|
|
|
|
|
/* If the user is moving the key to the same
|
|
|
|
* DB as the source DB, it is probably an error. */
|
|
|
|
if (src == dst) {
|
|
|
|
addReply(c,shared.sameobjecterr);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check if the element exists and get a reference */
|
|
|
|
o = lookupKeyWrite(c->db,c->argv[1]);
|
|
|
|
if (!o) {
|
|
|
|
addReply(c,shared.czero);
|
|
|
|
return;
|
|
|
|
}
|
2015-09-14 06:28:22 -04:00
|
|
|
expire = getExpire(c->db,c->argv[1]);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2011-06-14 09:34:27 -04:00
|
|
|
/* Return zero if the key already exists in the target DB */
|
|
|
|
if (lookupKeyWrite(dst,c->argv[1]) != NULL) {
|
2010-06-21 18:07:48 -04:00
|
|
|
addReply(c,shared.czero);
|
|
|
|
return;
|
|
|
|
}
|
2011-06-14 09:34:27 -04:00
|
|
|
dbAdd(dst,c->argv[1],o);
|
2016-12-13 04:20:06 -05:00
|
|
|
if (expire != -1) setExpire(c,dst,c->argv[1],expire);
|
2010-06-21 18:07:48 -04:00
|
|
|
incrRefCount(o);
|
|
|
|
|
|
|
|
/* OK! key moved, free the entry in the source DB */
|
|
|
|
dbDelete(src,c->argv[1]);
|
2019-11-19 00:02:45 -05:00
|
|
|
signalModifiedKey(src,c->argv[1]);
|
|
|
|
signalModifiedKey(dst,c->argv[1]);
|
|
|
|
notifyKeyspaceEvent(NOTIFY_GENERIC,
|
|
|
|
"move_from",c->argv[1],src->id);
|
|
|
|
notifyKeyspaceEvent(NOTIFY_GENERIC,
|
|
|
|
"move_to",c->argv[1],dst->id);
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
server.dirty++;
|
|
|
|
addReply(c,shared.cone);
|
|
|
|
}
|
|
|
|
|
SWAPDB command.
This new command swaps two Redis databases, so that immediately all the
clients connected to a given DB will see the data of the other DB, and
the other way around. Example:
SWAPDB 0 1
This will swap DB 0 with DB 1. All the clients connected with DB 0 will
immediately see the new data, exactly like all the clients connected
with DB 1 will see the data that was formerly of DB 0.
MOTIVATION AND HISTORY
---
The command was recently demanded by Pedro Melo, but was suggested in
the past multiple times, and always refused by me.
The reason why it was asked: imagine you have clients operating in DB 0.
At the same time, you create a new version of the dataset in DB 1.
When the new version of the dataset is available, you immediately want
to swap the two views, so that the clients will transparently use the
new version of the data. At the same time you'll likely destroy the
DB 1 dataset (that contains the old data) and start to build a new
version, to repeat the process.
This is an interesting pattern, but the reason why I always opposed
implementing it was that FLUSHDB was a blocking command in Redis before
the Redis 4.0 improvements. Now we have FLUSHDB ASYNC, which releases the
old data in O(1) from the point of view of the client, to reclaim memory
incrementally in a different thread.
At this point, the pattern can really be supported without latency
spikes, so I'm providing this implementation for the users to comment on.
In case a very compelling argument is made against this new command,
it may be removed.
BEHAVIOR WITH BLOCKING OPERATIONS
---
If a client is blocking for a list in a given DB, after the swap it will
still be blocked in the same DB ID, since this is the most logical thing
to do: if I was blocked for a list push to list "foo", even after the
swap I still want an LPUSH to reach the key "foo" in the same DB in order
to unblock.
However an interesting thing happens when a client is, for instance,
blocked waiting for new elements in list "foo" of DB 0. Then DB
0 and DB 1 are swapped with SWAPDB. However DB 1 happened to have
a list called "foo" containing elements. When this happens, this
implementation can correctly unblock the client.
It is possible that there are subtle corner cases that are not covered
in the implementation, but since the command is self-contained from the
POV of the implementation and the Redis core, it cannot cause anything
bad if not used.
Tests and documentation are yet to be provided.
2016-10-14 09:28:04 -04:00
|
|
|
/* Helper function for dbSwapDatabases(): scans the list of keys that have
|
2018-06-12 11:28:40 -04:00
|
|
|
* one or more blocked clients for B[LR]POP or other blocking commands
|
|
|
|
* and signals the keys as ready if they are of the right type. See the comment
|
|
|
|
* where the function is used for more info. */
|
2016-10-14 09:28:04 -04:00
|
|
|
void scanDatabaseForReadyLists(redisDb *db) {
|
|
|
|
dictEntry *de;
|
|
|
|
dictIterator *di = dictGetSafeIterator(db->blocking_keys);
|
|
|
|
while((de = dictNext(di)) != NULL) {
|
|
|
|
robj *key = dictGetKey(de);
|
|
|
|
robj *value = lookupKey(db,key,LOOKUP_NOTOUCH);
|
2018-06-12 11:28:40 -04:00
|
|
|
if (value && (value->type == OBJ_LIST ||
|
|
|
|
value->type == OBJ_STREAM ||
|
|
|
|
value->type == OBJ_ZSET))
|
2017-09-06 09:43:28 -04:00
|
|
|
signalKeyAsReady(db, key);
|
2016-10-14 09:28:04 -04:00
|
|
|
}
|
|
|
|
dictReleaseIterator(di);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Swap two databases at runtime so that all clients will magically see
|
|
|
|
* the new database even if already connected. Note that the client
|
|
|
|
* structure c->db points to a given DB, so we need to be smarter and
|
|
|
|
* swap the underlying referenced structures, otherwise we would need
|
|
|
|
* to fix all the references to the Redis DB structure.
|
|
|
|
*
|
|
|
|
* Returns C_ERR if at least one of the DB ids is out of range, otherwise
|
|
|
|
* C_OK is returned. */
|
2018-12-31 04:51:03 -05:00
|
|
|
int dbSwapDatabases(long id1, long id2) {
|
2016-10-14 09:28:04 -04:00
|
|
|
if (id1 < 0 || id1 >= server.dbnum ||
|
|
|
|
id2 < 0 || id2 >= server.dbnum) return C_ERR;
|
|
|
|
if (id1 == id2) return C_OK;
|
|
|
|
redisDb aux = server.db[id1];
|
|
|
|
redisDb *db1 = &server.db[id1], *db2 = &server.db[id2];
|
|
|
|
|
|
|
|
/* Swap hash tables. Note that we don't swap blocking_keys,
|
|
|
|
* ready_keys and watched_keys, since we want clients to
|
|
|
|
* remain in the same DB they were. */
|
|
|
|
db1->dict = db2->dict;
|
|
|
|
db1->expires = db2->expires;
|
|
|
|
db1->avg_ttl = db2->avg_ttl;
|
2019-11-14 12:27:37 -05:00
|
|
|
db1->expires_cursor = db2->expires_cursor;
|
2016-10-14 09:28:04 -04:00
|
|
|
|
|
|
|
db2->dict = aux.dict;
|
|
|
|
db2->expires = aux.expires;
|
|
|
|
db2->avg_ttl = aux.avg_ttl;
|
2019-11-14 12:27:37 -05:00
|
|
|
db2->expires_cursor = aux.expires_cursor;
|
2016-10-14 09:28:04 -04:00
|
|
|
|
|
|
|
/* Now we need to handle clients blocked on lists: as an effect
|
|
|
|
* of swapping the two DBs, a client that was waiting for list
|
|
|
|
* X in a given DB, may now actually be unblocked if X happens
|
|
|
|
* to exist in the new version of the DB, after the swap.
|
|
|
|
*
|
|
|
|
* However normally we only do this check for efficiency reasons
|
|
|
|
* in dbAdd() when a list is created. So here we need to rescan
|
|
|
|
* the list of clients blocked on lists and signal lists as ready
|
|
|
|
* if needed. */
|
|
|
|
scanDatabaseForReadyLists(db1);
|
|
|
|
scanDatabaseForReadyLists(db2);
|
|
|
|
return C_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* SWAPDB db1 db2 */
|
|
|
|
void swapdbCommand(client *c) {
|
|
|
|
long id1, id2;
|
|
|
|
|
|
|
|
/* Not allowed in cluster mode: we have just DB 0 there. */
|
|
|
|
if (server.cluster_enabled) {
|
|
|
|
addReplyError(c,"SWAPDB is not allowed in cluster mode");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get the two DBs indexes. */
|
|
|
|
if (getLongFromObjectOrReply(c, c->argv[1], &id1,
|
|
|
|
"invalid first DB index") != C_OK)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (getLongFromObjectOrReply(c, c->argv[2], &id2,
|
|
|
|
"invalid second DB index") != C_OK)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Swap... */
|
|
|
|
if (dbSwapDatabases(id1,id2) == C_ERR) {
|
|
|
|
addReplyError(c,"DB index is out of range");
|
|
|
|
return;
|
|
|
|
} else {
|
|
|
|
server.dirty++;
|
|
|
|
addReply(c,shared.ok);
|
|
|
|
}
|
|
|
|
}
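/* Illustrative session (a sketch, not part of the original source): build
 * the next version of a dataset in DB 1 while clients read DB 0, then
 * atomically expose it. Key and value names are arbitrary examples.
 *
 *   redis-cli -n 0 SET view current     -> OK   (what clients see now)
 *   redis-cli -n 1 SET view next        -> OK   (the new dataset)
 *   redis-cli SWAPDB 0 1                -> OK
 *   redis-cli -n 0 GET view             -> "next"
 *   redis-cli -n 1 FLUSHDB ASYNC        -> OK   (reclaim the old data)
 */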
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/*-----------------------------------------------------------------------------
|
|
|
|
* Expires API
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
int removeExpire(redisDb *db, robj *key) {
|
|
|
|
/* An expire may only be removed if there is a corresponding entry in the
|
|
|
|
* main dict. Otherwise, the key will never be freed. */
|
2015-07-26 09:29:53 -04:00
|
|
|
serverAssertWithInfo(NULL,key,dictFind(db->dict,key->ptr) != NULL);
|
2010-08-03 08:19:20 -04:00
|
|
|
return dictDelete(db->expires,key->ptr) == DICT_OK;
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
|
2016-12-13 04:20:06 -05:00
|
|
|
/* Set an expire to the specified key. If the expire is set in the context
|
|
|
|
* of a user calling a command, 'c' is the client, otherwise 'c' is set
|
|
|
|
* to NULL. The 'when' parameter is the absolute unix time in milliseconds
|
|
|
|
* after which the key will no longer be considered valid. */
|
|
|
|
void setExpire(client *c, redisDb *db, robj *key, long long when) {
|
2011-11-09 10:51:19 -05:00
|
|
|
dictEntry *kde, *de;
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* Reuse the sds from the main dict in the expire dict */
|
2011-11-09 10:51:19 -05:00
|
|
|
kde = dictFind(db->dict,key->ptr);
|
2015-07-26 09:29:53 -04:00
|
|
|
serverAssertWithInfo(NULL,key,kde != NULL);
|
2016-09-14 10:43:38 -04:00
|
|
|
de = dictAddOrFind(db->expires,dictGetKey(kde));
|
2011-11-09 10:51:19 -05:00
|
|
|
dictSetSignedIntegerVal(de,when);
|
2016-12-13 04:20:06 -05:00
|
|
|
|
|
|
|
int writable_slave = server.masterhost && server.repl_slave_ro == 0;
|
|
|
|
if (c && writable_slave && !(c->flags & CLIENT_MASTER))
|
|
|
|
rememberSlaveKeyWithExpire(db,key);
|
2010-06-21 18:07:48 -04:00
|
|
|
}
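/* Illustrative use (a sketch, not part of the original source): callers
 * such as the EXPIRE family of commands convert a relative TTL into an
 * absolute unix time in milliseconds before calling this function, so a
 * 10 second TTL on 'key' would be set as:
 *
 *     setExpire(c, c->db, key, mstime() + 10*1000);
 */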
|
|
|
|
|
|
|
|
/* Return the expire time of the specified key, or -1 if no expire
|
|
|
|
* is associated with this key (i.e. the key is non volatile) */
|
2011-11-09 10:51:19 -05:00
|
|
|
long long getExpire(redisDb *db, robj *key) {
|
2010-06-21 18:07:48 -04:00
|
|
|
dictEntry *de;
|
|
|
|
|
|
|
|
/* No expire? return ASAP */
|
|
|
|
if (dictSize(db->expires) == 0 ||
|
|
|
|
(de = dictFind(db->expires,key->ptr)) == NULL) return -1;
|
|
|
|
|
|
|
|
/* The entry was found in the expire dict; this means it should also
|
|
|
|
* be present in the main dict (safety check). */
|
2015-07-26 09:29:53 -04:00
|
|
|
serverAssertWithInfo(NULL,key,dictFind(db->dict,key->ptr) != NULL);
|
2011-11-09 10:51:19 -05:00
|
|
|
return dictGetSignedIntegerVal(de);
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
|
2010-08-02 12:13:39 -04:00
|
|
|
/* Propagate expires into slaves and the AOF file.
|
|
|
|
* When a key expires in the master, a DEL operation for this key is sent
|
|
|
|
* to all the slaves and the AOF file if enabled.
|
|
|
|
*
|
|
|
|
* This way the key expiry is centralized in one place, and since both
|
|
|
|
* AOF and the master->slave link guarantee operation ordering, everything
|
|
|
|
* will be consistent even if we allow write operations against expiring
|
|
|
|
* keys. */
|
2015-10-02 09:27:57 -04:00
|
|
|
void propagateExpire(redisDb *db, robj *key, int lazy) {
|
2010-08-02 12:13:39 -04:00
|
|
|
robj *argv[2];
|
|
|
|
|
2015-10-02 09:27:57 -04:00
|
|
|
argv[0] = lazy ? shared.unlink : shared.del;
|
2010-08-02 12:13:39 -04:00
|
|
|
argv[1] = key;
|
2012-02-04 02:58:37 -05:00
|
|
|
incrRefCount(argv[0]);
|
|
|
|
incrRefCount(argv[1]);
|
2010-08-02 12:13:39 -04:00
|
|
|
|
2015-07-27 03:41:48 -04:00
|
|
|
if (server.aof_state != AOF_OFF)
|
2010-11-03 06:23:59 -04:00
|
|
|
feedAppendOnlyFile(server.delCommand,db->id,argv,2);
|
2013-01-30 12:33:16 -05:00
|
|
|
replicationFeedSlaves(server.slaves,db->id,argv,2);
|
2010-08-02 12:13:39 -04:00
|
|
|
|
2010-08-02 15:37:39 -04:00
|
|
|
decrRefCount(argv[0]);
|
|
|
|
decrRefCount(argv[1]);
|
2010-08-02 12:13:39 -04:00
|
|
|
}
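/* Illustrative effect (a sketch, not part of the original source): when a
 * key named "session:42" expires on a master with lazyfree-lazy-expire
 * enabled, replicas and the AOF receive
 *
 *     UNLINK session:42
 *
 * (or DEL session:42 when lazy freeing is disabled), so every copy of the
 * dataset drops the key at the same point in the command stream. */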
|
|
|
|
|
2018-10-19 06:00:57 -04:00
|
|
|
/* Check if the key is expired. */
|
|
|
|
int keyIsExpired(redisDb *db, robj *key) {
|
|
|
|
mstime_t when = getExpire(db,key);
|
2019-11-06 03:57:29 -05:00
|
|
|
mstime_t now;
|
2018-10-19 06:00:57 -04:00
|
|
|
|
|
|
|
if (when < 0) return 0; /* No expire for this key */
|
|
|
|
|
|
|
|
/* Don't expire anything while loading. It will be done later. */
|
|
|
|
if (server.loading) return 0;
|
|
|
|
|
|
|
|
/* If we are in the context of a Lua script, we pretend that time is
|
|
|
|
* blocked to when the Lua script started. This way a key can expire
|
|
|
|
* only the first time it is accessed and not in the middle of the
|
|
|
|
* script execution, making propagation to slaves / AOF consistent.
|
2019-11-06 03:57:29 -05:00
|
|
|
* See issue #1525 on Github for more information. */
|
|
|
|
if (server.lua_caller) {
|
|
|
|
now = server.lua_time_start;
|
|
|
|
}
|
|
|
|
/* If we are in the middle of a command execution, we still want to use
|
|
|
|
* a reference time that does not change: in that case we just use the
|
|
|
|
* cached time, which we update before each call in the call() function.
|
|
|
|
* This way we prevent commands such as RPOPLPUSH or similar, which
|
|
|
|
* may re-open the same key multiple times, from invalidating an already
|
|
|
|
* open object in a later call, if that later call sees the key as expired
|
|
|
|
* while the first one did not. */
|
2019-11-19 05:28:04 -05:00
|
|
|
else if (server.fixed_time_expire > 0) {
|
2019-11-06 03:57:29 -05:00
|
|
|
now = server.mstime;
|
|
|
|
}
|
|
|
|
/* For the other cases, we want to use the most fresh time we have. */
|
|
|
|
else {
|
|
|
|
now = mstime();
|
|
|
|
}
|
2018-10-19 06:00:57 -04:00
|
|
|
|
2019-11-06 03:57:29 -05:00
|
|
|
/* The key expired if the current (virtual or real) time is greater
|
|
|
|
* than the expire time of the key. */
|
2018-10-19 06:00:57 -04:00
|
|
|
return now > when;
|
|
|
|
}
|
|
|
|
|
2018-02-27 10:44:39 -05:00
|
|
|
/* This function is called when we are going to perform some operation
|
|
|
|
* in a given key, but such key may be already logically expired even if
|
|
|
|
* it still exists in the database. The main way this function is called
|
|
|
|
* is via lookupKey*() family of functions.
|
|
|
|
*
|
|
|
|
* The behavior of the function depends on the replication role of the
|
|
|
|
* instance, because slave instances do not expire keys, they wait
|
|
|
|
* for DELs from the master for consistency matters. However even
|
|
|
|
* slaves will try to have a coherent return value for the function,
|
|
|
|
* so that read commands executed in the slave side will be able to
|
|
|
|
* behave as if the key is expired even if still present (because the
|
|
|
|
* master has yet to propagate the DEL).
|
|
|
|
*
|
|
|
|
* In masters as a side effect of finding a key which is expired, such
|
|
|
|
* key will be evicted from the database. Also this may trigger the
|
|
|
|
* propagation of a DEL/UNLINK command in AOF / replication stream.
|
|
|
|
*
|
|
|
|
* The return value of the function is 0 if the key is still valid,
|
|
|
|
* otherwise the function returns 1 if the key is expired. */
|
2010-06-21 18:07:48 -04:00
|
|
|
int expireIfNeeded(redisDb *db, robj *key) {
|
2018-10-24 06:26:27 -04:00
|
|
|
if (!keyIsExpired(db,key)) return 0;
|
|
|
|
|
|
|
|
/* If we are running in the context of a slave, instead of
|
|
|
|
* evicting the expired key from the database, we return ASAP:
|
|
|
|
* the slave key expiration is controlled by the master that will
|
|
|
|
* send us synthesized DEL operations for expired keys.
|
|
|
|
*
|
|
|
|
* Still we try to return the right information to the caller,
|
|
|
|
* that is, 0 if we think the key should be still valid, 1 if
|
|
|
|
* we think the key is expired at this time. */
|
|
|
|
if (server.masterhost != NULL) return 1;
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* Delete the key */
|
|
|
|
server.stat_expiredkeys++;
|
2015-10-02 09:27:57 -04:00
|
|
|
propagateExpire(db,key,server.lazyfree_lazy_expire);
|
2015-07-27 03:41:48 -04:00
|
|
|
notifyKeyspaceEvent(NOTIFY_EXPIRED,
|
2013-01-28 07:00:03 -05:00
|
|
|
"expired",key,db->id);
|
2015-10-02 09:27:57 -04:00
|
|
|
return server.lazyfree_lazy_expire ? dbAsyncDelete(db,key) :
|
|
|
|
dbSyncDelete(db,key);
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
|
2011-03-23 13:09:17 -04:00
|
|
|
/* -----------------------------------------------------------------------------
|
|
|
|
* API to get key arguments from commands
|
|
|
|
* ---------------------------------------------------------------------------*/
|
|
|
|
|
2014-03-10 10:24:38 -04:00
|
|
|
/* The base case is to use the keys position as given in the command table
|
|
|
|
* (firstkey, lastkey, step). */
|
2011-03-23 13:09:17 -04:00
|
|
|
int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, int *numkeys) {
|
|
|
|
int j, i = 0, last, *keys;
|
2015-07-27 03:41:48 -04:00
|
|
|
UNUSED(argv);
|
2011-03-23 13:09:17 -04:00
|
|
|
|
|
|
|
if (cmd->firstkey == 0) {
|
|
|
|
*numkeys = 0;
|
|
|
|
return NULL;
|
|
|
|
}
|
2017-04-19 10:17:08 -04:00
|
|
|
|
2011-03-23 13:09:17 -04:00
|
|
|
last = cmd->lastkey;
|
|
|
|
if (last < 0) last = argc+last;
|
|
|
|
keys = zmalloc(sizeof(int)*((last - cmd->firstkey)+1));
|
|
|
|
for (j = cmd->firstkey; j <= last; j += cmd->keystep) {
|
2017-04-19 10:17:08 -04:00
|
|
|
if (j >= argc) {
|
2018-01-12 05:21:10 -05:00
|
|
|
/* Module commands, and standard commands with a non-fixed number
|
2018-07-01 01:24:50 -04:00
|
|
|
* of arguments (negative arity parameter) do not have dispatch
|
2018-01-12 05:21:10 -05:00
|
|
|
* time arity checks, so we need to handle the case where the user
|
|
|
|
* passed an invalid number of arguments here. In this case we
|
|
|
|
* return no keys and expect the command implementation to report
|
|
|
|
* an arity or syntax error. */
|
|
|
|
if (cmd->flags & CMD_MODULE || cmd->arity < 0) {
|
2017-04-19 10:17:08 -04:00
|
|
|
zfree(keys);
|
|
|
|
*numkeys = 0;
|
|
|
|
return NULL;
|
|
|
|
} else {
|
|
|
|
serverPanic("Redis built-in command declared keys positions not matching the arity requirements.");
|
|
|
|
}
|
|
|
|
}
|
2011-03-28 11:54:42 -04:00
|
|
|
keys[i++] = j;
|
2011-03-23 13:09:17 -04:00
|
|
|
}
|
2011-03-28 11:54:42 -04:00
|
|
|
*numkeys = i;
|
2011-03-23 13:09:17 -04:00
|
|
|
return keys;
|
|
|
|
}
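/* Worked example (illustrative, not part of the original source) for the
 * table-driven extraction above. MSET is declared with firstkey=1,
 * lastkey=-1, keystep=2, so for:
 *
 *     MSET k1 v1 k2 v2          (argc = 5)
 *
 * 'last' becomes argc-1 = 4 and the loop visits argv positions 1 and 3:
 * keys = {1, 3}, *numkeys = 2. GET is declared with firstkey=1, lastkey=1,
 * keystep=1 and yields keys = {1}. */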
|
|
|
|
|
2014-03-10 10:31:01 -04:00
|
|
|
/* Return all the arguments that are keys in the command passed via argc / argv.
|
|
|
|
*
|
|
|
|
* The command returns the positions of all the key arguments inside the array,
|
|
|
|
* so the actual return value is a heap allocated array of integers. The
|
|
|
|
* length of the array is returned by reference into *numkeys.
|
|
|
|
*
|
|
|
|
* 'cmd' must point to the corresponding entry in the redisCommand
|
|
|
|
* table, according to the command name in argv[0].
|
2014-03-10 10:24:38 -04:00
|
|
|
*
|
|
|
|
* This function uses the command table if a command-specific helper function
|
|
|
|
* is not required, otherwise it calls the command-specific function. */
|
2014-03-10 10:31:01 -04:00
|
|
|
int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
|
2016-04-27 12:09:31 -04:00
|
|
|
if (cmd->flags & CMD_MODULE_GETKEYS) {
|
|
|
|
return moduleGetCommandKeysViaAPI(cmd,argv,argc,numkeys);
|
|
|
|
} else if (!(cmd->flags & CMD_MODULE) && cmd->getkeys_proc) {
|
2014-03-10 08:18:41 -04:00
|
|
|
return cmd->getkeys_proc(cmd,argv,argc,numkeys);
|
2011-03-23 13:09:17 -04:00
|
|
|
} else {
|
|
|
|
return getKeysUsingCommandTable(cmd,argv,argc,numkeys);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-10 10:24:38 -04:00
|
|
|
/* Free the result of getKeysFromCommand. */
|
2011-03-23 13:09:17 -04:00
|
|
|
void getKeysFreeResult(int *result) {
|
|
|
|
zfree(result);
|
|
|
|
}
|
|
|
|
|
2014-03-10 10:24:38 -04:00
|
|
|
/* Helper function to extract keys from following commands:
|
|
|
|
* ZUNIONSTORE <destkey> <num-keys> <key> <key> ... <key> <options>
|
|
|
|
* ZINTERSTORE <destkey> <num-keys> <key> <key> ... <key> <options> */
|
|
|
|
int *zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
|
2011-03-23 13:09:17 -04:00
|
|
|
int i, num, *keys;
|
2015-07-27 03:41:48 -04:00
|
|
|
UNUSED(cmd);
|
2011-03-23 13:09:17 -04:00
|
|
|
|
|
|
|
num = atoi(argv[2]->ptr);
|
|
|
|
/* Sanity check. Don't return any key if the command is going to
|
|
|
|
* reply with a syntax error. */
|
2018-04-20 08:19:03 -04:00
|
|
|
if (num < 1 || num > (argc-3)) {
|
2011-03-23 13:09:17 -04:00
|
|
|
*numkeys = 0;
|
|
|
|
return NULL;
|
|
|
|
}
|
2014-03-07 16:32:04 -05:00
|
|
|
|
|
|
|
/* Keys in z{union,inter}store come from two places:
|
2014-03-10 06:43:56 -04:00
|
|
|
* argv[1] = storage key,
|
|
|
|
* argv[3...n] = keys to intersect */
|
2014-03-07 16:32:04 -05:00
|
|
|
keys = zmalloc(sizeof(int)*(num+1));
|
|
|
|
|
|
|
|
/* Add all key positions for argv[3...n] to keys[] */
|
2011-03-23 13:09:17 -04:00
|
|
|
for (i = 0; i < num; i++) keys[i] = 3+i;
|
2014-03-07 16:32:04 -05:00
|
|
|
|
2014-03-10 06:43:56 -04:00
|
|
|
/* Finally add the argv[1] key position (the storage key target). */
|
2014-03-07 16:32:04 -05:00
|
|
|
keys[num] = 1;
|
|
|
|
*numkeys = num+1; /* Total keys = {union,inter} keys + storage key */
|
2011-03-23 13:09:17 -04:00
|
|
|
return keys;
|
|
|
|
}
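/* Worked example (illustrative, not part of the original source):
 *
 *     ZUNIONSTORE dest 2 zset1 zset2 WEIGHTS 1 2
 *
 * num = 2, so the source keys at argv[3] and argv[4] are reported first,
 * followed by the destination at argv[1]: keys = {3, 4, 1}, *numkeys = 3. */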
|
2011-04-28 13:00:33 -04:00
|
|
|
|
2014-03-10 10:26:10 -04:00
|
|
|
/* Helper function to extract keys from the following commands:
|
|
|
|
* EVAL <script> <num-keys> <key> <key> ... <key> [more stuff]
|
|
|
|
* EVALSHA <script> <num-keys> <key> <key> ... <key> [more stuff] */
|
|
|
|
int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
|
|
|
|
int i, num, *keys;
|
2015-07-27 03:41:48 -04:00
|
|
|
UNUSED(cmd);
|
2014-03-10 10:26:10 -04:00
|
|
|
|
|
|
|
num = atoi(argv[2]->ptr);
|
|
|
|
/* Sanity check. Don't return any key if the command is going to
|
|
|
|
* reply with a syntax error. */
|
2018-04-20 08:19:03 -04:00
|
|
|
if (num <= 0 || num > (argc-3)) {
|
2014-03-10 10:26:10 -04:00
|
|
|
*numkeys = 0;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
keys = zmalloc(sizeof(int)*num);
|
2014-03-10 11:23:42 -04:00
|
|
|
*numkeys = num;
|
2014-03-10 10:26:10 -04:00
|
|
|
|
|
|
|
/* Add all key positions for argv[3...n] to keys[] */
|
|
|
|
for (i = 0; i < num; i++) keys[i] = 3+i;
|
|
|
|
|
|
|
|
return keys;
|
|
|
|
}
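/* Worked example (illustrative, not part of the original source):
 *
 *     EVAL "return {KEYS[1],KEYS[2]}" 2 k1 k2
 *
 * num = 2, so keys = {3, 4} and *numkeys = 2; the script body and any
 * trailing ARGV arguments are never reported as keys. */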
|
|
|
|
|
2014-03-10 11:26:08 -04:00
|
|
|
/* Helper function to extract keys from the SORT command.
|
|
|
|
*
|
|
|
|
* SORT <sort-key> ... STORE <store-key> ...
|
|
|
|
*
|
|
|
|
* The first argument of SORT is always a key, however a list of options
|
|
|
|
* follow in SQL-alike style. Here we parse just the minimum in order to
|
|
|
|
* correctly identify keys in the "STORE" option. */
|
|
|
|
int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
|
2014-07-21 17:31:21 -04:00
|
|
|
int i, j, num, *keys, found_store = 0;
|
2015-07-27 03:41:48 -04:00
|
|
|
UNUSED(cmd);
|
2014-03-10 11:26:08 -04:00
|
|
|
|
|
|
|
num = 0;
|
|
|
|
keys = zmalloc(sizeof(int)*2); /* Alloc 2 places for the worst case. */
|
|
|
|
|
|
|
|
keys[num++] = 1; /* <sort-key> is always present. */
|
|
|
|
|
|
|
|
/* Search for the STORE option. By default we assume that options don't
|
|
|
|
* take arguments, so if we find an unknown option name we skip to the
|
|
|
|
* next one. However there are options with 1 or 2 arguments, so we
|
|
|
|
* provide a list here in order to skip the right number of args. */
|
|
|
|
struct {
|
|
|
|
char *name;
|
|
|
|
int skip;
|
|
|
|
} skiplist[] = {
|
|
|
|
{"limit", 2},
|
|
|
|
{"get", 1},
|
|
|
|
{"by", 1},
|
|
|
|
{NULL, 0} /* End of elements. */
|
|
|
|
};
|
|
|
|
|
|
|
|
for (i = 2; i < argc; i++) {
|
|
|
|
for (j = 0; skiplist[j].name != NULL; j++) {
|
|
|
|
if (!strcasecmp(argv[i]->ptr,skiplist[j].name)) {
|
|
|
|
i += skiplist[j].skip;
|
|
|
|
break;
|
|
|
|
} else if (!strcasecmp(argv[i]->ptr,"store") && i+1 < argc) {
|
2014-03-10 11:39:07 -04:00
|
|
|
/* Note: we don't increment "num" here and continue the loop
|
|
|
|
* to be sure to process the *last* "STORE" option if multiple
|
|
|
|
* ones are provided. This is same behavior as SORT. */
|
2014-07-21 17:31:21 -04:00
|
|
|
found_store = 1;
|
2014-03-10 11:39:07 -04:00
|
|
|
keys[num] = i+1; /* <store-key> */
|
2014-03-10 11:26:08 -04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-07-21 17:31:21 -04:00
|
|
|
*numkeys = num + found_store;
|
2014-03-10 11:26:08 -04:00
|
|
|
return keys;
|
|
|
|
}
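/* Worked example (illustrative, not part of the original source):
 *
 *     SORT mylist LIMIT 0 10 BY weight_* STORE dst
 *
 * argv[1] is always a key; the scan skips LIMIT's two arguments and BY's
 * single argument, then finds STORE and records the position of "dst":
 * keys = {1, 8}, *numkeys = 2. */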
|
|
|
|
|
2015-12-11 12:09:01 -05:00
|
|
|
int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
|
|
|
|
int i, num, first, *keys;
|
|
|
|
UNUSED(cmd);
|
|
|
|
|
|
|
|
/* Assume the obvious form. */
|
|
|
|
first = 3;
|
|
|
|
num = 1;
|
|
|
|
|
|
|
|
/* But check for the extended one with the KEYS option. */
|
|
|
|
if (argc > 6) {
|
|
|
|
for (i = 6; i < argc; i++) {
|
|
|
|
if (!strcasecmp(argv[i]->ptr,"keys") &&
|
|
|
|
sdslen(argv[3]->ptr) == 0)
|
|
|
|
{
|
|
|
|
first = i+1;
|
|
|
|
num = argc-first;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
keys = zmalloc(sizeof(int)*num);
|
|
|
|
for (i = 0; i < num; i++) keys[i] = first+i;
|
|
|
|
*numkeys = num;
|
|
|
|
return keys;
|
|
|
|
}
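/* Worked examples (illustrative, not part of the original source):
 *
 *     MIGRATE host 6380 user:1 0 5000
 *
 * uses the single-key form, so keys = {3}. With the KEYS option the key
 * argument must be the empty string:
 *
 *     MIGRATE host 6380 "" 0 5000 KEYS user:1 user:2
 *
 * here argv[3] is empty, so the keys after KEYS are reported instead:
 * keys = {7, 8}, *numkeys = 2. */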
/* Helper function to extract keys from the following commands:
 * GEORADIUS key x y radius unit [WITHDIST] [WITHHASH] [WITHCOORD] [ASC|DESC]
 *                               [COUNT count] [STORE key] [STOREDIST key]
 * GEORADIUSBYMEMBER key member radius unit ... options ... */
int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
    int i, num, *keys;
    UNUSED(cmd);

    /* Check for the presence of the stored key in the command. */
    int stored_key = -1;
    for (i = 5; i < argc; i++) {
        char *arg = argv[i]->ptr;
        /* If the user specifies both the "store" and the "storedist" options,
         * the second key specified overrides the first one. This matches the
         * behavior of the georadiusCommand() implementation. */
        if ((!strcasecmp(arg, "store") || !strcasecmp(arg, "storedist")) && ((i+1) < argc)) {
            stored_key = i+1;
            i++;
        }
    }
    num = 1 + (stored_key == -1 ? 0 : 1);

    /* Keys in the command come from two places:
     * argv[1] = key,
     * argv[5...n] = stored key if present. */
    keys = zmalloc(sizeof(int) * num);

    /* Add all key positions to keys[]. */
    keys[0] = 1;
    if (num > 1) {
        keys[1] = stored_key;
    }
    *numkeys = num;
    return keys;
}

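/* Worked example (added for illustration, not in the original source): for
 * "GEORADIUS src 15 37 200 km STORE dst" the scan above finds STORE at
 * argv[6], so the function returns keys[] = {1, 7} and sets *numkeys = 2. */
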
/* Helper function to extract keys from the XREAD and XREADGROUP commands:
 * XREAD [COUNT <count>] [BLOCK <milliseconds>]
 *       STREAMS key_1 key_2 ... key_N ID_1 ID_2 ... ID_N
 * XREADGROUP GROUP <groupname> <consumer> [COUNT <count>] [BLOCK <milliseconds>]
 *       [NOACK] STREAMS key_1 key_2 ... key_N ID_1 ID_2 ... ID_N */
int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
    int i, num = 0, *keys;
    UNUSED(cmd);

    /* We need to parse the options of the command in order to seek the first
     * "STREAMS" string which is actually the option. This is needed because
     * "STREAMS" could also be the name of the consumer group and even the
     * name of the stream key. */
    int streams_pos = -1;
    for (i = 1; i < argc; i++) {
        char *arg = argv[i]->ptr;
        if (!strcasecmp(arg, "block")) {
            i++; /* Skip option argument. */
        } else if (!strcasecmp(arg, "count")) {
            i++; /* Skip option argument. */
        } else if (!strcasecmp(arg, "group")) {
            i += 2; /* Skip option arguments. */
        } else if (!strcasecmp(arg, "noack")) {
            /* Nothing to do. */
        } else if (!strcasecmp(arg, "streams")) {
            streams_pos = i;
            break;
        } else {
            break; /* Syntax error. */
        }
    }
    if (streams_pos != -1) num = argc - streams_pos - 1;

    /* Syntax error. */
    if (streams_pos == -1 || num == 0 || num % 2 != 0) {
        *numkeys = 0;
        return NULL;
    }
    num /= 2; /* We have half the keys as there are arguments, because the
                 other half are the IDs, one for each key. */

    keys = zmalloc(sizeof(int) * num);
    for (i = streams_pos+1; i < argc-num; i++) keys[i-streams_pos-1] = i;
    *numkeys = num;
    return keys;
}

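/* Worked example (added for illustration, not in the original source): for
 * "XREAD COUNT 2 STREAMS s1 s2 0 0" the STREAMS option is found at argv[3],
 * so num = 4 arguments follow it, half of which are keys; the function
 * returns keys[] = {4, 5} and sets *numkeys = 2. */
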
/* Slot to Key API. This is used by Redis Cluster in order to obtain, in a
 * fast way, the keys that belong to a specified hash slot. This is useful
 * while rehashing the cluster, and in other conditions where we need to
 * know whether we have keys for a given hash slot. */
void slotToKeyUpdateKey(robj *key, int add) {
    unsigned int hashslot = keyHashSlot(key->ptr,sdslen(key->ptr));
    unsigned char buf[64];
    unsigned char *indexed = buf;
    size_t keylen = sdslen(key->ptr);

    server.cluster->slots_keys_count[hashslot] += add ? 1 : -1;
    if (keylen+2 > 64) indexed = zmalloc(keylen+2);
    indexed[0] = (hashslot >> 8) & 0xff;
    indexed[1] = hashslot & 0xff;
    memcpy(indexed+2,key->ptr,keylen);
    if (add) {
        raxInsert(server.cluster->slots_to_keys,indexed,keylen+2,NULL,NULL);
    } else {
        raxRemove(server.cluster->slots_to_keys,indexed,keylen+2,NULL);
    }
    if (indexed != buf) zfree(indexed);
}

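/* Note (added for clarity, not in the original source): the element stored in
 * the slots_to_keys radix tree is the 16 bit hash slot encoded big endian in
 * the first two bytes, followed by the key name. Keys of the same slot are
 * therefore lexicographically contiguous, which is what allows the two-byte
 * prefix seek performed by getKeysInSlot() and delKeysInSlot() below. */
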
void slotToKeyAdd(robj *key) {
    slotToKeyUpdateKey(key,1);
}

void slotToKeyDel(robj *key) {
    slotToKeyUpdateKey(key,0);
}

void slotToKeyFlush(void) {
    raxFree(server.cluster->slots_to_keys);
    server.cluster->slots_to_keys = raxNew();
    memset(server.cluster->slots_keys_count,0,
           sizeof(server.cluster->slots_keys_count));
}

/* Populate the specified array of objects with keys in the specified slot.
 * New objects are returned to represent the keys: it's up to the caller to
 * decrement the reference count to release the key names. */
unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count) {
    raxIterator iter;
    int j = 0;
    unsigned char indexed[2];

    indexed[0] = (hashslot >> 8) & 0xff;
    indexed[1] = hashslot & 0xff;
    raxStart(&iter,server.cluster->slots_to_keys);
    raxSeek(&iter,">=",indexed,2);
    while(count-- && raxNext(&iter)) {
        if (iter.key[0] != indexed[0] || iter.key[1] != indexed[1]) break;
        keys[j++] = createStringObject((char*)iter.key+2,iter.key_len-2);
    }
    raxStop(&iter);
    return j;
}

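/* Illustrative usage sketch (added; the helper below is hypothetical and not
 * part of the original file). It shows the calling convention of the function
 * above: the caller supplies the array and must release every returned object
 * with decrRefCount(). */
void debugLogKeysInSlot(unsigned int hashslot, unsigned int count) {
    robj **keys = zmalloc(sizeof(robj*)*count);
    unsigned int returned = getKeysInSlot(hashslot,keys,count);
    for (unsigned int j = 0; j < returned; j++) {
        serverLog(LL_DEBUG,"slot %u contains key '%s'",
                  hashslot,(char*)keys[j]->ptr);
        decrRefCount(keys[j]);
    }
    zfree(keys);
}
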
/* Remove all the keys in the specified hash slot.
 * The number of removed items is returned. */
unsigned int delKeysInSlot(unsigned int hashslot) {
    raxIterator iter;
    int j = 0;
    unsigned char indexed[2];

    indexed[0] = (hashslot >> 8) & 0xff;
    indexed[1] = hashslot & 0xff;
    raxStart(&iter,server.cluster->slots_to_keys);
    while(server.cluster->slots_keys_count[hashslot]) {
        raxSeek(&iter,">=",indexed,2);
        raxNext(&iter);

        robj *key = createStringObject((char*)iter.key+2,iter.key_len-2);
        dbDelete(&server.db[0],key);
        decrRefCount(key);
        j++;
    }
    raxStop(&iter);
    return j;
}

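/* Note (added for clarity, not in the original source): the seek is repeated
 * at every iteration because dbDelete() ends up calling slotToKeyDel(), which
 * modifies the radix tree and would otherwise invalidate the iterator; the
 * loop terminates once the per-slot counter reaches zero. */
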
unsigned int countKeysInSlot(unsigned int hashslot) {
    return server.cluster->slots_keys_count[hashslot];
}