redict/src/t_string.c

474 lines
15 KiB
C
Raw Normal View History

/*
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "server.h"
2011-11-12 13:27:35 -05:00
#include <math.h> /* isnan(), isinf() */
/*-----------------------------------------------------------------------------
* String Commands
*----------------------------------------------------------------------------*/
static int checkStringLength(client *c, long long size) {
if (size > 512*1024*1024) {
addReplyError(c,"string exceeds maximum allowed size (512MB)");
return C_ERR;
}
return C_OK;
}
/* The setGenericCommand() function implements the SET operation with different
* options and variants. This function is called in order to implement the
* following commands: SET, SETEX, PSETEX, SETNX.
*
* 'flags' changes the behavior of the command (NX or XX, see belove).
*
* 'expire' represents an expire to set in form of a Redis object as passed
* by the user. It is interpreted according to the specified 'unit'.
*
* 'ok_reply' and 'abort_reply' is what the function will reply to the client
* if the operation is performed, or when it is not because of NX or
* XX flags.
*
* If ok_reply is NULL "+OK" is used.
* If abort_reply is NULL, "$-1" is used. */
#define OBJ_SET_NO_FLAGS 0
#define OBJ_SET_NX (1<<0) /* Set if key not exists. */
#define OBJ_SET_XX (1<<1) /* Set if key exists. */
#define OBJ_SET_EX (1<<2) /* Set if time in seconds is given */
#define OBJ_SET_PX (1<<3) /* Set if time in ms in given */
void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) {
2013-01-16 12:00:20 -05:00
long long milliseconds = 0; /* initialized to avoid any harmness warning */
if (expire) {
if (getLongLongFromObjectOrReply(c, expire, &milliseconds, NULL) != C_OK)
return;
if (milliseconds <= 0) {
addReplyErrorFormat(c,"invalid expire time in %s",c->cmd->name);
return;
}
if (unit == UNIT_SECONDS) milliseconds *= 1000;
}
if ((flags & OBJ_SET_NX && lookupKeyWrite(c->db,key) != NULL) ||
(flags & OBJ_SET_XX && lookupKeyWrite(c->db,key) == NULL))
{
addReply(c, abort_reply ? abort_reply : shared.nullbulk);
return;
}
setKey(c->db,key,val);
server.dirty++;
Replication: fix the infamous key leakage of writable slaves + EXPIRE. BACKGROUND AND USE CASEj Redis slaves are normally write only, however the supprot a "writable" mode which is very handy when scaling reads on slaves, that actually need write operations in order to access data. For instance imagine having slaves replicating certain Sets keys from the master. When accessing the data on the slave, we want to peform intersections between such Sets values. However we don't want to intersect each time: to cache the intersection for some time often is a good idea. To do so, it is possible to setup a slave as a writable slave, and perform the intersection on the slave side, perhaps setting a TTL on the resulting key so that it will expire after some time. THE BUG Problem: in order to have a consistent replication, expiring of keys in Redis replication is up to the master, that synthesize DEL operations to send in the replication stream. However slaves logically expire keys by hiding them from read attempts from clients so that if the master did not promptly sent a DEL, the client still see logically expired keys as non existing. Because slaves don't actively expire keys by actually evicting them but just masking from the POV of read operations, if a key is created in a writable slave, and an expire is set, the key will be leaked forever: 1. No DEL will be received from the master, which does not know about such a key at all. 2. No eviction will be performed by the slave, since it needs to disable eviction because it's up to masters, otherwise consistency of data is lost. THE FIX In order to fix the problem, the slave should be able to tag keys that were created in the slave side and have an expire set in some way. My solution involved using an unique additional dictionary created by the writable slave only if needed. The dictionary is obviously keyed by the key name that we need to track: all the keys that are set with an expire directly by a client writing to the slave are tracked. The value in the dictionary is a bitmap of all the DBs where such a key name need to be tracked, so that we can use a single dictionary to track keys in all the DBs used by the slave (actually this limits the solution to the first 64 DBs, but the default with Redis is to use 16 DBs). This solution allows to pay both a small complexity and CPU penalty, which is zero when the feature is not used, actually. The slave-side eviction is encapsulated in code which is not coupled with the rest of the Redis core, if not for the hook to track the keys. TODO I'm doing the first smoke tests to see if the feature works as expected: so far so good. Unit tests should be added before merging into the 4.0 branch.
2016-12-13 04:20:06 -05:00
if (expire) setExpire(c,c->db,key,mstime()+milliseconds);
2015-07-27 03:41:48 -04:00
notifyKeyspaceEvent(NOTIFY_STRING,"set",key,c->db->id);
if (expire) notifyKeyspaceEvent(NOTIFY_GENERIC,
"expire",key,c->db->id);
addReply(c, ok_reply ? ok_reply : shared.ok);
}
/* SET key value [NX] [XX] [EX <seconds>] [PX <milliseconds>] */
void setCommand(client *c) {
int j;
robj *expire = NULL;
int unit = UNIT_SECONDS;
int flags = OBJ_SET_NO_FLAGS;
for (j = 3; j < c->argc; j++) {
char *a = c->argv[j]->ptr;
robj *next = (j == c->argc-1) ? NULL : c->argv[j+1];
if ((a[0] == 'n' || a[0] == 'N') &&
(a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
!(flags & OBJ_SET_XX))
{
flags |= OBJ_SET_NX;
} else if ((a[0] == 'x' || a[0] == 'X') &&
(a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
!(flags & OBJ_SET_NX))
{
flags |= OBJ_SET_XX;
} else if ((a[0] == 'e' || a[0] == 'E') &&
(a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
!(flags & OBJ_SET_PX) && next)
{
flags |= OBJ_SET_EX;
unit = UNIT_SECONDS;
expire = next;
j++;
} else if ((a[0] == 'p' || a[0] == 'P') &&
(a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
!(flags & OBJ_SET_EX) && next)
{
flags |= OBJ_SET_PX;
unit = UNIT_MILLISECONDS;
expire = next;
j++;
} else {
addReply(c,shared.syntaxerr);
return;
}
}
c->argv[2] = tryObjectEncoding(c->argv[2]);
setGenericCommand(c,flags,c->argv[1],c->argv[2],expire,unit,NULL,NULL);
}
void setnxCommand(client *c) {
c->argv[2] = tryObjectEncoding(c->argv[2]);
setGenericCommand(c,OBJ_SET_NX,c->argv[1],c->argv[2],NULL,0,shared.cone,shared.czero);
}
void setexCommand(client *c) {
c->argv[3] = tryObjectEncoding(c->argv[3]);
setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL);
}
void psetexCommand(client *c) {
c->argv[3] = tryObjectEncoding(c->argv[3]);
setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL);
}
int getGenericCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL)
return C_OK;
if (o->type != OBJ_STRING) {
addReply(c,shared.wrongtypeerr);
return C_ERR;
} else {
addReplyBulk(c,o);
return C_OK;
}
}
void getCommand(client *c) {
getGenericCommand(c);
}
void getsetCommand(client *c) {
if (getGenericCommand(c) == C_ERR) return;
c->argv[2] = tryObjectEncoding(c->argv[2]);
setKey(c->db,c->argv[1],c->argv[2]);
2015-07-27 03:41:48 -04:00
notifyKeyspaceEvent(NOTIFY_STRING,"set",c->argv[1],c->db->id);
server.dirty++;
2010-12-09 10:39:33 -05:00
}
void setrangeCommand(client *c) {
robj *o;
long offset;
sds value = c->argv[3]->ptr;
if (getLongFromObjectOrReply(c,c->argv[2],&offset,NULL) != C_OK)
return;
2010-12-15 05:30:50 -05:00
if (offset < 0) {
addReplyError(c,"offset is out of range");
return;
}
o = lookupKeyWrite(c->db,c->argv[1]);
if (o == NULL) {
/* Return 0 when setting nothing on a non-existing string */
if (sdslen(value) == 0) {
addReply(c,shared.czero);
return;
}
/* Return when the resulting string exceeds allowed size */
if (checkStringLength(c,offset+sdslen(value)) != C_OK)
return;
o = createObject(OBJ_STRING,sdsnewlen(NULL, offset+sdslen(value)));
dbAdd(c->db,c->argv[1],o);
} else {
size_t olen;
/* Key exists, check type */
if (checkType(c,o,OBJ_STRING))
return;
/* Return existing string length when setting nothing */
olen = stringObjectLen(o);
if (sdslen(value) == 0) {
addReplyLongLong(c,olen);
return;
}
/* Return when the resulting string exceeds allowed size */
if (checkStringLength(c,offset+sdslen(value)) != C_OK)
return;
/* Create a copy when the object is shared or encoded. */
o = dbUnshareStringValue(c->db,c->argv[1],o);
}
if (sdslen(value) > 0) {
o->ptr = sdsgrowzero(o->ptr,offset+sdslen(value));
memcpy((char*)o->ptr+offset,value,sdslen(value));
signalModifiedKey(c->db,c->argv[1]);
2015-07-27 03:41:48 -04:00
notifyKeyspaceEvent(NOTIFY_STRING,
"setrange",c->argv[1],c->db->id);
server.dirty++;
}
addReplyLongLong(c,sdslen(o->ptr));
}
void getrangeCommand(client *c) {
robj *o;
long long start, end;
char *str, llbuf[32];
size_t strlen;
if (getLongLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK)
return;
if (getLongLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK)
return;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptybulk)) == NULL ||
checkType(c,o,OBJ_STRING)) return;
if (o->encoding == OBJ_ENCODING_INT) {
str = llbuf;
strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
} else {
str = o->ptr;
strlen = sdslen(str);
}
/* Convert negative indexes */
if (start < 0 && end < 0 && start > end) {
addReply(c,shared.emptybulk);
return;
}
if (start < 0) start = strlen+start;
if (end < 0) end = strlen+end;
if (start < 0) start = 0;
if (end < 0) end = 0;
if ((unsigned long long)end >= strlen) end = strlen-1;
/* Precondition: end >= 0 && end < strlen, so the only condition where
* nothing can be returned is: start > end. */
if (start > end || strlen == 0) {
addReply(c,shared.emptybulk);
} else {
addReplyBulkCBuffer(c,(char*)str+start,end-start+1);
}
}
void mgetCommand(client *c) {
int j;
addReplyMultiBulkLen(c,c->argc-1);
for (j = 1; j < c->argc; j++) {
robj *o = lookupKeyRead(c->db,c->argv[j]);
if (o == NULL) {
addReply(c,shared.nullbulk);
} else {
if (o->type != OBJ_STRING) {
addReply(c,shared.nullbulk);
} else {
addReplyBulk(c,o);
}
}
}
}
void msetGenericCommand(client *c, int nx) {
int j, busykeys = 0;
if ((c->argc % 2) == 0) {
addReplyError(c,"wrong number of arguments for MSET");
return;
}
/* Handle the NX flag. The MSETNX semantic is to return zero and don't
* set nothing at all if at least one already key exists. */
if (nx) {
for (j = 1; j < c->argc; j += 2) {
if (lookupKeyWrite(c->db,c->argv[j]) != NULL) {
busykeys++;
}
}
if (busykeys) {
addReply(c, shared.czero);
return;
}
}
for (j = 1; j < c->argc; j += 2) {
c->argv[j+1] = tryObjectEncoding(c->argv[j+1]);
setKey(c->db,c->argv[j],c->argv[j+1]);
2015-07-27 03:41:48 -04:00
notifyKeyspaceEvent(NOTIFY_STRING,"set",c->argv[j],c->db->id);
}
server.dirty += (c->argc-1)/2;
addReply(c, nx ? shared.cone : shared.ok);
}
void msetCommand(client *c) {
msetGenericCommand(c,0);
}
void msetnxCommand(client *c) {
msetGenericCommand(c,1);
}
void incrDecrCommand(client *c, long long incr) {
long long value, oldvalue;
robj *o, *new;
o = lookupKeyWrite(c->db,c->argv[1]);
if (o != NULL && checkType(c,o,OBJ_STRING)) return;
if (getLongLongFromObjectOrReply(c,o,&value,NULL) != C_OK) return;
oldvalue = value;
if ((incr < 0 && oldvalue < 0 && incr < (LLONG_MIN-oldvalue)) ||
(incr > 0 && oldvalue > 0 && incr > (LLONG_MAX-oldvalue))) {
addReplyError(c,"increment or decrement would overflow");
return;
}
value += incr;
if (o && o->refcount == 1 && o->encoding == OBJ_ENCODING_INT &&
2015-07-27 03:41:48 -04:00
(value < 0 || value >= OBJ_SHARED_INTEGERS) &&
value >= LONG_MIN && value <= LONG_MAX)
{
new = o;
o->ptr = (void*)((long)value);
} else {
new = createStringObjectFromLongLong(value);
if (o) {
dbOverwrite(c->db,c->argv[1],new);
} else {
dbAdd(c->db,c->argv[1],new);
}
}
signalModifiedKey(c->db,c->argv[1]);
2015-07-27 03:41:48 -04:00
notifyKeyspaceEvent(NOTIFY_STRING,"incrby",c->argv[1],c->db->id);
server.dirty++;
addReply(c,shared.colon);
addReply(c,new);
addReply(c,shared.crlf);
}
void incrCommand(client *c) {
incrDecrCommand(c,1);
}
void decrCommand(client *c) {
incrDecrCommand(c,-1);
}
void incrbyCommand(client *c) {
long long incr;
if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != C_OK) return;
incrDecrCommand(c,incr);
}
void decrbyCommand(client *c) {
long long incr;
if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != C_OK) return;
incrDecrCommand(c,-incr);
}
void incrbyfloatCommand(client *c) {
2011-11-12 13:27:35 -05:00
long double incr, value;
robj *o, *new, *aux;
2011-11-12 13:27:35 -05:00
o = lookupKeyWrite(c->db,c->argv[1]);
if (o != NULL && checkType(c,o,OBJ_STRING)) return;
if (getLongDoubleFromObjectOrReply(c,o,&value,NULL) != C_OK ||
getLongDoubleFromObjectOrReply(c,c->argv[2],&incr,NULL) != C_OK)
2011-11-12 13:27:35 -05:00
return;
value += incr;
if (isnan(value) || isinf(value)) {
addReplyError(c,"increment would produce NaN or Infinity");
return;
}
new = createStringObjectFromLongDouble(value,1);
2011-11-12 13:27:35 -05:00
if (o)
dbOverwrite(c->db,c->argv[1],new);
else
dbAdd(c->db,c->argv[1],new);
signalModifiedKey(c->db,c->argv[1]);
2015-07-27 03:41:48 -04:00
notifyKeyspaceEvent(NOTIFY_STRING,"incrbyfloat",c->argv[1],c->db->id);
2011-11-12 13:27:35 -05:00
server.dirty++;
addReplyBulk(c,new);
/* Always replicate INCRBYFLOAT as a SET command with the final value
* in order to make sure that differences in float precision or formatting
* will not create differences in replicas or after an AOF restart. */
aux = createStringObject("SET",3);
rewriteClientCommandArgument(c,0,aux);
decrRefCount(aux);
rewriteClientCommandArgument(c,2,new);
2011-11-12 13:27:35 -05:00
}
void appendCommand(client *c) {
size_t totlen;
robj *o, *append;
o = lookupKeyWrite(c->db,c->argv[1]);
if (o == NULL) {
/* Create the key */
2010-12-15 05:40:36 -05:00
c->argv[2] = tryObjectEncoding(c->argv[2]);
dbAdd(c->db,c->argv[1],c->argv[2]);
incrRefCount(c->argv[2]);
totlen = stringObjectLen(c->argv[2]);
} else {
2010-12-15 05:40:36 -05:00
/* Key exists, check type */
if (checkType(c,o,OBJ_STRING))
return;
2010-12-15 05:40:36 -05:00
/* "append" is an argument, so always an sds */
append = c->argv[2];
totlen = stringObjectLen(o)+sdslen(append->ptr);
if (checkStringLength(c,totlen) != C_OK)
return;
/* Append the value */
o = dbUnshareStringValue(c->db,c->argv[1],o);
o->ptr = sdscatlen(o->ptr,append->ptr,sdslen(append->ptr));
totlen = sdslen(o->ptr);
}
signalModifiedKey(c->db,c->argv[1]);
2015-07-27 03:41:48 -04:00
notifyKeyspaceEvent(NOTIFY_STRING,"append",c->argv[1],c->db->id);
server.dirty++;
addReplyLongLong(c,totlen);
}
void strlenCommand(client *c) {
2010-07-27 04:09:26 -04:00
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,o,OBJ_STRING)) return;
addReplyLongLong(c,stringObjectLen(o));
2010-07-27 04:09:26 -04:00
}