Added the SRANDMEMBER key <count> variant.

SRANDMEMBER called with just the key argument can just return a single
random element from a Redis Set. However many users need to return
multiple unique elements from a Set, this is not a trivial problem to
handle in the client side, and for truly good performance a C
implementation was required.

After many requests for this feature it was finally implemented.

The problem implementing this command is the strategy to follow when
the number of elements the user asks for is near to the number of
elements that are already inside the set. In this case asking random
elements to the dictionary API, and trying to add it to a temporary set,
may result into an extremely poor performance, as most add operations
will be wasted on duplicated elements.

For this reason this implementation uses a different strategy in this
case: the Set is copied, and random elements are returned to reach the
specified count.

The code actually uses 4 different algorithms optimized for the
different cases.

If the count is negative, the command changes behavior and allows for
duplicated elements in the returned subset.
This commit is contained in:
antirez 2012-09-19 21:29:40 +02:00
parent d310fbedab
commit be90c803e3
2 changed files with 155 additions and 1 deletions

View File

@ -152,7 +152,7 @@ struct redisCommand redisCommandTable[] = {
{"sismember",sismemberCommand,3,"r",0,NULL,1,1,1,0,0}, {"sismember",sismemberCommand,3,"r",0,NULL,1,1,1,0,0},
{"scard",scardCommand,2,"r",0,NULL,1,1,1,0,0}, {"scard",scardCommand,2,"r",0,NULL,1,1,1,0,0},
{"spop",spopCommand,2,"wRs",0,NULL,1,1,1,0,0}, {"spop",spopCommand,2,"wRs",0,NULL,1,1,1,0,0},
{"srandmember",srandmemberCommand,2,"rR",0,NULL,1,1,1,0,0}, {"srandmember",srandmemberCommand,-2,"rR",0,NULL,1,1,1,0,0},
{"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0}, {"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
{"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0}, {"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
{"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0}, {"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0},

View File

@ -4,6 +4,8 @@
* Set Commands * Set Commands
*----------------------------------------------------------------------------*/ *----------------------------------------------------------------------------*/
void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op);
/* Factory method to return a set that *can* hold "value". When the object has /* Factory method to return a set that *can* hold "value". When the object has
* an integer-encodable value, an intset will be returned. Otherwise a regular * an integer-encodable value, an intset will be returned. Otherwise a regular
* hash table. */ * hash table. */
@ -360,11 +362,163 @@ void spopCommand(redisClient *c) {
server.dirty++; server.dirty++;
} }
/* handle the "SRANDMEMBER key <count>" variant. The normal version of the
* command is handled by the srandmemberCommand() function itself. */
/* How many times bigger should be the set compared to the requested size
* for us to don't use the "remove elements" strategy? Read later in the
* implementation for more info. */
#define SRANDMEMBER_SUB_STRATEGY_MUL 3
void srandmemberWithCountCommand(redisClient *c) {
long l;
unsigned long count, size;
int uniq = 1;
robj *set, *ele;
int64_t llele;
int encoding;
dict *d;
if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != REDIS_OK) return;
if (l >= 0) {
count = (unsigned) l;
} else {
/* A negative count means: return the same elements multiple times
* (i.e. don't remove the extracted element after every extraction). */
count = -l;
uniq = 0;
}
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk))
== NULL || checkType(c,set,REDIS_SET)) return;
size = setTypeSize(set);
/* If count is zero, serve it ASAP to avoid special cases later. */
if (count == 0) {
addReply(c,shared.emptymultibulk);
return;
}
/* CASE 1: The count was negative, so the extraction method is just:
* "return N random elements" sampling the whole set every time.
* This case is trivial and can be served without auxiliary data
* structures. */
if (!uniq) {
addReplyMultiBulkLen(c,count);
while(count--) {
encoding = setTypeRandomElement(set,&ele,&llele);
if (encoding == REDIS_ENCODING_INTSET) {
addReplyBulkLongLong(c,llele);
} else {
addReplyBulk(c,ele);
}
}
return;
}
/* CASE 2:
* The number of requested elements is greater than the number of
* elements inside the set: simply return the whole set. */
if (count >= size) {
sunionDiffGenericCommand(c,c->argv,c->argc-1,NULL,REDIS_OP_UNION);
return;
}
/* For CASE 3 and CASE 4 we need an auxiliary dictionary. */
d = dictCreate(&setDictType,NULL);
/* CASE 3:
* The number of elements inside the set is not greater than
* SRANDMEMBER_SUB_STRATEGY_MUL times the number of requested elements.
* In this case we create a set from scratch with all the elements, and
* subtract random elements to reach the requested number of elements.
*
* This is done because if the number of requsted elements is just
* a bit less than the number of elements in the set, the natural approach
* used into CASE 3 is highly inefficient. */
if (count*SRANDMEMBER_SUB_STRATEGY_MUL > size) {
setTypeIterator *si;
/* Add all the elements into the temporary dictionary. */
si = setTypeInitIterator(set);
while((encoding = setTypeNext(si,&ele,&llele)) != -1) {
int retval;
if (encoding == REDIS_ENCODING_INTSET) {
retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
} else if (ele->encoding == REDIS_ENCODING_RAW) {
retval = dictAdd(d,dupStringObject(ele),NULL);
} else if (ele->encoding == REDIS_ENCODING_INT) {
retval = dictAdd(d,
createStringObjectFromLongLong((long)ele->ptr),NULL);
}
redisAssert(retval == DICT_OK);
}
setTypeReleaseIterator(si);
redisAssert(dictSize(d) == size);
/* Remove random elements to reach the right count. */
while(size > count) {
dictEntry *de;
de = dictGetRandomKey(d);
dictDelete(d,dictGetKey(de));
size--;
}
}
/* CASE 4: We have a big set compared to the requested number of elements.
* In this case we can simply get random elements from the set and add
* to the temporary set, trying to eventually get enough unique elements
* to reach the specified count. */
else {
unsigned long added = 0;
while(added < count) {
int retval;
encoding = setTypeRandomElement(set,&ele,&llele);
if (encoding == REDIS_ENCODING_INTSET) {
retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
} else if (ele->encoding == REDIS_ENCODING_RAW) {
retval = dictAdd(d,dupStringObject(ele),NULL);
} else if (ele->encoding == REDIS_ENCODING_INT) {
retval = dictAdd(d,
createStringObjectFromLongLong((long)ele->ptr),NULL);
}
if (retval == DICT_OK) added++;
}
}
/* CASE 3 & 4: send the result to the user. */
{
dictIterator *di;
dictEntry *de;
addReplyMultiBulkLen(c,count);
di = dictGetIterator(d);
while((de = dictNext(di)) != NULL)
addReplyBulk(c,dictGetKey(de));
dictReleaseIterator(di);
dictRelease(d);
}
}
void srandmemberCommand(redisClient *c) { void srandmemberCommand(redisClient *c) {
robj *set, *ele; robj *set, *ele;
int64_t llele; int64_t llele;
int encoding; int encoding;
if (c->argc == 3) {
srandmemberWithCountCommand(c);
return;
} else if (c->argc > 3) {
addReply(c,shared.syntaxerr);
return;
}
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL || if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
checkType(c,set,REDIS_SET)) return; checkType(c,set,REDIS_SET)) return;