redict/src/t_list.c

998 lines
34 KiB
C
Raw Normal View History

#include "redis.h"
/*-----------------------------------------------------------------------------
* List API
*----------------------------------------------------------------------------*/
/* Check the argument length to see if it requires us to convert the ziplist
* to a real list. Only check raw-encoded objects because integer encoded
* objects are never too long. */
void listTypeTryConversion(robj *subject, robj *value) {
if (subject->encoding != REDIS_ENCODING_ZIPLIST) return;
if (value->encoding == REDIS_ENCODING_RAW &&
sdslen(value->ptr) > server.list_max_ziplist_value)
listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
}
void listTypePush(robj *subject, robj *value, int where) {
/* Check if we need to convert the ziplist */
listTypeTryConversion(subject,value);
if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
ziplistLen(subject->ptr) >= server.list_max_ziplist_entries)
listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
int pos = (where == REDIS_HEAD) ? ZIPLIST_HEAD : ZIPLIST_TAIL;
value = getDecodedObject(value);
subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),pos);
decrRefCount(value);
} else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
if (where == REDIS_HEAD) {
listAddNodeHead(subject->ptr,value);
} else {
listAddNodeTail(subject->ptr,value);
}
incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
}
robj *listTypePop(robj *subject, int where) {
robj *value = NULL;
if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *p;
unsigned char *vstr;
unsigned int vlen;
long long vlong;
int pos = (where == REDIS_HEAD) ? 0 : -1;
p = ziplistIndex(subject->ptr,pos);
if (ziplistGet(p,&vstr,&vlen,&vlong)) {
if (vstr) {
value = createStringObject((char*)vstr,vlen);
} else {
value = createStringObjectFromLongLong(vlong);
}
/* We only need to delete an element when it exists */
subject->ptr = ziplistDelete(subject->ptr,&p);
}
} else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
list *list = subject->ptr;
listNode *ln;
if (where == REDIS_HEAD) {
ln = listFirst(list);
} else {
ln = listLast(list);
}
if (ln != NULL) {
value = listNodeValue(ln);
incrRefCount(value);
listDelNode(list,ln);
}
} else {
redisPanic("Unknown list encoding");
}
return value;
}
unsigned long listTypeLength(robj *subject) {
if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
return ziplistLen(subject->ptr);
} else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
return listLength((list*)subject->ptr);
} else {
redisPanic("Unknown list encoding");
}
}
/* Initialize an iterator at the specified index. */
listTypeIterator *listTypeInitIterator(robj *subject, int index, unsigned char direction) {
listTypeIterator *li = zmalloc(sizeof(listTypeIterator));
li->subject = subject;
li->encoding = subject->encoding;
li->direction = direction;
if (li->encoding == REDIS_ENCODING_ZIPLIST) {
li->zi = ziplistIndex(subject->ptr,index);
} else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
li->ln = listIndex(subject->ptr,index);
} else {
redisPanic("Unknown list encoding");
}
return li;
}
/* Clean up the iterator. */
void listTypeReleaseIterator(listTypeIterator *li) {
zfree(li);
}
/* Stores pointer to current the entry in the provided entry structure
* and advances the position of the iterator. Returns 1 when the current
* entry is in fact an entry, 0 otherwise. */
int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
/* Protect from converting when iterating */
redisAssert(li->subject->encoding == li->encoding);
entry->li = li;
if (li->encoding == REDIS_ENCODING_ZIPLIST) {
entry->zi = li->zi;
if (entry->zi != NULL) {
if (li->direction == REDIS_TAIL)
li->zi = ziplistNext(li->subject->ptr,li->zi);
else
li->zi = ziplistPrev(li->subject->ptr,li->zi);
return 1;
}
} else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
entry->ln = li->ln;
if (entry->ln != NULL) {
if (li->direction == REDIS_TAIL)
li->ln = li->ln->next;
else
li->ln = li->ln->prev;
return 1;
}
} else {
redisPanic("Unknown list encoding");
}
return 0;
}
/* Return entry or NULL at the current position of the iterator. */
robj *listTypeGet(listTypeEntry *entry) {
listTypeIterator *li = entry->li;
robj *value = NULL;
if (li->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *vstr;
unsigned int vlen;
long long vlong;
redisAssert(entry->zi != NULL);
if (ziplistGet(entry->zi,&vstr,&vlen,&vlong)) {
if (vstr) {
value = createStringObject((char*)vstr,vlen);
} else {
value = createStringObjectFromLongLong(vlong);
}
}
} else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
redisAssert(entry->ln != NULL);
value = listNodeValue(entry->ln);
incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
return value;
}
void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
robj *subject = entry->li->subject;
if (entry->li->encoding == REDIS_ENCODING_ZIPLIST) {
value = getDecodedObject(value);
if (where == REDIS_TAIL) {
unsigned char *next = ziplistNext(subject->ptr,entry->zi);
/* When we insert after the current element, but the current element
* is the tail of the list, we need to do a push. */
if (next == NULL) {
subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),REDIS_TAIL);
} else {
subject->ptr = ziplistInsert(subject->ptr,next,value->ptr,sdslen(value->ptr));
}
} else {
subject->ptr = ziplistInsert(subject->ptr,entry->zi,value->ptr,sdslen(value->ptr));
}
decrRefCount(value);
} else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
if (where == REDIS_TAIL) {
listInsertNode(subject->ptr,entry->ln,value,AL_START_TAIL);
} else {
listInsertNode(subject->ptr,entry->ln,value,AL_START_HEAD);
}
incrRefCount(value);
} else {
redisPanic("Unknown list encoding");
}
}
/* Compare the given object with the entry at the current position. */
int listTypeEqual(listTypeEntry *entry, robj *o) {
listTypeIterator *li = entry->li;
if (li->encoding == REDIS_ENCODING_ZIPLIST) {
redisAssert(o->encoding == REDIS_ENCODING_RAW);
return ziplistCompare(entry->zi,o->ptr,sdslen(o->ptr));
} else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
return equalStringObjects(o,listNodeValue(entry->ln));
} else {
redisPanic("Unknown list encoding");
}
}
/* Delete the element pointed to. */
void listTypeDelete(listTypeEntry *entry) {
listTypeIterator *li = entry->li;
if (li->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *p = entry->zi;
li->subject->ptr = ziplistDelete(li->subject->ptr,&p);
/* Update position of the iterator depending on the direction */
if (li->direction == REDIS_TAIL)
li->zi = p;
else
li->zi = ziplistPrev(li->subject->ptr,p);
} else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
listNode *next;
if (li->direction == REDIS_TAIL)
next = entry->ln->next;
else
next = entry->ln->prev;
listDelNode(li->subject->ptr,entry->ln);
li->ln = next;
} else {
redisPanic("Unknown list encoding");
}
}
void listTypeConvert(robj *subject, int enc) {
listTypeIterator *li;
listTypeEntry entry;
redisAssert(subject->type == REDIS_LIST);
if (enc == REDIS_ENCODING_LINKEDLIST) {
list *l = listCreate();
listSetFreeMethod(l,decrRefCount);
/* listTypeGet returns a robj with incremented refcount */
li = listTypeInitIterator(subject,0,REDIS_TAIL);
while (listTypeNext(li,&entry)) listAddNodeTail(l,listTypeGet(&entry));
listTypeReleaseIterator(li);
subject->encoding = REDIS_ENCODING_LINKEDLIST;
zfree(subject->ptr);
subject->ptr = l;
} else {
redisPanic("Unsupported list conversion");
}
}
/*-----------------------------------------------------------------------------
* List Commands
*----------------------------------------------------------------------------*/
void pushGenericCommand(redisClient *c, int where) {
2011-04-15 10:35:27 -04:00
int j, addlen = 0, pushed = 0;
robj *lobj = lookupKeyWrite(c->db,c->argv[1]);
2011-04-15 10:35:27 -04:00
int may_have_waiting_clients = (lobj == NULL);
if (lobj && lobj->type != REDIS_LIST) {
addReply(c,shared.wrongtypeerr);
return;
}
for (j = 2; j < c->argc; j++) {
c->argv[j] = tryObjectEncoding(c->argv[j]);
if (may_have_waiting_clients) {
if (handleClientsWaitingListPush(c,c->argv[1],c->argv[j])) {
addlen++;
continue;
} else {
may_have_waiting_clients = 0;
}
}
2011-04-15 10:35:27 -04:00
if (!lobj) {
lobj = createZiplistObject();
dbAdd(c->db,c->argv[1],lobj);
}
2011-04-15 10:35:27 -04:00
listTypePush(lobj,c->argv[j],where);
pushed++;
}
2011-04-15 10:35:27 -04:00
addReplyLongLong(c,addlen + (lobj ? listTypeLength(lobj) : 0));
if (pushed) signalModifiedKey(c->db,c->argv[1]);
server.dirty += pushed;
}
void lpushCommand(redisClient *c) {
pushGenericCommand(c,REDIS_HEAD);
}
void rpushCommand(redisClient *c) {
pushGenericCommand(c,REDIS_TAIL);
}
void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) {
robj *subject;
listTypeIterator *iter;
listTypeEntry entry;
int inserted = 0;
if ((subject = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,subject,REDIS_LIST)) return;
if (refval != NULL) {
/* Note: we expect refval to be string-encoded because it is *not* the
* last argument of the multi-bulk LINSERT. */
redisAssert(refval->encoding == REDIS_ENCODING_RAW);
/* We're not sure if this value can be inserted yet, but we cannot
* convert the list inside the iterator. We don't want to loop over
* the list twice (once to see if the value can be inserted and once
* to do the actual insert), so we assume this value can be inserted
* and convert the ziplist to a regular list if necessary. */
listTypeTryConversion(subject,val);
/* Seek refval from head to tail */
iter = listTypeInitIterator(subject,0,REDIS_TAIL);
while (listTypeNext(iter,&entry)) {
if (listTypeEqual(&entry,refval)) {
listTypeInsert(&entry,val,where);
inserted = 1;
break;
}
}
listTypeReleaseIterator(iter);
if (inserted) {
/* Check if the length exceeds the ziplist length threshold. */
if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
ziplistLen(subject->ptr) > server.list_max_ziplist_entries)
listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
} else {
/* Notify client of a failed insert */
addReply(c,shared.cnegone);
return;
}
} else {
listTypePush(subject,val,where);
signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
addReplyLongLong(c,listTypeLength(subject));
}
void lpushxCommand(redisClient *c) {
c->argv[2] = tryObjectEncoding(c->argv[2]);
pushxGenericCommand(c,NULL,c->argv[2],REDIS_HEAD);
}
void rpushxCommand(redisClient *c) {
c->argv[2] = tryObjectEncoding(c->argv[2]);
pushxGenericCommand(c,NULL,c->argv[2],REDIS_TAIL);
}
void linsertCommand(redisClient *c) {
c->argv[4] = tryObjectEncoding(c->argv[4]);
if (strcasecmp(c->argv[2]->ptr,"after") == 0) {
pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_TAIL);
} else if (strcasecmp(c->argv[2]->ptr,"before") == 0) {
pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_HEAD);
} else {
addReply(c,shared.syntaxerr);
}
}
void llenCommand(redisClient *c) {
robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.czero);
if (o == NULL || checkType(c,o,REDIS_LIST)) return;
addReplyLongLong(c,listTypeLength(o));
}
void lindexCommand(redisClient *c) {
robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk);
if (o == NULL || checkType(c,o,REDIS_LIST)) return;
int index = atoi(c->argv[2]->ptr);
robj *value = NULL;
if (o->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *p;
unsigned char *vstr;
unsigned int vlen;
long long vlong;
p = ziplistIndex(o->ptr,index);
if (ziplistGet(p,&vstr,&vlen,&vlong)) {
if (vstr) {
value = createStringObject((char*)vstr,vlen);
} else {
value = createStringObjectFromLongLong(vlong);
}
addReplyBulk(c,value);
decrRefCount(value);
} else {
addReply(c,shared.nullbulk);
}
} else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
listNode *ln = listIndex(o->ptr,index);
if (ln != NULL) {
value = listNodeValue(ln);
addReplyBulk(c,value);
} else {
addReply(c,shared.nullbulk);
}
} else {
redisPanic("Unknown list encoding");
}
}
void lsetCommand(redisClient *c) {
robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr);
if (o == NULL || checkType(c,o,REDIS_LIST)) return;
int index = atoi(c->argv[2]->ptr);
robj *value = (c->argv[3] = tryObjectEncoding(c->argv[3]));
listTypeTryConversion(o,value);
if (o->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *p, *zl = o->ptr;
p = ziplistIndex(zl,index);
if (p == NULL) {
addReply(c,shared.outofrangeerr);
} else {
o->ptr = ziplistDelete(o->ptr,&p);
value = getDecodedObject(value);
o->ptr = ziplistInsert(o->ptr,p,value->ptr,sdslen(value->ptr));
decrRefCount(value);
addReply(c,shared.ok);
signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
} else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
listNode *ln = listIndex(o->ptr,index);
if (ln == NULL) {
addReply(c,shared.outofrangeerr);
} else {
decrRefCount((robj*)listNodeValue(ln));
listNodeValue(ln) = value;
incrRefCount(value);
addReply(c,shared.ok);
signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
} else {
redisPanic("Unknown list encoding");
}
}
void popGenericCommand(redisClient *c, int where) {
robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk);
if (o == NULL || checkType(c,o,REDIS_LIST)) return;
robj *value = listTypePop(o,where);
if (value == NULL) {
addReply(c,shared.nullbulk);
} else {
addReplyBulk(c,value);
decrRefCount(value);
if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
}
void lpopCommand(redisClient *c) {
popGenericCommand(c,REDIS_HEAD);
}
void rpopCommand(redisClient *c) {
popGenericCommand(c,REDIS_TAIL);
}
void lrangeCommand(redisClient *c) {
robj *o;
int start = atoi(c->argv[2]->ptr);
int end = atoi(c->argv[3]->ptr);
int llen;
int rangelen;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
|| checkType(c,o,REDIS_LIST)) return;
llen = listTypeLength(o);
/* convert negative indexes */
if (start < 0) start = llen+start;
if (end < 0) end = llen+end;
if (start < 0) start = 0;
/* Invariant: start >= 0, so this test will be true when end < 0.
* The range is empty when start > end or start >= length. */
if (start > end || start >= llen) {
addReply(c,shared.emptymultibulk);
return;
}
if (end >= llen) end = llen-1;
rangelen = (end-start)+1;
/* Return the result in form of a multi-bulk reply */
addReplyMultiBulkLen(c,rangelen);
if (o->encoding == REDIS_ENCODING_ZIPLIST) {
unsigned char *p = ziplistIndex(o->ptr,start);
unsigned char *vstr;
unsigned int vlen;
long long vlong;
while(rangelen--) {
ziplistGet(p,&vstr,&vlen,&vlong);
if (vstr) {
addReplyBulkCBuffer(c,vstr,vlen);
} else {
addReplyBulkLongLong(c,vlong);
}
p = ziplistNext(o->ptr,p);
}
} else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
listNode *ln = listIndex(o->ptr,start);
while(rangelen--) {
addReplyBulk(c,ln->value);
ln = ln->next;
}
} else {
redisPanic("List encoding is not LINKEDLIST nor ZIPLIST!");
}
}
void ltrimCommand(redisClient *c) {
robj *o;
int start = atoi(c->argv[2]->ptr);
int end = atoi(c->argv[3]->ptr);
int llen;
int j, ltrim, rtrim;
list *list;
listNode *ln;
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.ok)) == NULL ||
checkType(c,o,REDIS_LIST)) return;
llen = listTypeLength(o);
/* convert negative indexes */
if (start < 0) start = llen+start;
if (end < 0) end = llen+end;
if (start < 0) start = 0;
/* Invariant: start >= 0, so this test will be true when end < 0.
* The range is empty when start > end or start >= length. */
if (start > end || start >= llen) {
/* Out of range start or start > end result in empty list */
ltrim = llen;
rtrim = 0;
} else {
if (end >= llen) end = llen-1;
ltrim = start;
rtrim = llen-end-1;
}
/* Remove list elements to perform the trim */
if (o->encoding == REDIS_ENCODING_ZIPLIST) {
o->ptr = ziplistDeleteRange(o->ptr,0,ltrim);
o->ptr = ziplistDeleteRange(o->ptr,-rtrim,rtrim);
} else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
list = o->ptr;
for (j = 0; j < ltrim; j++) {
ln = listFirst(list);
listDelNode(list,ln);
}
for (j = 0; j < rtrim; j++) {
ln = listLast(list);
listDelNode(list,ln);
}
} else {
redisPanic("Unknown list encoding");
}
if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
addReply(c,shared.ok);
}
void lremCommand(redisClient *c) {
robj *subject, *obj;
obj = c->argv[3] = tryObjectEncoding(c->argv[3]);
int toremove = atoi(c->argv[2]->ptr);
int removed = 0;
listTypeEntry entry;
subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero);
if (subject == NULL || checkType(c,subject,REDIS_LIST)) return;
/* Make sure obj is raw when we're dealing with a ziplist */
if (subject->encoding == REDIS_ENCODING_ZIPLIST)
obj = getDecodedObject(obj);
listTypeIterator *li;
if (toremove < 0) {
toremove = -toremove;
li = listTypeInitIterator(subject,-1,REDIS_HEAD);
} else {
li = listTypeInitIterator(subject,0,REDIS_TAIL);
}
while (listTypeNext(li,&entry)) {
if (listTypeEqual(&entry,obj)) {
listTypeDelete(&entry);
server.dirty++;
removed++;
if (toremove && removed == toremove) break;
}
}
listTypeReleaseIterator(li);
/* Clean up raw encoded object */
if (subject->encoding == REDIS_ENCODING_ZIPLIST)
decrRefCount(obj);
if (listTypeLength(subject) == 0) dbDelete(c->db,c->argv[1]);
addReplyLongLong(c,removed);
if (removed) signalModifiedKey(c->db,c->argv[1]);
}
/* This is the semantic of this command:
* RPOPLPUSH srclist dstlist:
* IF LLEN(srclist) > 0
* element = RPOP srclist
* LPUSH dstlist element
* RETURN element
* ELSE
* RETURN nil
* END
* END
*
* The idea is to be able to get an element from a list in a reliable way
* since the element is not just returned but pushed against another list
* as well. This command was originally proposed by Ezra Zygmuntowicz.
*/
void rpoplpushHandlePush(redisClient *origclient, redisClient *c, robj *dstkey, robj *dstobj, robj *value) {
robj *aux;
if (!handleClientsWaitingListPush(origclient,dstkey,value)) {
/* Create the list if the key does not exist */
if (!dstobj) {
dstobj = createZiplistObject();
dbAdd(c->db,dstkey,dstobj);
} else {
signalModifiedKey(c->db,dstkey);
}
listTypePush(dstobj,value,REDIS_HEAD);
/* If we are pushing as a result of LPUSH against a key
* watched by BRPOPLPUSH, we need to rewrite the command vector
* as an LPUSH.
*
* If this is called directly by RPOPLPUSH (either directly
* or via a BRPOPLPUSH where the popped list exists)
* we should replicate the RPOPLPUSH command itself. */
if (c != origclient) {
aux = createStringObject("LPUSH",5);
rewriteClientCommandVector(origclient,3,aux,dstkey,value);
decrRefCount(aux);
} else {
/* Make sure to always use RPOPLPUSH in the replication / AOF,
* even if the original command was BRPOPLPUSH. */
aux = createStringObject("RPOPLPUSH",9);
rewriteClientCommandVector(origclient,3,aux,c->argv[1],c->argv[2]);
decrRefCount(aux);
}
server.dirty++;
}
/* Always send the pushed value to the client. */
addReplyBulk(c,value);
}
2010-11-08 08:43:21 -05:00
void rpoplpushCommand(redisClient *c) {
robj *sobj, *value;
if ((sobj = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
checkType(c,sobj,REDIS_LIST)) return;
if (listTypeLength(sobj) == 0) {
addReply(c,shared.nullbulk);
} else {
robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
robj *touchedkey = c->argv[1];
if (dobj && checkType(c,dobj,REDIS_LIST)) return;
value = listTypePop(sobj,REDIS_TAIL);
/* We saved touched key, and protect it, since rpoplpushHandlePush
* may change the client command argument vector. */
incrRefCount(touchedkey);
rpoplpushHandlePush(c,c,c->argv[2],dobj,value);
/* listTypePop returns an object with its refcount incremented */
decrRefCount(value);
/* Delete the source list when it is empty */
if (listTypeLength(sobj) == 0) dbDelete(c->db,touchedkey);
signalModifiedKey(c->db,touchedkey);
decrRefCount(touchedkey);
server.dirty++;
}
}
/*-----------------------------------------------------------------------------
* Blocking POP operations
*----------------------------------------------------------------------------*/
/* Currently Redis blocking operations support is limited to list POP ops,
* so the current implementation is not fully generic, but it is also not
* completely specific so it will not require a rewrite to support new
* kind of blocking operations in the future.
*
* Still it's important to note that list blocking operations can be already
* used as a notification mechanism in order to implement other blocking
* operations at application level, so there must be a very strong evidence
* of usefulness and generality before new blocking operations are implemented.
*
* This is how the current blocking POP works, we use BLPOP as example:
* - If the user calls BLPOP and the key exists and contains a non empty list
* then LPOP is called instead. So BLPOP is semantically the same as LPOP
* if there is not to block.
* - If instead BLPOP is called and the key does not exists or the list is
* empty we need to block. In order to do so we remove the notification for
* new data to read in the client socket (so that we'll not serve new
* requests if the blocking request is not served). Also we put the client
* in a dictionary (db->blocking_keys) mapping keys to a list of clients
* blocking for this keys.
* - If a PUSH operation against a key with blocked clients waiting is
* performed, we serve the first in the list: basically instead to push
* the new element inside the list we return it to the (first / oldest)
* blocking client, unblock the client, and remove it form the list.
*
* The above comment and the source code should be enough in order to understand
* the implementation and modify / fix it later.
*/
/* Set a client in blocking mode for the specified key, with the specified
* timeout */
2010-11-08 18:47:46 -05:00
void blockForKeys(redisClient *c, robj **keys, int numkeys, time_t timeout, robj *target) {
dictEntry *de;
list *l;
int j;
2010-11-09 13:06:25 -05:00
c->bpop.keys = zmalloc(sizeof(robj*)*numkeys);
c->bpop.count = numkeys;
c->bpop.timeout = timeout;
c->bpop.target = target;
2010-11-08 18:47:46 -05:00
if (target != NULL) {
2010-12-06 10:04:42 -05:00
incrRefCount(target);
2010-11-08 18:47:46 -05:00
}
for (j = 0; j < numkeys; j++) {
/* Add the key in the client structure, to map clients -> keys */
2010-11-09 13:06:25 -05:00
c->bpop.keys[j] = keys[j];
incrRefCount(keys[j]);
/* And in the other "side", to map keys -> clients */
de = dictFind(c->db->blocking_keys,keys[j]);
if (de == NULL) {
int retval;
/* For every key we take a list of clients blocked for it */
l = listCreate();
retval = dictAdd(c->db->blocking_keys,keys[j],l);
incrRefCount(keys[j]);
redisAssert(retval == DICT_OK);
} else {
l = dictGetEntryVal(de);
}
listAddNodeTail(l,c);
}
/* Mark the client as a blocked client */
c->flags |= REDIS_BLOCKED;
server.bpop_blocked_clients++;
}
/* Unblock a client that's waiting in a blocking operation such as BLPOP */
void unblockClientWaitingData(redisClient *c) {
dictEntry *de;
list *l;
int j;
2010-11-09 13:06:25 -05:00
redisAssert(c->bpop.keys != NULL);
/* The client may wait for multiple keys, so unblock it for every key. */
2010-11-09 13:06:25 -05:00
for (j = 0; j < c->bpop.count; j++) {
/* Remove this client from the list of clients waiting for this key. */
2010-11-09 13:06:25 -05:00
de = dictFind(c->db->blocking_keys,c->bpop.keys[j]);
redisAssert(de != NULL);
l = dictGetEntryVal(de);
listDelNode(l,listSearchKey(l,c));
/* If the list is empty we need to remove it to avoid wasting memory */
if (listLength(l) == 0)
2010-11-09 13:06:25 -05:00
dictDelete(c->db->blocking_keys,c->bpop.keys[j]);
decrRefCount(c->bpop.keys[j]);
}
2010-11-08 18:47:46 -05:00
/* Cleanup the client structure */
2010-11-09 13:06:25 -05:00
zfree(c->bpop.keys);
c->bpop.keys = NULL;
if (c->bpop.target) decrRefCount(c->bpop.target);
2010-11-09 13:06:25 -05:00
c->bpop.target = NULL;
c->flags &= ~REDIS_BLOCKED;
c->flags |= REDIS_UNBLOCKED;
server.bpop_blocked_clients--;
listAddNodeTail(server.unblocked_clients,c);
}
/* This should be called from any function PUSHing into lists.
* 'c' is the "pushing client", 'key' is the key it is pushing data against,
* 'ele' is the element pushed.
*
* If the function returns 0 there was no client waiting for a list push
* against this key.
*
* If the function returns 1 there was a client waiting for a list push
* against this key, the element was passed to this client thus it's not
* needed to actually add it to the list and the caller should return asap. */
int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele) {
struct dictEntry *de;
redisClient *receiver;
int numclients;
list *clients;
listNode *ln;
robj *dstkey, *dstobj;
de = dictFind(c->db->blocking_keys,key);
if (de == NULL) return 0;
clients = dictGetEntryVal(de);
numclients = listLength(clients);
/* Try to handle the push as long as there are clients waiting for a push.
* Note that "numclients" is used because the list of clients waiting for a
* push on "key" is deleted by unblockClient() when empty.
*
* This loop will have more than 1 iteration when there is a BRPOPLPUSH
* that cannot push the target list because it does not contain a list. If
* this happens, it simply tries the next client waiting for a push. */
while (numclients--) {
ln = listFirst(clients);
redisAssert(ln != NULL);
receiver = ln->value;
dstkey = receiver->bpop.target;
/* Protect receiver->bpop.target, that will be freed by
* the next unblockClientWaitingData() call. */
if (dstkey) incrRefCount(dstkey);
/* This should remove the first element of the "clients" list. */
unblockClientWaitingData(receiver);
if (dstkey == NULL) {
/* BRPOP/BLPOP */
addReplyMultiBulkLen(receiver,2);
addReplyBulk(receiver,key);
addReplyBulk(receiver,ele);
return 1; /* Serve just the first client as in B[RL]POP semantics */
} else {
2011-01-31 10:50:17 -05:00
/* BRPOPLPUSH, note that receiver->db is always equal to c->db. */
dstobj = lookupKeyWrite(receiver->db,dstkey);
if (!(dstobj && checkType(receiver,dstobj,REDIS_LIST))) {
rpoplpushHandlePush(c,receiver,dstkey,dstobj,ele);
decrRefCount(dstkey);
return 1;
}
decrRefCount(dstkey);
}
2010-11-08 13:25:59 -05:00
}
return 0;
}
2010-12-06 07:45:48 -05:00
int getTimeoutFromObjectOrReply(redisClient *c, robj *object, time_t *timeout) {
long tval;
2010-11-09 09:00:54 -05:00
2010-12-06 07:45:48 -05:00
if (getLongFromObjectOrReply(c,object,&tval,
"timeout is not an integer or out of range") != REDIS_OK)
2010-11-09 09:00:54 -05:00
return REDIS_ERR;
2010-12-06 07:45:48 -05:00
if (tval < 0) {
addReplyError(c,"timeout is negative");
2010-11-09 09:00:54 -05:00
return REDIS_ERR;
}
2010-12-06 07:45:48 -05:00
if (tval > 0) tval += time(NULL);
*timeout = tval;
2010-11-09 09:00:54 -05:00
return REDIS_OK;
}
/* Blocking RPOP/LPOP */
void blockingPopGenericCommand(redisClient *c, int where) {
robj *o;
time_t timeout;
int j;
2010-12-06 07:45:48 -05:00
if (getTimeoutFromObjectOrReply(c,c->argv[c->argc-1],&timeout) != REDIS_OK)
return;
for (j = 1; j < c->argc-1; j++) {
o = lookupKeyWrite(c->db,c->argv[j]);
if (o != NULL) {
if (o->type != REDIS_LIST) {
addReply(c,shared.wrongtypeerr);
return;
} else {
if (listTypeLength(o) != 0) {
/* If the list contains elements fall back to the usual
* non-blocking POP operation */
struct redisCommand *orig_cmd;
robj *argv[2], **orig_argv;
int orig_argc;
/* We need to alter the command arguments before to call
* popGenericCommand() as the command takes a single key. */
orig_argv = c->argv;
orig_argc = c->argc;
orig_cmd = c->cmd;
argv[1] = c->argv[j];
c->argv = argv;
c->argc = 2;
/* Also the return value is different, we need to output
* the multi bulk reply header and the key name. The
* "real" command will add the last element (the value)
* for us. If this souds like an hack to you it's just
* because it is... */
addReplyMultiBulkLen(c,2);
addReplyBulk(c,argv[1]);
2010-11-08 18:47:46 -05:00
popGenericCommand(c,where);
/* Fix the client structure with the original stuff */
c->argv = orig_argv;
c->argc = orig_argc;
c->cmd = orig_cmd;
2010-11-08 13:25:59 -05:00
return;
}
}
}
}
/* If we are inside a MULTI/EXEC and the list is empty the only thing
* we can do is treating it as a timeout (even with timeout 0). */
if (c->flags & REDIS_MULTI) {
addReply(c,shared.nullmultibulk);
return;
}
/* If the list is empty or the key does not exists we must block */
2010-11-08 18:47:46 -05:00
blockForKeys(c, c->argv + 1, c->argc - 2, timeout, NULL);
}
void blpopCommand(redisClient *c) {
blockingPopGenericCommand(c,REDIS_HEAD);
}
void brpopCommand(redisClient *c) {
blockingPopGenericCommand(c,REDIS_TAIL);
}
2010-11-08 13:25:59 -05:00
void brpoplpushCommand(redisClient *c) {
2010-11-08 18:47:46 -05:00
time_t timeout;
2010-11-08 13:25:59 -05:00
2010-12-06 07:45:48 -05:00
if (getTimeoutFromObjectOrReply(c,c->argv[3],&timeout) != REDIS_OK)
2010-11-08 18:47:46 -05:00
return;
robj *key = lookupKeyWrite(c->db, c->argv[1]);
if (key == NULL) {
if (c->flags & REDIS_MULTI) {
/* Blocking against an empty list in a multi state
* returns immediately. */
addReply(c, shared.nullbulk);
2010-11-08 18:47:46 -05:00
} else {
/* The list is empty and the client blocks. */
2010-11-08 18:47:46 -05:00
blockForKeys(c, c->argv + 1, 1, timeout, c->argv[2]);
}
} else {
if (key->type != REDIS_LIST) {
addReply(c, shared.wrongtypeerr);
} else {
/* The list exists and has elements, so
* the regular rpoplpushCommand is executed. */
redisAssert(listTypeLength(key) > 0);
rpoplpushCommand(c);
}
2010-11-08 18:47:46 -05:00
}
2010-11-08 13:25:59 -05:00
}