#include "redis.h" #include static void setProtocolError(redisClient *c, int pos); /* To evaluate the output buffer size of a client we need to get size of * allocated objects, however we can't used zmalloc_size() directly on sds * strings because of the trick they use to work (the header is before the * returned pointer), so we use this helper function. */ size_t zmalloc_size_sds(sds s) { return zmalloc_size(s-sizeof(struct sdshdr)); } void *dupClientReplyValue(void *o) { incrRefCount((robj*)o); return o; } int listMatchObjects(void *a, void *b) { return equalStringObjects(a,b); } redisClient *createClient(int fd) { redisClient *c = zmalloc(sizeof(redisClient)); /* passing -1 as fd it is possible to create a non connected client. * This is useful since all the Redis commands needs to be executed * in the context of a client. When commands are executed in other * contexts (for instance a Lua script) we need a non connected client. */ if (fd != -1) { anetNonBlock(NULL,fd); anetTcpNoDelay(NULL,fd); if (aeCreateFileEvent(server.el,fd,AE_READABLE, readQueryFromClient, c) == AE_ERR) { close(fd); zfree(c); return NULL; } } selectDb(c,0); c->fd = fd; c->bufpos = 0; c->querybuf = sdsempty(); c->reqtype = 0; c->argc = 0; c->argv = NULL; c->cmd = c->lastcmd = NULL; c->multibulklen = 0; c->bulklen = -1; c->sentlen = 0; c->flags = 0; c->ctime = c->lastinteraction = time(NULL); c->authenticated = 0; c->replstate = REDIS_REPL_NONE; c->reply = listCreate(); c->reply_bytes = 0; c->obuf_soft_limit_reached_time = 0; listSetFreeMethod(c->reply,decrRefCount); listSetDupMethod(c->reply,dupClientReplyValue); c->bpop.keys = NULL; c->bpop.count = 0; c->bpop.timeout = 0; c->bpop.target = NULL; c->io_keys = listCreate(); c->watched_keys = listCreate(); listSetFreeMethod(c->io_keys,decrRefCount); c->pubsub_channels = dictCreate(&setDictType,NULL); c->pubsub_patterns = listCreate(); listSetFreeMethod(c->pubsub_patterns,decrRefCount); listSetMatchMethod(c->pubsub_patterns,listMatchObjects); if (fd != -1) listAddNodeTail(server.clients,c); initClientMultiState(c); return c; } /* This function is called every time we are going to transmit new data * to the client. The behavior is the following: * * If the client should receive new data (normal clients will) the function * returns REDIS_OK, and make sure to install the write handler in our event * loop so that when the socket is writable new data gets written. * * If the client should not receive new data, because it is a fake client * or a slave, or because the setup of the write handler failed, the function * returns REDIS_ERR. * * Typically gets called every time a reply is built, before adding more * data to the clients output buffers. If the function returns REDIS_ERR no * data should be appended to the output buffers. */ int prepareClientToWrite(redisClient *c) { if (c->flags & REDIS_LUA_CLIENT) return REDIS_OK; if (c->fd <= 0) return REDIS_ERR; /* Fake client */ if (c->bufpos == 0 && listLength(c->reply) == 0 && (c->replstate == REDIS_REPL_NONE || c->replstate == REDIS_REPL_ONLINE) && aeCreateFileEvent(server.el, c->fd, AE_WRITABLE, sendReplyToClient, c) == AE_ERR) return REDIS_ERR; return REDIS_OK; } /* Create a duplicate of the last object in the reply list when * it is not exclusively owned by the reply list. */ robj *dupLastObjectIfNeeded(list *reply) { robj *new, *cur; listNode *ln; redisAssert(listLength(reply) > 0); ln = listLast(reply); cur = listNodeValue(ln); if (cur->refcount > 1) { new = dupStringObject(cur); decrRefCount(cur); listNodeValue(ln) = new; } return listNodeValue(ln); } /* ----------------------------------------------------------------------------- * Low level functions to add more data to output buffers. * -------------------------------------------------------------------------- */ int _addReplyToBuffer(redisClient *c, char *s, size_t len) { size_t available = sizeof(c->buf)-c->bufpos; if (c->flags & REDIS_CLOSE_AFTER_REPLY) return REDIS_OK; /* If there already are entries in the reply list, we cannot * add anything more to the static buffer. */ if (listLength(c->reply) > 0) return REDIS_ERR; /* Check that the buffer has enough space available for this string. */ if (len > available) return REDIS_ERR; memcpy(c->buf+c->bufpos,s,len); c->bufpos+=len; return REDIS_OK; } void _addReplyObjectToList(redisClient *c, robj *o) { robj *tail; if (c->flags & REDIS_CLOSE_AFTER_REPLY) return; if (listLength(c->reply) == 0) { incrRefCount(o); listAddNodeTail(c->reply,o); c->reply_bytes += zmalloc_size_sds(o->ptr); } else { tail = listNodeValue(listLast(c->reply)); /* Append to this object when possible. */ if (tail->ptr != NULL && sdslen(tail->ptr)+sdslen(o->ptr) <= REDIS_REPLY_CHUNK_BYTES) { c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr)); c->reply_bytes += zmalloc_size_sds(tail->ptr); } else { incrRefCount(o); listAddNodeTail(c->reply,o); c->reply_bytes += zmalloc_size_sds(o->ptr); } } asyncCloseClientOnOutputBufferLimitReached(c); } /* This method takes responsibility over the sds. When it is no longer * needed it will be free'd, otherwise it ends up in a robj. */ void _addReplySdsToList(redisClient *c, sds s) { robj *tail; if (c->flags & REDIS_CLOSE_AFTER_REPLY) { sdsfree(s); return; } if (listLength(c->reply) == 0) { listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); c->reply_bytes += zmalloc_size_sds(s); } else { tail = listNodeValue(listLast(c->reply)); /* Append to this object when possible. */ if (tail->ptr != NULL && sdslen(tail->ptr)+sdslen(s) <= REDIS_REPLY_CHUNK_BYTES) { c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,s,sdslen(s)); c->reply_bytes += zmalloc_size_sds(tail->ptr); sdsfree(s); } else { listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); c->reply_bytes += zmalloc_size_sds(s); } } asyncCloseClientOnOutputBufferLimitReached(c); } void _addReplyStringToList(redisClient *c, char *s, size_t len) { robj *tail; if (c->flags & REDIS_CLOSE_AFTER_REPLY) return; if (listLength(c->reply) == 0) { robj *o = createStringObject(s,len); listAddNodeTail(c->reply,o); c->reply_bytes += zmalloc_size_sds(o->ptr); } else { tail = listNodeValue(listLast(c->reply)); /* Append to this object when possible. */ if (tail->ptr != NULL && sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES) { c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,s,len); c->reply_bytes += zmalloc_size_sds(tail->ptr); } else { robj *o = createStringObject(s,len); listAddNodeTail(c->reply,o); c->reply_bytes += zmalloc_size_sds(o->ptr); } } asyncCloseClientOnOutputBufferLimitReached(c); } /* ----------------------------------------------------------------------------- * Higher level functions to queue data on the client output buffer. * The following functions are the ones that commands implementations will call. * -------------------------------------------------------------------------- */ void addReply(redisClient *c, robj *obj) { if (prepareClientToWrite(c) != REDIS_OK) return; /* This is an important place where we can avoid copy-on-write * when there is a saving child running, avoiding touching the * refcount field of the object if it's not needed. * * If the encoding is RAW and there is room in the static buffer * we'll be able to send the object to the client without * messing with its page. */ if (obj->encoding == REDIS_ENCODING_RAW) { if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) _addReplyObjectToList(c,obj); } else if (obj->encoding == REDIS_ENCODING_INT) { /* Optimization: if there is room in the static buffer for 32 bytes * (more than the max chars a 64 bit integer can take as string) we * avoid decoding the object and go for the lower level approach. */ if (listLength(c->reply) == 0 && (sizeof(c->buf) - c->bufpos) >= 32) { char buf[32]; int len; len = ll2string(buf,sizeof(buf),(long)obj->ptr); if (_addReplyToBuffer(c,buf,len) == REDIS_OK) return; /* else... continue with the normal code path, but should never * happen actually since we verified there is room. */ } obj = getDecodedObject(obj); if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) _addReplyObjectToList(c,obj); decrRefCount(obj); } else { redisPanic("Wrong obj->encoding in addReply()"); } } void addReplySds(redisClient *c, sds s) { if (prepareClientToWrite(c) != REDIS_OK) { /* The caller expects the sds to be free'd. */ sdsfree(s); return; } if (_addReplyToBuffer(c,s,sdslen(s)) == REDIS_OK) { sdsfree(s); } else { /* This method free's the sds when it is no longer needed. */ _addReplySdsToList(c,s); } } void addReplyString(redisClient *c, char *s, size_t len) { if (prepareClientToWrite(c) != REDIS_OK) return; if (_addReplyToBuffer(c,s,len) != REDIS_OK) _addReplyStringToList(c,s,len); } void addReplyErrorLength(redisClient *c, char *s, size_t len) { addReplyString(c,"-ERR ",5); addReplyString(c,s,len); addReplyString(c,"\r\n",2); } void addReplyError(redisClient *c, char *err) { addReplyErrorLength(c,err,strlen(err)); } void addReplyErrorFormat(redisClient *c, const char *fmt, ...) { size_t l, j; va_list ap; va_start(ap,fmt); sds s = sdscatvprintf(sdsempty(),fmt,ap); va_end(ap); /* Make sure there are no newlines in the string, otherwise invalid protocol * is emitted. */ l = sdslen(s); for (j = 0; j < l; j++) { if (s[j] == '\r' || s[j] == '\n') s[j] = ' '; } addReplyErrorLength(c,s,sdslen(s)); sdsfree(s); } void addReplyStatusLength(redisClient *c, char *s, size_t len) { addReplyString(c,"+",1); addReplyString(c,s,len); addReplyString(c,"\r\n",2); } void addReplyStatus(redisClient *c, char *status) { addReplyStatusLength(c,status,strlen(status)); } void addReplyStatusFormat(redisClient *c, const char *fmt, ...) { va_list ap; va_start(ap,fmt); sds s = sdscatvprintf(sdsempty(),fmt,ap); va_end(ap); addReplyStatusLength(c,s,sdslen(s)); sdsfree(s); } /* Adds an empty object to the reply list that will contain the multi bulk * length, which is not known when this function is called. */ void *addDeferredMultiBulkLength(redisClient *c) { /* Note that we install the write event here even if the object is not * ready to be sent, since we are sure that before returning to the * event loop setDeferredMultiBulkLength() will be called. */ if (prepareClientToWrite(c) != REDIS_OK) return NULL; listAddNodeTail(c->reply,createObject(REDIS_STRING,NULL)); return listLast(c->reply); } /* Populate the length object and try glueing it to the next chunk. */ void setDeferredMultiBulkLength(redisClient *c, void *node, long length) { listNode *ln = (listNode*)node; robj *len, *next; /* Abort when *node is NULL (see addDeferredMultiBulkLength). */ if (node == NULL) return; len = listNodeValue(ln); len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length); c->reply_bytes += zmalloc_size_sds(len->ptr); if (ln->next != NULL) { next = listNodeValue(ln->next); /* Only glue when the next node is non-NULL (an sds in this case) */ if (next->ptr != NULL) { len->ptr = sdscatlen(len->ptr,next->ptr,sdslen(next->ptr)); listDelNode(c->reply,ln->next); } } asyncCloseClientOnOutputBufferLimitReached(c); } /* Add a duble as a bulk reply */ void addReplyDouble(redisClient *c, double d) { char dbuf[128], sbuf[128]; int dlen, slen; dlen = snprintf(dbuf,sizeof(dbuf),"%.17g",d); slen = snprintf(sbuf,sizeof(sbuf),"$%d\r\n%s\r\n",dlen,dbuf); addReplyString(c,sbuf,slen); } /* Add a long long as integer reply or bulk len / multi bulk count. * Basically this is used to output . */ void addReplyLongLongWithPrefix(redisClient *c, long long ll, char prefix) { char buf[128]; int len; /* Things like $3\r\n or *2\r\n are emitted very often by the protocol * so we have a few shared objects to use if the integer is small * like it is most of the times. */ if (prefix == '*' && ll < REDIS_SHARED_BULKHDR_LEN) { addReply(c,shared.mbulkhdr[ll]); return; } else if (prefix == '$' && ll < REDIS_SHARED_BULKHDR_LEN) { addReply(c,shared.bulkhdr[ll]); return; } buf[0] = prefix; len = ll2string(buf+1,sizeof(buf)-1,ll); buf[len+1] = '\r'; buf[len+2] = '\n'; addReplyString(c,buf,len+3); } void addReplyLongLong(redisClient *c, long long ll) { if (ll == 0) addReply(c,shared.czero); else if (ll == 1) addReply(c,shared.cone); else addReplyLongLongWithPrefix(c,ll,':'); } void addReplyMultiBulkLen(redisClient *c, long length) { addReplyLongLongWithPrefix(c,length,'*'); } /* Create the length prefix of a bulk reply, example: $2234 */ void addReplyBulkLen(redisClient *c, robj *obj) { size_t len; if (obj->encoding == REDIS_ENCODING_RAW) { len = sdslen(obj->ptr); } else { long n = (long)obj->ptr; /* Compute how many bytes will take this integer as a radix 10 string */ len = 1; if (n < 0) { len++; n = -n; } while((n = n/10) != 0) { len++; } } addReplyLongLongWithPrefix(c,len,'$'); } /* Add a Redis Object as a bulk reply */ void addReplyBulk(redisClient *c, robj *obj) { addReplyBulkLen(c,obj); addReply(c,obj); addReply(c,shared.crlf); } /* Add a C buffer as bulk reply */ void addReplyBulkCBuffer(redisClient *c, void *p, size_t len) { addReplyLongLongWithPrefix(c,len,'$'); addReplyString(c,p,len); addReply(c,shared.crlf); } /* Add a C nul term string as bulk reply */ void addReplyBulkCString(redisClient *c, char *s) { if (s == NULL) { addReply(c,shared.nullbulk); } else { addReplyBulkCBuffer(c,s,strlen(s)); } } /* Add a long long as a bulk reply */ void addReplyBulkLongLong(redisClient *c, long long ll) { char buf[64]; int len; len = ll2string(buf,64,ll); addReplyBulkCBuffer(c,buf,len); } /* Copy 'src' client output buffers into 'dst' client output buffers. * The function takes care of freeing the old output buffers of the * destination client. */ void copyClientOutputBuffer(redisClient *dst, redisClient *src) { listRelease(dst->reply); dst->reply = listDup(src->reply); memcpy(dst->buf,src->buf,src->bufpos); dst->bufpos = src->bufpos; dst->reply_bytes = src->reply_bytes; } static void acceptCommonHandler(int fd) { redisClient *c; if ((c = createClient(fd)) == NULL) { redisLog(REDIS_WARNING,"Error allocating resoures for the client"); close(fd); /* May be already closed, just ingore errors */ return; } /* If maxclient directive is set and this is one client more... close the * connection. Note that we create the client instead to check before * for this condition, since now the socket is already set in nonblocking * mode and we can send an error for free using the Kernel I/O */ if (listLength(server.clients) > server.maxclients) { char *err = "-ERR max number of clients reached\r\n"; /* That's a best effort error message, don't check write errors */ if (write(c->fd,err,strlen(err)) == -1) { /* Nothing to do, Just to avoid the warning... */ } server.stat_rejected_conn++; freeClient(c); return; } server.stat_numconnections++; } void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) { int cport, cfd; char cip[128]; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); REDIS_NOTUSED(privdata); cfd = anetTcpAccept(server.neterr, fd, cip, &cport); if (cfd == AE_ERR) { redisLog(REDIS_WARNING,"Accepting client connection: %s", server.neterr); return; } redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport); acceptCommonHandler(cfd); } void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) { int cfd; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); REDIS_NOTUSED(privdata); cfd = anetUnixAccept(server.neterr, fd); if (cfd == AE_ERR) { redisLog(REDIS_WARNING,"Accepting client connection: %s", server.neterr); return; } redisLog(REDIS_VERBOSE,"Accepted connection to %s", server.unixsocket); acceptCommonHandler(cfd); } static void freeClientArgv(redisClient *c) { int j; for (j = 0; j < c->argc; j++) decrRefCount(c->argv[j]); c->argc = 0; c->cmd = NULL; } void freeClient(redisClient *c) { listNode *ln; /* If this is marked as current client unset it */ if (server.current_client == c) server.current_client = NULL; /* Note that if the client we are freeing is blocked into a blocking * call, we have to set querybuf to NULL *before* to call * unblockClientWaitingData() to avoid processInputBuffer() will get * called. Also it is important to remove the file events after * this, because this call adds the READABLE event. */ sdsfree(c->querybuf); c->querybuf = NULL; if (c->flags & REDIS_BLOCKED) unblockClientWaitingData(c); /* UNWATCH all the keys */ unwatchAllKeys(c); listRelease(c->watched_keys); /* Unsubscribe from all the pubsub channels */ pubsubUnsubscribeAllChannels(c,0); pubsubUnsubscribeAllPatterns(c,0); dictRelease(c->pubsub_channels); listRelease(c->pubsub_patterns); /* Obvious cleanup */ aeDeleteFileEvent(server.el,c->fd,AE_READABLE); aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); listRelease(c->reply); freeClientArgv(c); close(c->fd); /* Remove from the list of clients */ ln = listSearchKey(server.clients,c); redisAssert(ln != NULL); listDelNode(server.clients,ln); /* When client was just unblocked because of a blocking operation, * remove it from the list with unblocked clients. */ if (c->flags & REDIS_UNBLOCKED) { ln = listSearchKey(server.unblocked_clients,c); redisAssert(ln != NULL); listDelNode(server.unblocked_clients,ln); } listRelease(c->io_keys); /* Master/slave cleanup. * Case 1: we lost the connection with a slave. */ if (c->flags & REDIS_SLAVE) { if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1) close(c->repldbfd); list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves; ln = listSearchKey(l,c); redisAssert(ln != NULL); listDelNode(l,ln); } /* Case 2: we lost the connection with the master. */ if (c->flags & REDIS_MASTER) { server.master = NULL; server.repl_state = REDIS_REPL_CONNECT; server.repl_down_since = time(NULL); /* Since we lost the connection with the master, we should also * close the connection with all our slaves if we have any, so * when we'll resync with the master the other slaves will sync again * with us as well. Note that also when the slave is not connected * to the master it will keep refusing connections by other slaves. * * We do this only if server.masterhost != NULL. If it is NULL this * means the user called SLAVEOF NO ONE and we are freeing our * link with the master, so no need to close link with slaves. */ if (server.masterhost != NULL) { while (listLength(server.slaves)) { ln = listFirst(server.slaves); freeClient((redisClient*)ln->value); } } } /* If this client was scheduled for async freeing we need to remove it * from the queue. */ if (c->flags & REDIS_CLOSE_ASAP) { ln = listSearchKey(server.clients_to_close,c); redisAssert(ln != NULL); listDelNode(server.clients_to_close,ln); } /* Release memory */ zfree(c->argv); freeClientMultiState(c); zfree(c); } /* Schedule a client to free it at a safe time in the serverCron() function. * This function is useful when we need to terminate a client but we are in * a context where calling freeClient() is not possible, because the client * should be valid for the continuation of the flow of the program. */ void freeClientAsync(redisClient *c) { if (c->flags & REDIS_CLOSE_ASAP) return; c->flags |= REDIS_CLOSE_ASAP; listAddNodeTail(server.clients_to_close,c); } void freeClientsInAsyncFreeQueue(void) { while (listLength(server.clients_to_close)) { listNode *ln = listFirst(server.clients_to_close); redisClient *c = listNodeValue(ln); c->flags &= ~REDIS_CLOSE_ASAP; freeClient(c); listDelNode(server.clients_to_close,ln); } } void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { redisClient *c = privdata; int nwritten = 0, totwritten = 0, objlen; size_t objmem; robj *o; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); while(c->bufpos > 0 || listLength(c->reply)) { if (c->bufpos > 0) { if (c->flags & REDIS_MASTER) { /* Don't reply to a master */ nwritten = c->bufpos - c->sentlen; } else { nwritten = write(fd,c->buf+c->sentlen,c->bufpos-c->sentlen); if (nwritten <= 0) break; } c->sentlen += nwritten; totwritten += nwritten; /* If the buffer was sent, set bufpos to zero to continue with * the remainder of the reply. */ if (c->sentlen == c->bufpos) { c->bufpos = 0; c->sentlen = 0; } } else { o = listNodeValue(listFirst(c->reply)); objlen = sdslen(o->ptr); objmem = zmalloc_size_sds(o->ptr); if (objlen == 0) { listDelNode(c->reply,listFirst(c->reply)); continue; } if (c->flags & REDIS_MASTER) { /* Don't reply to a master */ nwritten = objlen - c->sentlen; } else { nwritten = write(fd, ((char*)o->ptr)+c->sentlen,objlen-c->sentlen); if (nwritten <= 0) break; } c->sentlen += nwritten; totwritten += nwritten; /* If we fully sent the object on head go to the next one */ if (c->sentlen == objlen) { listDelNode(c->reply,listFirst(c->reply)); c->sentlen = 0; c->reply_bytes -= objmem; } } /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT * bytes, in a single threaded server it's a good idea to serve * other clients as well, even if a very large request comes from * super fast link that is always able to accept data (in real world * scenario think about 'KEYS *' against the loopback interface). * * However if we are over the maxmemory limit we ignore that and * just deliver as much data as it is possible to deliver. */ if (totwritten > REDIS_MAX_WRITE_PER_EVENT && (server.maxmemory == 0 || zmalloc_used_memory() < server.maxmemory)) break; } if (nwritten == -1) { if (errno == EAGAIN) { nwritten = 0; } else { redisLog(REDIS_VERBOSE, "Error writing to client: %s", strerror(errno)); freeClient(c); return; } } if (totwritten > 0) c->lastinteraction = time(NULL); if (c->bufpos == 0 && listLength(c->reply) == 0) { c->sentlen = 0; aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); /* Close connection after entire reply has been sent. */ if (c->flags & REDIS_CLOSE_AFTER_REPLY) freeClient(c); } } /* resetClient prepare the client to process the next command */ void resetClient(redisClient *c) { freeClientArgv(c); c->reqtype = 0; c->multibulklen = 0; c->bulklen = -1; /* We clear the ASKING flag as well if we are not inside a MULTI. */ if (!(c->flags & REDIS_MULTI)) c->flags &= (~REDIS_ASKING); } int processInlineBuffer(redisClient *c) { char *newline = strstr(c->querybuf,"\r\n"); int argc, j; sds *argv; size_t querylen; /* Nothing to do without a \r\n */ if (newline == NULL) { if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) { addReplyError(c,"Protocol error: too big inline request"); setProtocolError(c,0); } return REDIS_ERR; } /* Split the input buffer up to the \r\n */ querylen = newline-(c->querybuf); argv = sdssplitlen(c->querybuf,querylen," ",1,&argc); /* Leave data after the first line of the query in the buffer */ c->querybuf = sdsrange(c->querybuf,querylen+2,-1); /* Setup argv array on client structure */ if (c->argv) zfree(c->argv); c->argv = zmalloc(sizeof(robj*)*argc); /* Create redis objects for all arguments. */ for (c->argc = 0, j = 0; j < argc; j++) { if (sdslen(argv[j])) { c->argv[c->argc] = createObject(REDIS_STRING,argv[j]); c->argc++; } else { sdsfree(argv[j]); } } zfree(argv); return REDIS_OK; } /* Helper function. Trims query buffer to make the function that processes * multi bulk requests idempotent. */ static void setProtocolError(redisClient *c, int pos) { if (server.verbosity >= REDIS_VERBOSE) { sds client = getClientInfoString(c); redisLog(REDIS_VERBOSE, "Protocol error from client: %s", client); sdsfree(client); } c->flags |= REDIS_CLOSE_AFTER_REPLY; c->querybuf = sdsrange(c->querybuf,pos,-1); } int processMultibulkBuffer(redisClient *c) { char *newline = NULL; int pos = 0, ok; long long ll; if (c->multibulklen == 0) { /* The client should have been reset */ redisAssertWithInfo(c,NULL,c->argc == 0); /* Multi bulk length cannot be read without a \r\n */ newline = strchr(c->querybuf,'\r'); if (newline == NULL) { if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) { addReplyError(c,"Protocol error: too big mbulk count string"); setProtocolError(c,0); } return REDIS_ERR; } /* Buffer should also contain \n */ if (newline-(c->querybuf) > ((signed)sdslen(c->querybuf)-2)) return REDIS_ERR; /* We know for sure there is a whole line since newline != NULL, * so go ahead and find out the multi bulk length. */ redisAssertWithInfo(c,NULL,c->querybuf[0] == '*'); ok = string2ll(c->querybuf+1,newline-(c->querybuf+1),&ll); if (!ok || ll > 1024*1024) { addReplyError(c,"Protocol error: invalid multibulk length"); setProtocolError(c,pos); return REDIS_ERR; } pos = (newline-c->querybuf)+2; if (ll <= 0) { c->querybuf = sdsrange(c->querybuf,pos,-1); return REDIS_OK; } c->multibulklen = ll; /* Setup argv array on client structure */ if (c->argv) zfree(c->argv); c->argv = zmalloc(sizeof(robj*)*c->multibulklen); } redisAssertWithInfo(c,NULL,c->multibulklen > 0); while(c->multibulklen) { /* Read bulk length if unknown */ if (c->bulklen == -1) { newline = strchr(c->querybuf+pos,'\r'); if (newline == NULL) { if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) { addReplyError(c,"Protocol error: too big bulk count string"); setProtocolError(c,0); } break; } /* Buffer should also contain \n */ if (newline-(c->querybuf) > ((signed)sdslen(c->querybuf)-2)) break; if (c->querybuf[pos] != '$') { addReplyErrorFormat(c, "Protocol error: expected '$', got '%c'", c->querybuf[pos]); setProtocolError(c,pos); return REDIS_ERR; } ok = string2ll(c->querybuf+pos+1,newline-(c->querybuf+pos+1),&ll); if (!ok || ll < 0 || ll > 512*1024*1024) { addReplyError(c,"Protocol error: invalid bulk length"); setProtocolError(c,pos); return REDIS_ERR; } pos += newline-(c->querybuf+pos)+2; if (ll >= REDIS_MBULK_BIG_ARG) { /* If we are going to read a large object from network * try to make it likely that it will start at c->querybuf * boundary so that we can optimized object creation * avoiding a large copy of data. */ c->querybuf = sdsrange(c->querybuf,pos,-1); pos = 0; /* Hint the sds library about the amount of bytes this string is * going to contain. */ c->querybuf = sdsMakeRoomFor(c->querybuf,ll+2); } c->bulklen = ll; } /* Read bulk argument */ if (sdslen(c->querybuf)-pos < (unsigned)(c->bulklen+2)) { /* Not enough data (+2 == trailing \r\n) */ break; } else { /* Optimization: if the buffer contanins JUST our bulk element * instead of creating a new object by *copying* the sds we * just use the current sds string. */ if (pos == 0 && c->bulklen >= REDIS_MBULK_BIG_ARG && (signed) sdslen(c->querybuf) == c->bulklen+2) { c->argv[c->argc++] = createObject(REDIS_STRING,c->querybuf); sdsIncrLen(c->querybuf,-2); /* remove CRLF */ c->querybuf = sdsempty(); /* Assume that if we saw a fat argument we'll see another one * likely... */ c->querybuf = sdsMakeRoomFor(c->querybuf,c->bulklen+2); pos = 0; } else { c->argv[c->argc++] = createStringObject(c->querybuf+pos,c->bulklen); pos += c->bulklen+2; } c->bulklen = -1; c->multibulklen--; } } /* Trim to pos */ if (pos) c->querybuf = sdsrange(c->querybuf,pos,-1); /* We're done when c->multibulk == 0 */ if (c->multibulklen == 0) return REDIS_OK; /* Still not read to process the command */ return REDIS_ERR; } void processInputBuffer(redisClient *c) { /* Keep processing while there is something in the input buffer */ while(sdslen(c->querybuf)) { /* Immediately abort if the client is in the middle of something. */ if (c->flags & REDIS_BLOCKED) return; /* REDIS_CLOSE_AFTER_REPLY closes the connection once the reply is * written to the client. Make sure to not let the reply grow after * this flag has been set (i.e. don't process more commands). */ if (c->flags & REDIS_CLOSE_AFTER_REPLY) return; /* Determine request type when unknown. */ if (!c->reqtype) { if (c->querybuf[0] == '*') { c->reqtype = REDIS_REQ_MULTIBULK; } else { c->reqtype = REDIS_REQ_INLINE; } } if (c->reqtype == REDIS_REQ_INLINE) { if (processInlineBuffer(c) != REDIS_OK) break; } else if (c->reqtype == REDIS_REQ_MULTIBULK) { if (processMultibulkBuffer(c) != REDIS_OK) break; } else { redisPanic("Unknown request type"); } /* Multibulk processing could see a <= 0 length. */ if (c->argc == 0) { resetClient(c); } else { /* Only reset the client when the command was executed. */ if (processCommand(c) == REDIS_OK) resetClient(c); } } } void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { redisClient *c = (redisClient*) privdata; int nread, readlen; size_t qblen; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); server.current_client = c; readlen = REDIS_IOBUF_LEN; /* If this is a multi bulk request, and we are processing a bulk reply * that is large enough, try to maximize the probabilty that the query * buffer contains excatly the SDS string representing the object, even * at the risk of requring more read(2) calls. This way the function * processMultiBulkBuffer() can avoid copying buffers to create the * Redis Object representing the argument. */ if (c->reqtype == REDIS_REQ_MULTIBULK && c->multibulklen && c->bulklen != -1 && c->bulklen >= REDIS_MBULK_BIG_ARG) { int remaining = (unsigned)(c->bulklen+2)-sdslen(c->querybuf); if (remaining < readlen) readlen = remaining; } qblen = sdslen(c->querybuf); c->querybuf = sdsMakeRoomFor(c->querybuf, readlen); nread = read(fd, c->querybuf+qblen, readlen); if (nread == -1) { if (errno == EAGAIN) { nread = 0; } else { redisLog(REDIS_VERBOSE, "Reading from client: %s",strerror(errno)); freeClient(c); return; } } else if (nread == 0) { redisLog(REDIS_VERBOSE, "Client closed connection"); freeClient(c); return; } if (nread) { sdsIncrLen(c->querybuf,nread); c->lastinteraction = time(NULL); } else { server.current_client = NULL; return; } if (sdslen(c->querybuf) > server.client_max_querybuf_len) { sds ci = getClientInfoString(c), bytes = sdsempty(); bytes = sdscatrepr(bytes,c->querybuf,64); redisLog(REDIS_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); sdsfree(ci); sdsfree(bytes); freeClient(c); return; } processInputBuffer(c); server.current_client = NULL; } void getClientsMaxBuffers(unsigned long *longest_output_list, unsigned long *biggest_input_buffer) { redisClient *c; listNode *ln; listIter li; unsigned long lol = 0, bib = 0; listRewind(server.clients,&li); while ((ln = listNext(&li)) != NULL) { c = listNodeValue(ln); if (listLength(c->reply) > lol) lol = listLength(c->reply); if (sdslen(c->querybuf) > bib) bib = sdslen(c->querybuf); } *longest_output_list = lol; *biggest_input_buffer = bib; } /* Turn a Redis client into an sds string representing its state. */ sds getClientInfoString(redisClient *client) { char ip[32], flags[16], events[3], *p; int port; time_t now = time(NULL); int emask; anetPeerToString(client->fd,ip,&port); p = flags; if (client->flags & REDIS_SLAVE) { if (client->flags & REDIS_MONITOR) *p++ = 'O'; else *p++ = 'S'; } if (client->flags & REDIS_MASTER) *p++ = 'M'; if (client->flags & REDIS_MULTI) *p++ = 'x'; if (client->flags & REDIS_BLOCKED) *p++ = 'b'; if (client->flags & REDIS_DIRTY_CAS) *p++ = 'd'; if (client->flags & REDIS_CLOSE_AFTER_REPLY) *p++ = 'c'; if (client->flags & REDIS_UNBLOCKED) *p++ = 'u'; if (client->flags & REDIS_CLOSE_ASAP) *p++ = 'A'; if (p == flags) *p++ = 'N'; *p++ = '\0'; emask = client->fd == -1 ? 0 : aeGetFileEvents(server.el,client->fd); p = events; if (emask & AE_READABLE) *p++ = 'r'; if (emask & AE_WRITABLE) *p++ = 'w'; *p = '\0'; return sdscatprintf(sdsempty(), "addr=%s:%d fd=%d age=%ld idle=%ld flags=%s db=%d sub=%d psub=%d qbuf=%lu qbuf-free=%lu obl=%lu oll=%lu omem=%lu events=%s cmd=%s", ip,port,client->fd, (long)(now - client->ctime), (long)(now - client->lastinteraction), flags, client->db->id, (int) dictSize(client->pubsub_channels), (int) listLength(client->pubsub_patterns), (unsigned long) sdslen(client->querybuf), (unsigned long) sdsavail(client->querybuf), (unsigned long) client->bufpos, (unsigned long) listLength(client->reply), getClientOutputBufferMemoryUsage(client), events, client->lastcmd ? client->lastcmd->name : "NULL"); } sds getAllClientsInfoString(void) { listNode *ln; listIter li; redisClient *client; sds o = sdsempty(); listRewind(server.clients,&li); while ((ln = listNext(&li)) != NULL) { sds cs; client = listNodeValue(ln); cs = getClientInfoString(client); o = sdscatsds(o,cs); sdsfree(cs); o = sdscatlen(o,"\n",1); } return o; } void clientCommand(redisClient *c) { listNode *ln; listIter li; redisClient *client; if (!strcasecmp(c->argv[1]->ptr,"list") && c->argc == 2) { sds o = getAllClientsInfoString(); addReplyBulkCBuffer(c,o,sdslen(o)); sdsfree(o); } else if (!strcasecmp(c->argv[1]->ptr,"kill") && c->argc == 3) { listRewind(server.clients,&li); while ((ln = listNext(&li)) != NULL) { char ip[32], addr[64]; int port; client = listNodeValue(ln); if (anetPeerToString(client->fd,ip,&port) == -1) continue; snprintf(addr,sizeof(addr),"%s:%d",ip,port); if (strcmp(addr,c->argv[2]->ptr) == 0) { addReply(c,shared.ok); if (c == client) { client->flags |= REDIS_CLOSE_AFTER_REPLY; } else { freeClient(client); } return; } } addReplyError(c,"No such client"); } else { addReplyError(c, "Syntax error, try CLIENT (LIST | KILL ip:port)"); } } /* Rewrite the command vector of the client. All the new objects ref count * is incremented. The old command vector is freed, and the old objects * ref count is decremented. */ void rewriteClientCommandVector(redisClient *c, int argc, ...) { va_list ap; int j; robj **argv; /* The new argument vector */ argv = zmalloc(sizeof(robj*)*argc); va_start(ap,argc); for (j = 0; j < argc; j++) { robj *a; a = va_arg(ap, robj*); argv[j] = a; incrRefCount(a); } /* We free the objects in the original vector at the end, so we are * sure that if the same objects are reused in the new vector the * refcount gets incremented before it gets decremented. */ for (j = 0; j < c->argc; j++) decrRefCount(c->argv[j]); zfree(c->argv); /* Replace argv and argc with our new versions. */ c->argv = argv; c->argc = argc; c->cmd = lookupCommand(c->argv[0]->ptr); redisAssertWithInfo(c,NULL,c->cmd != NULL); va_end(ap); } /* Rewrite a single item in the command vector. * The new val ref count is incremented, and the old decremented. */ void rewriteClientCommandArgument(redisClient *c, int i, robj *newval) { robj *oldval; redisAssertWithInfo(c,NULL,i < c->argc); oldval = c->argv[i]; c->argv[i] = newval; incrRefCount(newval); decrRefCount(oldval); /* If this is the command name make sure to fix c->cmd. */ if (i == 0) { c->cmd = lookupCommand(c->argv[0]->ptr); redisAssertWithInfo(c,NULL,c->cmd != NULL); } } /* This function returns the number of bytes that Redis is virtually * using to store the reply still not read by the client. * It is "virtual" since the reply output list may contain objects that * are shared and are not really using additional memory. * * The function returns the total sum of the length of all the objects * stored in the output list, plus the memory used to allocate every * list node. The static reply buffer is not taken into account since it * is allocated anyway. * * Note: this function is very fast so can be called as many time as * the caller wishes. The main usage of this function currently is * enforcing the client output length limits. */ unsigned long getClientOutputBufferMemoryUsage(redisClient *c) { unsigned long list_item_size = sizeof(listNode)+sizeof(robj); return c->reply_bytes + (list_item_size*listLength(c->reply)); } /* Get the class of a client, used in order to envorce limits to different * classes of clients. * * The function will return one of the following: * REDIS_CLIENT_LIMIT_CLASS_NORMAL -> Normal client * REDIS_CLIENT_LIMIT_CLASS_SLAVE -> Slave or client executing MONITOR command * REDIS_CLIENT_LIMIT_CLASS_PUBSUB -> Client subscribed to Pub/Sub channels */ int getClientLimitClass(redisClient *c) { if (c->flags & REDIS_SLAVE) return REDIS_CLIENT_LIMIT_CLASS_SLAVE; if (dictSize(c->pubsub_channels) || listLength(c->pubsub_patterns)) return REDIS_CLIENT_LIMIT_CLASS_PUBSUB; return REDIS_CLIENT_LIMIT_CLASS_NORMAL; } int getClientLimitClassByName(char *name) { if (!strcasecmp(name,"normal")) return REDIS_CLIENT_LIMIT_CLASS_NORMAL; else if (!strcasecmp(name,"slave")) return REDIS_CLIENT_LIMIT_CLASS_SLAVE; else if (!strcasecmp(name,"pubsub")) return REDIS_CLIENT_LIMIT_CLASS_PUBSUB; else return -1; } char *getClientLimitClassName(int class) { switch(class) { case REDIS_CLIENT_LIMIT_CLASS_NORMAL: return "normal"; case REDIS_CLIENT_LIMIT_CLASS_SLAVE: return "slave"; case REDIS_CLIENT_LIMIT_CLASS_PUBSUB: return "pubsub"; default: return NULL; } } /* The function checks if the client reached output buffer soft or hard * limit, and also update the state needed to check the soft limit as * a side effect. * * Return value: non-zero if the client reached the soft or the hard limit. * Otherwise zero is returned. */ int checkClientOutputBufferLimits(redisClient *c) { int soft = 0, hard = 0, class; unsigned long used_mem = getClientOutputBufferMemoryUsage(c); class = getClientLimitClass(c); if (server.client_obuf_limits[class].hard_limit_bytes && used_mem >= server.client_obuf_limits[class].hard_limit_bytes) hard = 1; if (server.client_obuf_limits[class].soft_limit_bytes && used_mem >= server.client_obuf_limits[class].soft_limit_bytes) soft = 1; /* We need to check if the soft limit is reached continuously for the * specified amount of seconds. */ if (soft) { if (c->obuf_soft_limit_reached_time == 0) { c->obuf_soft_limit_reached_time = server.unixtime; soft = 0; /* First time we see the soft limit reached */ } else { time_t elapsed = server.unixtime - c->obuf_soft_limit_reached_time; if (elapsed <= server.client_obuf_limits[class].soft_limit_seconds) { soft = 0; /* The client still did not reached the max number of seconds for the soft limit to be considered reached. */ } } } else { c->obuf_soft_limit_reached_time = 0; } return soft || hard; } /* Asynchronously close a client if soft or hard limit is reached on the * output buffer size. The caller can check if the client will be closed * checking if the client REDIS_CLOSE_ASAP flag is set. * * Note: we need to close the client asynchronously because this function is * called from contexts where the client can't be freed safely, i.e. from the * lower level functions pushing data inside the client output buffers. */ void asyncCloseClientOnOutputBufferLimitReached(redisClient *c) { if (c->reply_bytes == 0 || c->flags & REDIS_CLOSE_ASAP) return; if (checkClientOutputBufferLimits(c)) { sds client = getClientInfoString(c); freeClientAsync(c); redisLog(REDIS_WARNING,"Client %s scheduled to be closed ASAP for overcoming of output buffer limits.", client); sdsfree(client); } } /* Helper function used by freeMemoryIfNeeded() in order to flush slaves * output buffers without returning control to the event loop. */ void flushSlavesOutputBuffers(void) { listIter li; listNode *ln; listRewind(server.slaves,&li); while((ln = listNext(&li))) { redisClient *slave = listNodeValue(ln); int events; events = aeGetFileEvents(server.el,slave->fd); if (events & AE_WRITABLE && slave->replstate == REDIS_REPL_ONLINE && listLength(slave->reply)) { sendReplyToClient(server.el,slave->fd,slave,0); } } }