From 06e76bc3e22dd72a30a8a614d367246b03ff1312 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 Dec 2014 16:10:21 +0100 Subject: [PATCH] Better read-only behavior for expired keys in slaves. Slaves key expire is orchestrated by the master. Sometimes the master will send the synthesized DEL to expire keys on the slave with a non trivial delay (when the key is not accessed, only the incremental expiry algorithm will expire it in background). During that time, a key is logically expired, but slaves still return the key if you GET (or whatever) it. This is a bad behavior. However we can't simply trust the slave view of the key, since we need the master to be able to send write commands to update the slave data set, and DELs should only happen when the key is expired in the master in order to ensure consistency. However 99.99% of the issues with this behavior is when a client which is not a master sends a read only command. In this case we are safe and can consider the key as non existing. This commit does a few changes in order to make this sane: 1. lookupKeyRead() is modified in order to return NULL if the above conditions are met. 2. Calls to lookupKeyRead() in commands actually writing to the data set are repliaced with calls to lookupKeyWrite(). There are redundand checks, so for example, if in "2" something was overlooked, we should be still safe, since anyway, when the master writes the behavior is to don't care about what expireIfneeded() returns. This commit is related to #1768, #1770, #2131. --- src/cluster.c | 2 +- src/db.c | 29 +++++++++++++++++++++++++++-- src/debug.c | 2 +- src/hyperloglog.c | 4 ++-- src/t_list.c | 2 +- 5 files changed, 32 insertions(+), 7 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 80ac66e41..bb688425b 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4409,7 +4409,7 @@ try_again: /* Check if the key is here. If not we reply with success as there is * nothing to migrate (for instance the key expired in the meantime), but * we include such information in the reply string. */ - if ((o = lookupKeyRead(c->db,c->argv[3])) == NULL) { + if ((o = lookupKeyWrite(c->db,c->argv[3])) == NULL) { addReplySds(c,sdsnew("+NOKEY\r\n")); return; } diff --git a/src/db.c b/src/db.c index 2982c0166..b7756bf27 100644 --- a/src/db.c +++ b/src/db.c @@ -60,7 +60,32 @@ robj *lookupKey(redisDb *db, robj *key) { robj *lookupKeyRead(redisDb *db, robj *key) { robj *val; - expireIfNeeded(db,key); + if (expireIfNeeded(db,key) == 1) { + /* Key expired. If we are in the context of a master, expireIfNeeded() + * returns 0 only when the key does not exist at all, so it's save + * to return NULL ASAP. */ + if (server.masterhost == NULL) return NULL; + + /* However if we are in the context of a slave, expireIfNeeded() will + * not really try to expire the key, it only returns information + * about the "logical" status of the key: key expiring is up to the + * master in order to have a consistent view of master's data set. + * + * However, if the command caller is not the master, and as additional + * safety measure, the command invoked is a read-only command, we can + * safely return NULL here, and provide a more consistent behavior + * to clients accessign expired values in a read-only fashion, that + * will say the key as non exisitng. + * + * Notably this covers GETs when slaves are used to scale reads. */ + if (server.current_client && + server.current_client != server.master && + server.current_client->cmd && + server.current_client->cmd->flags & REDIS_CMD_READONLY) + { + return NULL; + } + } val = lookupKey(db,key); if (val == NULL) server.stat_keyspace_misses++; @@ -877,7 +902,7 @@ void expireGenericCommand(redisClient *c, long long basetime, int unit) { when += basetime; /* No key, return zero. */ - if (lookupKeyRead(c->db,key) == NULL) { + if (lookupKeyWrite(c->db,key) == NULL) { addReply(c,shared.czero); return; } diff --git a/src/debug.c b/src/debug.c index a22556bd2..27f2c96b9 100644 --- a/src/debug.c +++ b/src/debug.c @@ -338,7 +338,7 @@ void debugCommand(redisClient *c) { snprintf(buf,sizeof(buf),"%s:%lu", (c->argc == 3) ? "key" : (char*)c->argv[3]->ptr, j); key = createStringObject(buf,strlen(buf)); - if (lookupKeyRead(c->db,key) != NULL) { + if (lookupKeyWrite(c->db,key) != NULL) { decrRefCount(key); continue; } diff --git a/src/hyperloglog.c b/src/hyperloglog.c index a6cfdc742..b3542f997 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1233,7 +1233,7 @@ void pfcountCommand(redisClient *c) { * * The user specified a single key. Either return the cached value * or compute one and update the cache. */ - o = lookupKeyRead(c->db,c->argv[1]); + o = lookupKeyWrite(c->db,c->argv[1]); if (o == NULL) { /* No key? Cardinality is zero since no element was added, otherwise * we would have a key as HLLADD creates it as a side effect. */ @@ -1458,7 +1458,7 @@ void pfdebugCommand(redisClient *c) { robj *o; int j; - o = lookupKeyRead(c->db,c->argv[2]); + o = lookupKeyWrite(c->db,c->argv[2]); if (o == NULL) { addReplyError(c,"The specified key does not exist"); return; diff --git a/src/t_list.c b/src/t_list.c index 7c79185fd..fc27331f5 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -334,7 +334,7 @@ void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) { listTypeEntry entry; int inserted = 0; - if ((subject = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL || + if ((subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL || checkType(c,subject,REDIS_LIST)) return; if (refval != NULL) {