Better read-only behavior for expired keys in slaves.

Slaves key expire is orchestrated by the master. Sometimes the master will send the synthesized DEL to expire keys on the slave with a non trivial delay (when the key is not accessed, only the incremental expiry algorithm will expire it in background). During that time, a key is logically expired, but slaves still return the key if you GET (or whatever) it. This is a bad behavior. However we can't simply trust the slave view of the key, since we need the master to be able to send write commands to update the slave data set, and DELs should only happen when the key is expired in the master in order to ensure consistency. However 99.99% of the issues with this behavior is when a client which is not a master sends a read only command. In this case we are safe and can consider the key as non existing. This commit does a few changes in order to make this sane: 1. lookupKeyRead() is modified in order to return NULL if the above conditions are met. 2. Calls to lookupKeyRead() in commands actually writing to the data set are repliaced with calls to lookupKeyWrite(). There are redundand checks, so for example, if in "2" something was overlooked, we should be still safe, since anyway, when the master writes the behavior is to don't care about what expireIfneeded() returns. This commit is related to #1768, #1770, #2131.
2025-01-22 16:18:28 -05:00 · 2014-12-10 16:10:21 +01:00 · 2014-12-10 16:10:21 +01:00 · 06e76bc3e2
commit 06e76bc3e2
parent 3da87b70dd
5 changed files with 32 additions and 7 deletions
--- a/src/cluster.c
+++ b/src/cluster.c
@ -4409,7 +4409,7 @@ try_again:
    /* Check if the key is here. If not we reply with success as there is
     * nothing to migrate (for instance the key expired in the meantime), but
     * we include such information in the reply string. */
-    if ((o = lookupKeyRead(c->db,c->argv[3])) == NULL) {
+    if ((o = lookupKeyWrite(c->db,c->argv[3])) == NULL) {
        addReplySds(c,sdsnew("+NOKEY\r\n"));
        return;
    }
--- a/src/db.c
+++ b/src/db.c
@ -60,7 +60,32 @@ robj *lookupKey(redisDb *db, robj *key) {
 robj *lookupKeyRead(redisDb *db, robj *key) {
    robj *val;

-    expireIfNeeded(db,key);
+    if (expireIfNeeded(db,key) == 1) {
+        /* Key expired. If we are in the context of a master, expireIfNeeded()
+         * returns 0 only when the key does not exist at all, so it's save
+         * to return NULL ASAP. */
+        if (server.masterhost == NULL) return NULL;
+
+        /* However if we are in the context of a slave, expireIfNeeded() will
+         * not really try to expire the key, it only returns information
+         * about the "logical" status of the key: key expiring is up to the
+         * master in order to have a consistent view of master's data set.
+         *
+         * However, if the command caller is not the master, and as additional
+         * safety measure, the command invoked is a read-only command, we can
+         * safely return NULL here, and provide a more consistent behavior
+         * to clients accessign expired values in a read-only fashion, that
+         * will say the key as non exisitng.
+         *
+         * Notably this covers GETs when slaves are used to scale reads. */
+        if (server.current_client &&
+            server.current_client != server.master &&
+            server.current_client->cmd &&
+            server.current_client->cmd->flags & REDIS_CMD_READONLY)
+        {
+            return NULL;
+        }
+    }
    val = lookupKey(db,key);
    if (val == NULL)
        server.stat_keyspace_misses++;
@ -877,7 +902,7 @@ void expireGenericCommand(redisClient *c, long long basetime, int unit) {
    when += basetime;

    /* No key, return zero. */
-    if (lookupKeyRead(c->db,key) == NULL) {
+    if (lookupKeyWrite(c->db,key) == NULL) {
        addReply(c,shared.czero);
        return;
    }
--- a/src/debug.c
+++ b/src/debug.c
@ -338,7 +338,7 @@ void debugCommand(redisClient *c) {
            snprintf(buf,sizeof(buf),"%s:%lu",
                (c->argc == 3) ? "key" : (char*)c->argv[3]->ptr, j);
            key = createStringObject(buf,strlen(buf));
-            if (lookupKeyRead(c->db,key) != NULL) {
+            if (lookupKeyWrite(c->db,key) != NULL) {
                decrRefCount(key);
                continue;
            }
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@ -1233,7 +1233,7 @@ void pfcountCommand(redisClient *c) {
     *
     * The user specified a single key. Either return the cached value
     * or compute one and update the cache. */
-    o = lookupKeyRead(c->db,c->argv[1]);
+    o = lookupKeyWrite(c->db,c->argv[1]);
    if (o == NULL) {
        /* No key? Cardinality is zero since no element was added, otherwise
         * we would have a key as HLLADD creates it as a side effect. */
@ -1458,7 +1458,7 @@ void pfdebugCommand(redisClient *c) {
    robj *o;
    int j;

-    o = lookupKeyRead(c->db,c->argv[2]);
+    o = lookupKeyWrite(c->db,c->argv[2]);
    if (o == NULL) {
        addReplyError(c,"The specified key does not exist");
        return;
--- a/src/t_list.c
+++ b/src/t_list.c
@ -334,7 +334,7 @@ void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) {
    listTypeEntry entry;
    int inserted = 0;

-    if ((subject = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+    if ((subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
        checkType(c,subject,REDIS_LIST)) return;

    if (refval != NULL) {