From d934e1e85b1ec1c6d2cd07292cced53770f4a626 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 31 Dec 2010 17:32:59 +0100 Subject: [PATCH] negative caching implemented --- src/db.c | 13 +++--- src/dscache.c | 107 ++++++++++++++++++++++++++++++++++++++++---------- src/redis.c | 6 ++- src/redis.h | 4 ++ 4 files changed, 102 insertions(+), 28 deletions(-) diff --git a/src/db.c b/src/db.c index 48663e9c9..1a30034e8 100644 --- a/src/db.c +++ b/src/db.c @@ -38,6 +38,8 @@ robj *lookupKey(redisDb *db, robj *key) { * async loading of this key, what may happen is that the old * key is loaded in memory if this gets deleted in the meantime. */ if (server.ds_enabled && cacheKeyMayExist(db,key)) { + redisLog(REDIS_DEBUG,"Force loading key %s via lookup", + key->ptr); val = dsGet(db,key,&expire); if (val) { int retval = dbAdd(db,key,val); @@ -142,14 +144,13 @@ robj *dbRandomKey(redisDb *db) { /* Delete a key, value, and associated expiration entry if any, from the DB */ int dbDelete(redisDb *db, robj *key) { - /* If VM is enabled make sure to awake waiting clients for this key: - * deleting the key will kill the I/O thread bringing the key from swap - * to memory, so the client will never be notified and unblocked if we - * don't do it now. */ + /* If diskstore is enabled make sure to awake waiting clients for this key + * as it is not really useful to wait for a key already deleted to be + * loaded from disk. */ if (server.ds_enabled) handleClientsBlockedOnSwappedKey(db,key); - /* FIXME: we need to delete the IO Job loading the key, or simply we can - * wait for it to finish. */ + /* Mark this key as non existing on disk as well */ + cacheSetKeyDoesNotExistRemember(db,key); /* Deleting an entry from the expires dict will not free the sds of * the key, because it is shared with the main dictionary. */ diff --git a/src/dscache.c b/src/dscache.c index 7be3bf86a..a56fa118b 100644 --- a/src/dscache.c +++ b/src/dscache.c @@ -105,6 +105,10 @@ * value so it will be evicted later. * * Are there other patterns like this where we load stale data? + * + * Also, make sure that key preloading is ONLY done for keys that are + * not marked as cacheKeyDoesNotExist(), otherwise, again, we can load + * data from disk that should instead be deleted. */ /* Virtual Memory is composed mainly of two subsystems: @@ -259,7 +263,72 @@ int dsCanTouchDiskStore(void) { return (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1); } -/* =================== Virtual Memory - Threaded I/O ======================= */ +/* ==================== Disk store negative caching ======================== + * + * When disk store is enabled, we need negative caching, that is, to remember + * keys that are for sure *not* on the disk key-value store. + * + * This is useful for two reasons: + * + * 1) Without negative caching cache misses will cost us a disk lookup, even + * if the same non existing key is accessed again and again. We negative + * caching we remember that the key is not on disk, so if it's not in memory + * and we have a negative cache entry, we don't try a disk access at all. + * + * 2) Negative caching is the way to fix a specific race condition. For instance + * think at the following sequence of commands: + * + * SET foo bar + * DEL foo + * GET foo + * + * After the SET, we'll mark the value as dirty, so it will be flushed + * on disk at some time. Later the key is deleted, so will be removed + * from memory. Another job will be created to remove the key from the disk + * store, but the removal is not synchronous, so may happen later in time. + * + * Finally we have a GET foo operation. This operation may result in + * reading back a value from disk that is not updated data, as the deletion + * operaiton against the disk KV store was still not completed, so we + * read old data. + * + * Remembering that the given key is deleted is important. We can discard this + * information once the key was really removed from the disk. + * + * So actually there are two kind of negative caching entries: entries that + * can be evicted when we need to reclaim memory, and entries that will + * not be evicted, for all the time we need this information to be available. + * + * The API allows to create both kind of negative caching. */ + +int cacheKeyMayExist(redisDb *db, robj *key) { + return dictFind(db->io_negcache,key) == NULL; +} + +void cacheSetKeyMayExist(redisDb *db, robj *key) { + dictDelete(db->io_negcache,key); +} + +void cacheSetKeyDoesNotExist(redisDb *db, robj *key) { + struct dictEntry *de; + + /* Don't overwrite negative cached entries with val set to 0, as this + * entries were created with cacheSetKeyDoesNotExistRemember(). */ + de = dictFind(db->io_negcache,key); + if (de != NULL && dictGetEntryVal(de) == NULL) return; + + if (dictReplace(db->io_negcache,key,(void*)time(NULL))) { + incrRefCount(key); + } +} + +void cacheSetKeyDoesNotExistRemember(redisDb *db, robj *key) { + if (dictReplace(db->io_negcache,key,NULL)) { + incrRefCount(key); + } +} + +/* ================== Disk store cache - Threaded I/O ====================== */ void freeIOJob(iojob *j) { decrRefCount(j->key); @@ -310,15 +379,20 @@ void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, if (j->val != NULL) { /* Note: the key may already be here if between the time * this key loading was scheduled and now there was the - * need to blocking load the key for a key lookup. */ - if (dbAdd(j->db,j->key,j->val) == REDIS_OK) { + * need to blocking load the key for a key lookup. + * + * Also we don't add a key that was deleted in the + * meantime and should not be on disk either. */ + if (cacheKeyMayExist(j->db,j->key) && + dbAdd(j->db,j->key,j->val) == REDIS_OK) + { incrRefCount(j->val); if (j->expire != -1) setExpire(j->db,j->key,j->expire); } } else { /* The key does not exist. Create a negative cache entry * for this key. */ - /* FIXME: add this entry into the negative cache */ + cacheSetKeyDoesNotExist(j->db,j->key); } /* Handle clients waiting for this key to be loaded. */ handleClientsBlockedOnSwappedKey(j->db,j->key); @@ -327,6 +401,12 @@ void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, if (j->val) { redisAssert(j->val->storage == REDIS_DS_SAVING); j->val->storage = REDIS_DS_MEMORY; + cacheSetKeyMayExist(j->db,j->key); + } else { + /* Key deleted. Probably we have this key marked as + * non existing, and impossible to evict, in our negative + * cache entry. Add it as a normal negative cache entry. */ + cacheSetKeyMayExist(j->db,j->key); } freeIOJob(j); } @@ -585,15 +665,6 @@ void cacheCron(void) { } } -/* ============ Negative caching for diskstore objects ====================== */ -/* Since accesses to keys that don't exist with disk store cost us a disk - * access, we need to cache names of keys that do not exist but are frequently - * accessed. */ -int cacheKeyMayExist(redisDb *db, robj *key) { - /* FIXME: for now we just always return true. */ - return 1; -} - /* ============ Virtual Memory - Blocking clients on missing keys =========== */ /* This function makes the clinet 'c' waiting for the key 'key' to be loaded. @@ -624,6 +695,9 @@ int waitForSwappedKey(redisClient *c, robj *key) { de = dictFind(c->db->dict,key->ptr); if (de != NULL) return 0; + /* Don't wait for keys we are sure are not on disk either */ + if (!cacheKeyMayExist(c->db,key)) return 0; + /* Add the key to the list of keys this client is waiting for. * This maps clients to keys they are waiting for. */ listAddNodeTail(c->io_keys,key); @@ -645,13 +719,6 @@ int waitForSwappedKey(redisClient *c, robj *key) { listAddNodeTail(l,c); /* Are we already loading the key from disk? If not create a job */ - /* FIXME: if a given client was blocked for this key (so job already - * created) but the client was freed, there may be a job loading this - * key even if de == NULL. Does this creates some race condition? - * - * Example: after the first load the key gets a DEL that will schedule - * a write. But the write will happen later, the duplicated load will - * fire and we'll get again the key in memory. */ if (de == NULL) dsCreateIOJob(REDIS_IOJOB_LOAD,c->db,key,NULL); return 1; diff --git a/src/redis.c b/src/redis.c index bb917f50f..65cf3ace9 100644 --- a/src/redis.c +++ b/src/redis.c @@ -345,7 +345,7 @@ unsigned int dictEncObjHash(const void *key) { } } -/* Sets type */ +/* Sets type and diskstore negative caching hash table */ dictType setDictType = { dictEncObjHash, /* hash function */ NULL, /* key dup */ @@ -854,8 +854,10 @@ void initServer() { server.db[j].expires = dictCreate(&keyptrDictType,NULL); server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL); server.db[j].watched_keys = dictCreate(&keylistDictType,NULL); - if (server.ds_enabled) + if (server.ds_enabled) { server.db[j].io_keys = dictCreate(&keylistDictType,NULL); + server.db[j].io_negcache = dictCreate(&setDictType,NULL); + } server.db[j].id = j; } server.pubsub_channels = dictCreate(&keylistDictType,NULL); diff --git a/src/redis.h b/src/redis.h index 15c192cfa..4a6a5dc6d 100644 --- a/src/redis.h +++ b/src/redis.h @@ -269,6 +269,7 @@ typedef struct redisDb { dict *expires; /* Timeout of keys with a timeout set */ dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */ dict *io_keys; /* Keys with clients waiting for VM I/O */ + dict *io_negcache; /* Negative caching for disk store */ dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */ int id; } redisDb; @@ -809,6 +810,9 @@ int cacheFreeOneEntry(void); void cacheScheduleForFlush(redisDb *db, robj *key); void cacheCron(void); int cacheKeyMayExist(redisDb *db, robj *key); +void cacheSetKeyExists(redisDb *db, robj *key); +void cacheSetKeyDoesNotExist(redisDb *db, robj *key); +void cacheSetKeyDoesNotExistRemember(redisDb *db, robj *key); /* Set data type */ robj *setTypeCreate(robj *value);