From ef59a8bc9ef426f9d24e701e9b73a6a03ddd3d0f Mon Sep 17 00:00:00 2001
From: antirez
Date: Thu, 14 Oct 2010 13:52:58 +0200
Subject: [PATCH] Object approximated LRU algorithm enhanced / fixed / refactored. This is used for the VM currently but will soon be used for maxmemory expiring.

---
 src/db.c     |  5 +++--
 src/debug.c  |  6 ++++--
 src/object.c | 31 +++++++++++++++++++++++--------
 src/redis.c  | 18 ++++++++----------
 src/redis.h  |  2 ++
 src/vm.c     |  2 +-
 6 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/src/db.c b/src/db.c
index 445078474..c1ce79b56 100644
--- a/src/db.c
+++ b/src/db.c
@@ -11,6 +11,9 @@ robj *lookupKey(redisDb *db, robj *key) {
     if (de) {
         robj *val = dictGetEntryVal(de);
 
+        /* Update the access time for the aging algorithm. */
+        val->lru = server.lruclock;
+
         if (server.vm_enabled) {
             if (val->storage == REDIS_VM_MEMORY ||
                 val->storage == REDIS_VM_SWAPPING)
@@ -18,8 +21,6 @@ robj *lookupKey(redisDb *db, robj *key) {
                 /* If we were swapping the object out, cancel the operation */
                 if (val->storage == REDIS_VM_SWAPPING)
                     vmCancelThreadedIOJob(val);
-                /* Update the access time for the aging algorithm.
*/
-                val->lru = server.lruclock;
             } else {
                 int notify = (val->storage == REDIS_VM_LOADING);
 
diff --git a/src/debug.c b/src/debug.c
index 2f7ab58f1..3b187da4e 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -213,9 +213,11 @@ void debugCommand(redisClient *c) {
             strenc = strEncoding(val->encoding);
             addReplyStatusFormat(c,
                 "Value at:%p refcount:%d "
-                "encoding:%s serializedlength:%lld",
+                "encoding:%s serializedlength:%lld "
+                "lru:%d lru_seconds_idle:%lu",
                 (void*)val, val->refcount,
-                strenc, (long long) rdbSavedObjectLen(val,NULL));
+                strenc, (long long) rdbSavedObjectLen(val,NULL),
+                val->lru, estimateObjectIdleTime(val));
         } else {
             vmpointer *vp = (vmpointer*) val;
             addReplyStatusFormat(c,
diff --git a/src/object.c b/src/object.c
index c1a082451..e7fa37427 100644
--- a/src/object.c
+++ b/src/object.c
@@ -19,14 +19,19 @@ robj *createObject(int type, void *ptr) {
     o->encoding = REDIS_ENCODING_RAW;
     o->ptr = ptr;
     o->refcount = 1;
-    if (server.vm_enabled) {
-        /* Note that this code may run in the context of an I/O thread
-         * and accessing server.lruclock in theory is an error
-         * (no locks). But in practice this is safe, and even if we read
-         * garbage Redis will not fail. */
-        o->lru = server.lruclock;
-        o->storage = REDIS_VM_MEMORY;
-    }
+    /* Set the LRU to the current lruclock (minutes resolution).
+     * We do this regardless of the fact VM is active as LRU is also
+     * used for the maxmemory directive when Redis is used as cache.
+     *
+     * Note that this code may run in the context of an I/O thread
+     * and accessing server.lruclock in theory is an error
+     * (no locks). But in practice this is safe, and even if we read
+     * garbage Redis will not fail. */
+    o->lru = server.lruclock;
+    /* The following is only needed if VM is active, but since the conditional
+     * is probably more costly than initializing the field it's better to
+     * have every field properly initialized anyway.
*/
+    o->storage = REDIS_VM_MEMORY;
     return o;
 }
 
@@ -433,3 +438,13 @@ char *strEncoding(int encoding) {
     default: return "unknown";
     }
 }
+
+/* Return the minimum number of seconds object 'o' has been idle, computed
+ * from the minutes-resolution LRU clock (handles a single clock wrap). */
+unsigned long estimateObjectIdleTime(robj *o) {
+    if (server.lruclock >= o->lru) {
+        return (server.lruclock - o->lru) * 60;
+    } else {
+        return ((REDIS_LRU_CLOCK_MAX - o->lru) + server.lruclock + 1) * 60;
+    }
+}
diff --git a/src/redis.c b/src/redis.c
index 27a855d97..774b3a818 100644
--- a/src/redis.c
+++ b/src/redis.c
@@ -490,19 +490,15 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
      * in objects at every object access, and accuracy is not needed.
      * To access a global var is faster than calling time(NULL) */
     server.unixtime = time(NULL);
-    /* We have just 21 bits per object for LRU information.
+    /* We have just 22 bits per object for LRU information.
      * So we use an (eventually wrapping) LRU clock with minutes resolution.
+     * The clock is masked with REDIS_LRU_CLOCK_MAX, that is (1<<21)-1.
      *
-     * When we need to select what object to swap, we compute the minimum
-     * time distance between the current lruclock and the object last access
-     * lruclock info. Even if clocks will wrap on overflow, there is
-     * the interesting property that we are sure that at least
-     * ABS(A-B) minutes passed between current time and timestamp B.
-     *
-     * This is not precise but we don't need at all precision, but just
-     * something statistically reasonable.
+     * Note that even if this will wrap after 2^21 minutes (about 4 years)
+     * it's not a problem, everything will still work but just some object
+     * will appear younger to Redis :)
      */
-    server.lruclock = (time(NULL)/60)&((1<<21)-1);
+    server.lruclock = (time(NULL)/60) & REDIS_LRU_CLOCK_MAX;
 
     /* We received a SIGTERM, shutting down here in a safe way, as it is
      * not ok doing so inside the signal handler.
*/
@@ -1165,6 +1161,7 @@ sds genRedisInfoString(void) {
         "process_id:%ld\r\n"
         "uptime_in_seconds:%ld\r\n"
         "uptime_in_days:%ld\r\n"
+        "lru_clock:%lu\r\n"
         "used_cpu_sys:%.2f\r\n"
         "used_cpu_user:%.2f\r\n"
         "used_cpu_sys_childrens:%.2f\r\n"
@@ -1196,6 +1193,7 @@ sds genRedisInfoString(void) {
         (long) getpid(),
         uptime,
         uptime/(3600*24),
+        (unsigned long) server.lruclock,
         (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000,
         (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000,
         (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000,
diff --git a/src/redis.h b/src/redis.h
index 3e9fc2369..d768b184b 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -211,6 +211,7 @@ void _redisPanic(char *msg, char *file, int line);
 /* A redis object, that is a type able to hold a string / list / set */
 
 /* The actual Redis Object */
+#define REDIS_LRU_CLOCK_MAX ((1<<21)-1) /* Max value of obj->lru */
 typedef struct redisObject {
     unsigned type:4;
     unsigned storage:2;     /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
@@ -678,6 +679,7 @@ int getLongLongFromObject(robj *o, long long *target);
 char *strEncoding(int encoding);
 int compareStringObjects(robj *a, robj *b);
 int equalStringObjects(robj *a, robj *b);
+unsigned long estimateObjectIdleTime(robj *o);
 
 /* Replication */
 void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
diff --git a/src/vm.c b/src/vm.c
index ee831fb9a..1aad95d75 100644
--- a/src/vm.c
+++ b/src/vm.c
@@ -362,7 +362,7 @@ robj *vmPreviewObject(robj *o) {
 double computeObjectSwappability(robj *o) {
     /* actual age can be >= minage, but not < minage. As we use wrapping
      * 21 bit clocks with minutes resolution for the LRU. */
-    time_t minage = abs(server.lruclock - o->lru);
+    time_t minage = estimateObjectIdleTime(o);
     long asize = 0, elesize;
     robj *ele;
     list *l;