redict/src/evict.c

/* Maxmemory directive handling (LRU eviction and other policies).
*
* ----------------------------------------------------------------------------
*
* Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "server.h"
#include "bio.h"
/* ----------------------------------------------------------------------------
* Data structures
* --------------------------------------------------------------------------*/
/* To improve the quality of the LRU approximation we take a set of keys
* that are good candidates for eviction across freeMemoryIfNeeded() calls.
*
* Entries inside the eviction pool are kept ordered by idle time, putting
* greater idle times to the right (ascending order).
*
* Empty entries have the key pointer set to NULL. */
#define EVPOOL_SIZE 16
#define EVPOOL_CACHED_SDS_SIZE 255
struct evictionPoolEntry {
unsigned long long idle; /* Object idle time. */
sds key; /* Key name. */
sds cached; /* Cached SDS object for key name. */
int dbid; /* Key DB number. */
};
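/* Note on the 'cached' field: it is preallocated once per pool entry
* (EVPOOL_CACHED_SDS_SIZE bytes, see evictionPoolAlloc()) so that short key
* names sampled by evictionPoolPopulate() can be copied into it instead of
* allocating a fresh SDS string for every sample; only keys longer than the
* cached buffer get a real sdsdup() copy. */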
static struct evictionPoolEntry *EvictionPoolLRU;
/* ----------------------------------------------------------------------------
* Implementation of eviction, aging and LRU
* --------------------------------------------------------------------------*/
/* Return the LRU clock, based on the clock resolution. This is a time
* in a reduced-bits format that can be used to set and check the
* object->lru field of redisObject structures. */
unsigned int getLRUClock(void) {
return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
}
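/* As a worked example of what the reduced-bits clock implies (assuming the
* usual values LRU_CLOCK_RESOLUTION = 1000 ms and a 24-bit LRU_CLOCK_MAX,
* which may differ in this tree): the clock ticks once per second and wraps
* around after 2^24 seconds, i.e. roughly every 194 days, which is why
* estimateObjectIdleTime() below has to handle the wrap-around case. */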
/* Given an object, return the minimum number of milliseconds the object has
* gone without being requested, using an approximated LRU algorithm. */
unsigned long long estimateObjectIdleTime(robj *o) {
unsigned long long lruclock = LRU_CLOCK();
if (lruclock >= o->lru) {
return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
} else {
return (lruclock + (LRU_CLOCK_MAX - o->lru)) *
LRU_CLOCK_RESOLUTION;
}
}
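/* A small numeric sketch of the two branches above (hypothetical values, not
* taken from a real run), assuming LRU_CLOCK_RESOLUTION = 1000 ms: if the
* current lruclock is 5000 and o->lru is 4000, the estimate is
* (5000-4000)*1000 = 1,000,000 ms of idle time. If the clock has wrapped,
* e.g. lruclock is 10 while o->lru is LRU_CLOCK_MAX-5, the second branch
* returns (10 + (LRU_CLOCK_MAX - o->lru)) * 1000 = 15,000 ms. */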
/* freeMemoryIfNeeded() gets called when 'maxmemory' is set in the config
* file to limit the max memory used by the server, before processing a
* command.
*
* The goal of the function is to free enough memory to keep Redis under the
* configured memory limit.
*
* The function starts by calculating how many bytes should be freed to keep
* Redis under the limit, and enters a loop selecting the best keys to
* evict according to the configured policy.
*
* If all the bytes needed to get back under the limit were freed the
* function returns C_OK, otherwise C_ERR is returned, and the caller
* should block the execution of commands that will result in more memory
* used by the server.
*
* ------------------------------------------------------------------------
*
* LRU approximation algorithm
*
* Redis uses an approximation of the LRU algorithm that runs in constant
* memory. Every time there is a key to evict, we sample N keys (with
* N very small, usually around 5) to populate a pool of the M best keys
* to evict (the pool size M is defined by EVPOOL_SIZE).
*
* The N keys sampled are added to the pool of good keys to evict (the ones
* with an old access time) if they are better than one of the current keys
* in the pool.
*
* After the pool is populated, the best key we have in the pool is evicted.
* However note that we don't remove keys from the pool when they are deleted,
* so the pool may contain keys that no longer exist.
*
* When we try to evict a key and all the entries in the pool no longer exist,
* we populate it again. This time we'll be sure that the pool has at least
* one key that can be evicted, if there is at least one key that can be
* evicted in the whole database. */
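/* A minimal caller-side sketch of how the C_OK/C_ERR contract above is meant
* to be used (hypothetical, based on how processCommand() in server.c
* typically drives it; not part of this file):
*
*   if (server.maxmemory) {
*       int retval = freeMemoryIfNeeded();
*       if ((c->cmd->flags & CMD_DENYOOM) && retval == C_ERR) {
*           addReply(c, shared.oomerr);
*           return C_OK;
*       }
*   }
*
* i.e. a C_ERR result is what turns into the OOM error for commands that may
* grow memory usage, while read-only commands are still allowed through. */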
/* Create a new eviction pool. */
void evictionPoolAlloc(void) {
struct evictionPoolEntry *ep;
int j;
ep = zmalloc(sizeof(*ep)*EVPOOL_SIZE);
for (j = 0; j < EVPOOL_SIZE; j++) {
ep[j].idle = 0;
ep[j].key = NULL;
ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE);
ep[j].dbid = 0;
}
EvictionPoolLRU = ep;
}
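/* Note: evictionPoolAlloc() is expected to be called exactly once at startup
* (in Redis this happens from initServer(); assumed to be the same here),
* since EvictionPoolLRU is a process-wide singleton reused across all
* freeMemoryIfNeeded() calls. */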
/* This is a helper function for freeMemoryIfNeeded(), it is used in order
* to populate the evictionPool with a few entries every time we want to
* evict a key. Keys with an idle time bigger than one of the current
* keys are added. Keys are always added if there are free entries.
*
* We insert keys in place in ascending order, so keys with the smaller
* idle time are on the left, and keys with the greater idle time on the
* right. */
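/* For intuition, a small sketch with a hypothetical pool of size 4 holding
* idle times [3, 10, 50, empty]: sampling a key with idle time 20 stops the
* scan at index 2, shifts 50 to the right and yields [3, 10, 20, 50]. With a
* full pool [3, 10, 20, 50], a new key with idle time 30 is inserted at k-1
* after shifting the left part out, dropping the worst candidate (3) and
* yielding [10, 20, 30, 50]; a key with idle time 2 would simply be skipped. */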
void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
int j, k, count;
dictEntry *samples[server.maxmemory_samples];
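/* dictGetSomeKeys() may return fewer entries than requested when the dict is
* small or sparsely populated, so 'count' below is the number of samples we
* actually obtained, not necessarily server.maxmemory_samples. */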
count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
for (j = 0; j < count; j++) {
unsigned long long idle;
sds key;
robj *o;
dictEntry *de;
de = samples[j];
key = dictGetKey(de);
/* If the dictionary we are sampling from is not the main
* dictionary (but the expires one) we need to look up the key
* again in the key dictionary to obtain the value object. */
if (sampledict != keydict) de = dictFind(keydict, key);
o = dictGetVal(de);
idle = estimateObjectIdleTime(o);
/* Insert the element inside the pool.
* First, find the first empty bucket or the first populated
* bucket that has an idle time smaller than our idle time. */
k = 0;
while (k < EVPOOL_SIZE &&
pool[k].key &&
pool[k].idle < idle) k++;
if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
/* Can't insert if the element is < the worst element we have
* and there are no empty buckets. */
continue;
} else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
/* Inserting into empty position. No setup needed before insert. */
} else {
/* Inserting in the middle. Now k points to the first element
* greater than the element to insert. */
if (pool[EVPOOL_SIZE-1].key == NULL) {
/* Free space on the right? Insert at k shifting
* all the elements from k to end to the right. */
/* Save SDS before overwriting. */
sds cached = pool[EVPOOL_SIZE-1].cached;
memmove(pool+k+1,pool+k,
sizeof(pool[0])*(EVPOOL_SIZE-k-1));
pool[k].cached = cached;
} else {
/* No free space on right? Insert at k-1 */
k--;
/* Shift all elements on the left of k (included) to the
* left, so we discard the element with smaller idle time. */
sds cached = pool[0].cached; /* Save SDS before overwriting. */
if (pool[0].key != pool[0].cached) sdsfree(pool[0].key);
memmove(pool,pool+1,sizeof(pool[0])*k);
pool[k].cached = cached;
}
}
/* Try to reuse the cached SDS string allocated in the pool entry,
* because allocating and deallocating this object is costly
* (according to the profiler, not my fantasy. Remember:
* premature optimization bla bla bla). */
int klen = sdslen(key);
if (klen > EVPOOL_CACHED_SDS_SIZE) {
pool[k].key = sdsdup(key);
} else {
memcpy(pool[k].cached,key,klen+1);
sdssetlen(pool[k].cached,klen);
pool[k].key = pool[k].cached;
}
pool[k].idle = idle;
pool[k].dbid = dbid;
}
}
int freeMemoryIfNeeded(void) {
size_t mem_reported, mem_used, mem_tofree, mem_freed;
int slaves = listLength(server.slaves);
mstime_t latency, eviction_latency;
long long delta;
/* Check if we are over the memory usage limit. If we are not, no need
* to subtract the slaves output buffers. We can just return ASAP. */
mem_reported = zmalloc_used_memory();
if (mem_reported <= server.maxmemory) return C_OK;
/* Remove the size of the slaves' output buffers and the AOF buffer from
* the count of used memory. */
mem_used = mem_reported;
if (slaves) {
listIter li;
listNode *ln;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = listNodeValue(ln);
unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
if (obuf_bytes > mem_used)
mem_used = 0;
else
mem_used -= obuf_bytes;
}
}
if (server.aof_state != AOF_OFF) {
mem_used -= sdslen(server.aof_buf);
mem_used -= aofRewriteBufferSize();
}
/* Check if we are still over the memory limit. */
if (mem_used <= server.maxmemory) return C_OK;
/* Compute how much memory we need to free. */
mem_tofree = mem_used - server.maxmemory;
mem_freed = 0;
if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
goto cant_free; /* We need to free memory, but policy forbids. */
latencyStartMonitor(latency);
while (mem_freed < mem_tofree) {
int j, k, i, keys_freed = 0;
static int next_db = 0;
sds bestkey = NULL;
int bestdbid;
redisDb *db;
dict *dict;
dictEntry *de;
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU)
{
struct evictionPoolEntry *pool = EvictionPoolLRU;
while(bestkey == NULL) {
unsigned long total_keys = 0, keys;
/* We don't want to make local-db choices when expiring keys,
* so to start we populate the eviction pool sampling keys from
* every DB. */
for (i = 0; i < server.dbnum; i++) {
db = server.db+i;
dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU) ?
db->dict : db->expires;
if ((keys = dictSize(dict)) != 0) {
evictionPoolPopulate(i, dict, db->dict, pool);
total_keys += keys;
}
}
if (!total_keys) break; /* No keys to evict. */
/* Go backward from best to worst element to evict. */
for (k = EVPOOL_SIZE-1; k >= 0; k--) {
if (pool[k].key == NULL) continue;
bestdbid = pool[k].dbid;
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU) {
de = dictFind(server.db[pool[k].dbid].dict,
pool[k].key);
} else {
de = dictFind(server.db[pool[k].dbid].expires,
pool[k].key);
}
/* Remove the entry from the pool. */
if (pool[k].key != pool[k].cached)
sdsfree(pool[k].key);
pool[k].key = NULL;
pool[k].idle = 0;
/* If the key exists, it is our pick. Otherwise it is
* a ghost and we need to try the next element. */
if (de) {
bestkey = dictGetKey(de);
break;
} else {
/* Ghost... Iterate again. */
}
}
}
}
/* volatile-random and allkeys-random policy */
else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
{
/* When evicting a random key, we try to evict a key for
* each DB, so we use the static 'next_db' variable to
* incrementally visit all DBs. */
for (i = 0; i < server.dbnum; i++) {
j = (++next_db) % server.dbnum;
db = server.db+j;
dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
db->dict : db->expires;
if (dictSize(dict) != 0) {
de = dictGetRandomKey(dict);
bestkey = dictGetKey(de);
bestdbid = j;
break;
}
}
}
/* volatile-ttl */
else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
long bestttl = 0; /* Initialized to avoid warning. */
/* In this policy we scan a single DB per iteration (visiting
* a different DB per call), expiring the key with the smallest
* TTL among the few sampled.
*
* Note that this algorithm makes local-DB choices, and should
* use a pool and code more similar to the one used in the
* LRU eviction policies in the future. */
for (i = 0; i < server.dbnum; i++) {
j = (++next_db) % server.dbnum;
db = server.db+j;
dict = db->expires;
if (dictSize(dict) != 0) {
for (k = 0; k < server.maxmemory_samples; k++) {
sds thiskey;
long thisttl;
de = dictGetRandomKey(dict);
thiskey = dictGetKey(de);
thisttl = (long) dictGetVal(de);
/* Keys expiring sooner (smaller unix timestamp) are
* better candidates for deletion */
if (bestkey == NULL || thisttl < bestttl) {
bestkey = thiskey;
bestttl = thisttl;
bestdbid = j;
}
}
}
}
}
/* Finally remove the selected key. */
if (bestkey) {
db = server.db+bestdbid;
robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
/* We compute the amount of memory freed by db*Delete() alone.
* It is possible that the memory needed to propagate the DEL
* in the AOF and replication link is actually greater than the
* memory we are freeing by removing the key, but we can't account
* for that, otherwise we would never exit the loop.
*
* AOF and Output buffer memory will be freed eventually so
* we only care about memory used by the key space. */
delta = (long long) zmalloc_used_memory();
latencyStartMonitor(eviction_latency);
if (server.lazyfree_lazy_eviction)
dbAsyncDelete(db,keyobj);
else
dbSyncDelete(db,keyobj);
latencyEndMonitor(eviction_latency);
latencyAddSampleIfNeeded("eviction-del",eviction_latency);
latencyRemoveNestedEvent(latency,eviction_latency);
delta -= (long long) zmalloc_used_memory();
mem_freed += delta;
server.stat_evictedkeys++;
notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
keyobj, db->id);
decrRefCount(keyobj);
keys_freed++;
/* When the memory to free starts to be big enough, we may
* start spending so much time here that it is impossible to
* deliver data to the slaves fast enough, so we force the
* transmission here inside the loop. */
if (slaves) flushSlavesOutputBuffers();
}
if (!keys_freed) {
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("eviction-cycle",latency);
goto cant_free; /* nothing to free... */
}
}
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("eviction-cycle",latency);
return C_OK;
cant_free:
/* We are here if we are not able to reclaim memory. There is only one
* last thing we can try: check if the lazyfree thread has jobs in queue
* and wait... */
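/* Stop waiting as soon as the memory released by the background lazy-free
* jobs (measured as the drop from the 'mem_reported' value taken at the start
* of this call), plus what the eviction loop already freed ('mem_freed'),
* covers the amount we originally had to free ('mem_tofree'). */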
while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
break;
usleep(1000);
}
return C_ERR;
}