2016-07-06 09:28:18 -04:00
|
|
|
/* Maxmemory directive handling (LRU eviction and other policies).
|
|
|
|
*
|
|
|
|
* ----------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
|
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* * Neither the name of Redis nor the names of its contributors may be used
|
|
|
|
* to endorse or promote products derived from this software without
|
|
|
|
* specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "server.h"
|
|
|
|
#include "bio.h"
|
|
|
|
|
|
|
|
/* Return the LRU clock, based on the clock resolution. This is a time
|
|
|
|
* in a reduced-bits format that can be used to set and check the
|
|
|
|
* object->lru field of redisObject structures. */
|
|
|
|
unsigned int getLRUClock(void) {
|
|
|
|
return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Given an object returns the min number of milliseconds the object was never
|
|
|
|
* requested, using an approximated LRU algorithm. */
|
|
|
|
unsigned long long estimateObjectIdleTime(robj *o) {
|
|
|
|
unsigned long long lruclock = LRU_CLOCK();
|
|
|
|
if (lruclock >= o->lru) {
|
|
|
|
return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
|
|
|
|
} else {
|
|
|
|
return (lruclock + (LRU_CLOCK_MAX - o->lru)) *
|
|
|
|
LRU_CLOCK_RESOLUTION;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config
|
|
|
|
* file to limit the max memory used by the server, before processing a
|
|
|
|
* command.
|
|
|
|
*
|
|
|
|
* The goal of the function is to free enough memory to keep Redis under the
|
|
|
|
* configured memory limit.
|
|
|
|
*
|
|
|
|
* The function starts calculating how many bytes should be freed to keep
|
|
|
|
* Redis under the limit, and enters a loop selecting the best keys to
|
|
|
|
* evict according to the configured policy.
|
|
|
|
*
|
|
|
|
* If all the bytes needed to return back under the limit were freed the
|
|
|
|
* function returns C_OK, otherwise C_ERR is returned, and the caller
|
|
|
|
* should block the execution of commands that will result in more memory
|
|
|
|
* used by the server.
|
|
|
|
*
|
|
|
|
* ------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* LRU approximation algorithm
|
|
|
|
*
|
|
|
|
* Redis uses an approximation of the LRU algorithm that runs in constant
|
|
|
|
* memory. Every time there is a key to expire, we sample N keys (with
|
|
|
|
* N very small, usually around 5) to populate a pool of best keys to
|
|
|
|
* evict of M keys (the pool size is defined by MAXMEMORY_EVICTION_POOL_SIZE).
|
|
|
|
*
|
|
|
|
* The N keys sampled are added in the pool of good keys to expire (the one
|
|
|
|
* with an old access time) if they are better than one of the current keys
|
|
|
|
* in the pool.
|
|
|
|
*
|
|
|
|
* After the pool is populated, the best key we have in the pool is expired.
|
|
|
|
* However note that we don't remove keys from the pool when they are deleted
|
|
|
|
* so the pool may contain keys that no longer exist.
|
|
|
|
*
|
|
|
|
* When we try to evict a key, and all the entries in the pool don't exist
|
|
|
|
* we populate it again. This time we'll be sure that the pool has at least
|
|
|
|
* one key that can be evicted, if there is at least one key that can be
|
|
|
|
* evicted in the whole database. */
|
|
|
|
|
|
|
|
/* Create a new eviction pool. */
|
|
|
|
struct evictionPoolEntry *evictionPoolAlloc(void) {
|
|
|
|
struct evictionPoolEntry *ep;
|
|
|
|
int j;
|
|
|
|
|
|
|
|
ep = zmalloc(sizeof(*ep)*MAXMEMORY_EVICTION_POOL_SIZE);
|
|
|
|
for (j = 0; j < MAXMEMORY_EVICTION_POOL_SIZE; j++) {
|
|
|
|
ep[j].idle = 0;
|
|
|
|
ep[j].key = NULL;
|
|
|
|
}
|
|
|
|
return ep;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This is an helper function for freeMemoryIfNeeded(), it is used in order
|
|
|
|
* to populate the evictionPool with a few entries every time we want to
|
|
|
|
* expire a key. Keys with idle time smaller than one of the current
|
|
|
|
* keys are added. Keys are always added if there are free entries.
|
|
|
|
*
|
|
|
|
* We insert keys on place in ascending order, so keys with the smaller
|
|
|
|
* idle time are on the left, and keys with the higher idle time on the
|
|
|
|
* right. */
|
|
|
|
|
|
|
|
#define EVICTION_SAMPLES_ARRAY_SIZE 16
|
|
|
|
void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
|
|
|
|
int j, k, count;
|
|
|
|
dictEntry *_samples[EVICTION_SAMPLES_ARRAY_SIZE];
|
|
|
|
dictEntry **samples;
|
|
|
|
|
|
|
|
/* Try to use a static buffer: this function is a big hit...
|
|
|
|
* Note: it was actually measured that this helps. */
|
|
|
|
if (server.maxmemory_samples <= EVICTION_SAMPLES_ARRAY_SIZE) {
|
|
|
|
samples = _samples;
|
|
|
|
} else {
|
|
|
|
samples = zmalloc(sizeof(samples[0])*server.maxmemory_samples);
|
|
|
|
}
|
|
|
|
|
|
|
|
count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
|
|
|
|
for (j = 0; j < count; j++) {
|
|
|
|
unsigned long long idle;
|
|
|
|
sds key;
|
|
|
|
robj *o;
|
|
|
|
dictEntry *de;
|
|
|
|
|
|
|
|
de = samples[j];
|
|
|
|
key = dictGetKey(de);
|
|
|
|
/* If the dictionary we are sampling from is not the main
|
|
|
|
* dictionary (but the expires one) we need to lookup the key
|
|
|
|
* again in the key dictionary to obtain the value object. */
|
|
|
|
if (sampledict != keydict) de = dictFind(keydict, key);
|
|
|
|
o = dictGetVal(de);
|
|
|
|
idle = estimateObjectIdleTime(o);
|
|
|
|
|
|
|
|
/* Insert the element inside the pool.
|
|
|
|
* First, find the first empty bucket or the first populated
|
|
|
|
* bucket that has an idle time smaller than our idle time. */
|
|
|
|
k = 0;
|
|
|
|
while (k < MAXMEMORY_EVICTION_POOL_SIZE &&
|
|
|
|
pool[k].key &&
|
|
|
|
pool[k].idle < idle) k++;
|
|
|
|
if (k == 0 && pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key != NULL) {
|
|
|
|
/* Can't insert if the element is < the worst element we have
|
|
|
|
* and there are no empty buckets. */
|
|
|
|
continue;
|
|
|
|
} else if (k < MAXMEMORY_EVICTION_POOL_SIZE && pool[k].key == NULL) {
|
|
|
|
/* Inserting into empty position. No setup needed before insert. */
|
|
|
|
} else {
|
|
|
|
/* Inserting in the middle. Now k points to the first element
|
|
|
|
* greater than the element to insert. */
|
|
|
|
if (pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key == NULL) {
|
|
|
|
/* Free space on the right? Insert at k shifting
|
|
|
|
* all the elements from k to end to the right. */
|
|
|
|
memmove(pool+k+1,pool+k,
|
|
|
|
sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1));
|
|
|
|
} else {
|
|
|
|
/* No free space on right? Insert at k-1 */
|
|
|
|
k--;
|
|
|
|
/* Shift all elements on the left of k (included) to the
|
|
|
|
* left, so we discard the element with smaller idle time. */
|
|
|
|
sdsfree(pool[0].key);
|
|
|
|
memmove(pool,pool+1,sizeof(pool[0])*k);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pool[k].key = sdsdup(key);
|
|
|
|
pool[k].idle = idle;
|
|
|
|
}
|
|
|
|
if (samples != _samples) zfree(samples);
|
|
|
|
}
|
|
|
|
|
|
|
|
int freeMemoryIfNeeded(void) {
|
|
|
|
size_t mem_reported, mem_used, mem_tofree, mem_freed;
|
|
|
|
int slaves = listLength(server.slaves);
|
|
|
|
mstime_t latency, eviction_latency;
|
|
|
|
long long delta;
|
|
|
|
|
|
|
|
/* Check if we are over the memory usage limit. If we are not, no need
|
|
|
|
* to subtract the slaves output buffers. We can just return ASAP. */
|
|
|
|
mem_reported = zmalloc_used_memory();
|
|
|
|
if (mem_reported <= server.maxmemory) return C_OK;
|
|
|
|
|
|
|
|
/* Remove the size of slaves output buffers and AOF buffer from the
|
|
|
|
* count of used memory. */
|
|
|
|
mem_used = mem_reported;
|
|
|
|
if (slaves) {
|
|
|
|
listIter li;
|
|
|
|
listNode *ln;
|
|
|
|
|
|
|
|
listRewind(server.slaves,&li);
|
|
|
|
while((ln = listNext(&li))) {
|
|
|
|
client *slave = listNodeValue(ln);
|
|
|
|
unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
|
|
|
|
if (obuf_bytes > mem_used)
|
|
|
|
mem_used = 0;
|
|
|
|
else
|
|
|
|
mem_used -= obuf_bytes;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (server.aof_state != AOF_OFF) {
|
|
|
|
mem_used -= sdslen(server.aof_buf);
|
|
|
|
mem_used -= aofRewriteBufferSize();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check if we are still over the memory limit. */
|
|
|
|
if (mem_used <= server.maxmemory) return C_OK;
|
|
|
|
|
|
|
|
/* Compute how much memory we need to free. */
|
|
|
|
mem_tofree = mem_used - server.maxmemory;
|
|
|
|
mem_freed = 0;
|
|
|
|
|
|
|
|
if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
|
|
|
|
goto cant_free; /* We need to free memory, but policy forbids. */
|
|
|
|
|
|
|
|
latencyStartMonitor(latency);
|
|
|
|
while (mem_freed < mem_tofree) {
|
|
|
|
int j, k, keys_freed = 0;
|
|
|
|
|
|
|
|
for (j = 0; j < server.dbnum; j++) {
|
|
|
|
long bestval = 0; /* just to prevent warning */
|
|
|
|
sds bestkey = NULL;
|
|
|
|
dictEntry *de;
|
|
|
|
redisDb *db = server.db+j;
|
|
|
|
dict *dict;
|
|
|
|
|
|
|
|
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
|
|
|
|
server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM)
|
|
|
|
{
|
|
|
|
dict = server.db[j].dict;
|
|
|
|
} else {
|
|
|
|
dict = server.db[j].expires;
|
|
|
|
}
|
|
|
|
if (dictSize(dict) == 0) continue;
|
|
|
|
|
|
|
|
/* volatile-random and allkeys-random policy */
|
|
|
|
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
|
|
|
|
server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
|
|
|
|
{
|
|
|
|
de = dictGetRandomKey(dict);
|
|
|
|
bestkey = dictGetKey(de);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* volatile-lru and allkeys-lru policy */
|
|
|
|
else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
|
|
|
|
server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU)
|
|
|
|
{
|
|
|
|
struct evictionPoolEntry *pool = db->eviction_pool;
|
|
|
|
|
|
|
|
while(bestkey == NULL) {
|
|
|
|
evictionPoolPopulate(dict, db->dict, db->eviction_pool);
|
|
|
|
/* Go backward from best to worst element to evict. */
|
|
|
|
for (k = MAXMEMORY_EVICTION_POOL_SIZE-1; k >= 0; k--) {
|
|
|
|
if (pool[k].key == NULL) continue;
|
|
|
|
de = dictFind(dict,pool[k].key);
|
|
|
|
|
|
|
|
/* Remove the entry from the pool. */
|
|
|
|
sdsfree(pool[k].key);
|
2016-07-11 13:18:17 -04:00
|
|
|
pool[k].key = NULL;
|
|
|
|
pool[k].idle = 0;
|
2016-07-06 09:28:18 -04:00
|
|
|
|
|
|
|
/* If the key exists, is our pick. Otherwise it is
|
|
|
|
* a ghost and we need to try the next element. */
|
|
|
|
if (de) {
|
|
|
|
bestkey = dictGetKey(de);
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
/* Ghost... */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* volatile-ttl */
|
|
|
|
else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
|
|
|
|
for (k = 0; k < server.maxmemory_samples; k++) {
|
|
|
|
sds thiskey;
|
|
|
|
long thisval;
|
|
|
|
|
|
|
|
de = dictGetRandomKey(dict);
|
|
|
|
thiskey = dictGetKey(de);
|
|
|
|
thisval = (long) dictGetVal(de);
|
|
|
|
|
|
|
|
/* Expire sooner (minor expire unix timestamp) is better
|
|
|
|
* candidate for deletion */
|
|
|
|
if (bestkey == NULL || thisval < bestval) {
|
|
|
|
bestkey = thiskey;
|
|
|
|
bestval = thisval;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Finally remove the selected key. */
|
|
|
|
if (bestkey) {
|
|
|
|
robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
|
|
|
|
propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
|
|
|
|
/* We compute the amount of memory freed by db*Delete() alone.
|
|
|
|
* It is possible that actually the memory needed to propagate
|
|
|
|
* the DEL in AOF and replication link is greater than the one
|
|
|
|
* we are freeing removing the key, but we can't account for
|
|
|
|
* that otherwise we would never exit the loop.
|
|
|
|
*
|
|
|
|
* AOF and Output buffer memory will be freed eventually so
|
|
|
|
* we only care about memory used by the key space. */
|
|
|
|
delta = (long long) zmalloc_used_memory();
|
|
|
|
latencyStartMonitor(eviction_latency);
|
|
|
|
if (server.lazyfree_lazy_eviction)
|
|
|
|
dbAsyncDelete(db,keyobj);
|
|
|
|
else
|
|
|
|
dbSyncDelete(db,keyobj);
|
|
|
|
latencyEndMonitor(eviction_latency);
|
|
|
|
latencyAddSampleIfNeeded("eviction-del",eviction_latency);
|
|
|
|
latencyRemoveNestedEvent(latency,eviction_latency);
|
|
|
|
delta -= (long long) zmalloc_used_memory();
|
|
|
|
mem_freed += delta;
|
|
|
|
server.stat_evictedkeys++;
|
|
|
|
notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
|
|
|
|
keyobj, db->id);
|
|
|
|
decrRefCount(keyobj);
|
|
|
|
keys_freed++;
|
|
|
|
|
|
|
|
/* When the memory to free starts to be big enough, we may
|
|
|
|
* start spending so much time here that is impossible to
|
|
|
|
* deliver data to the slaves fast enough, so we force the
|
|
|
|
* transmission here inside the loop. */
|
|
|
|
if (slaves) flushSlavesOutputBuffers();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!keys_freed) {
|
|
|
|
latencyEndMonitor(latency);
|
|
|
|
latencyAddSampleIfNeeded("eviction-cycle",latency);
|
|
|
|
goto cant_free; /* nothing to free... */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
latencyEndMonitor(latency);
|
|
|
|
latencyAddSampleIfNeeded("eviction-cycle",latency);
|
|
|
|
return C_OK;
|
|
|
|
|
|
|
|
cant_free:
|
|
|
|
/* We are here if we are not able to reclaim memory. There is only one
|
|
|
|
* last thing we can try: check if the lazyfree thread has jobs in queue
|
|
|
|
* and wait... */
|
|
|
|
while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
|
|
|
|
if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
|
|
|
|
break;
|
|
|
|
usleep(1000);
|
|
|
|
}
|
|
|
|
return C_ERR;
|
|
|
|
}
|
|
|
|
|