diff --git a/src/defrag.c b/src/defrag.c index b813ab74f..5b7fdb775 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -238,46 +238,26 @@ void activeDefragZsetEntry(zset *zs, dictEntry *de) { #define DEFRAG_SDS_DICT_VAL_VOID_PTR 3 #define DEFRAG_SDS_DICT_VAL_LUA_SCRIPT 4 -typedef struct { - dict *dict; - int val_type; -} activeDefragSdsDictData; - -void activeDefragSdsDictCallback(void *privdata, const dictEntry *_de) { - dictEntry *de = (dictEntry*)_de; - activeDefragSdsDictData *data = privdata; - dict *d = data->dict; - int val_type = data->val_type; - sds sdsele = dictGetKey(de), newsds; - if ((newsds = activeDefragSds(sdsele))) - dictSetKey(d, de, newsds); - /* defrag the value */ - if (val_type == DEFRAG_SDS_DICT_VAL_IS_SDS) { - sdsele = dictGetVal(de); - if ((newsds = activeDefragSds(sdsele))) - dictSetVal(d, de, newsds); - } else if (val_type == DEFRAG_SDS_DICT_VAL_IS_STROB) { - robj *newele, *ele = dictGetVal(de); - if ((newele = activeDefragStringOb(ele))) - dictSetVal(d, de, newele); - } else if (val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR) { - void *newptr, *ptr = dictGetVal(de); - if ((newptr = activeDefragAlloc(ptr))) - dictSetVal(d, de, newptr); - } else if (val_type == DEFRAG_SDS_DICT_VAL_LUA_SCRIPT) { - void *newptr, *ptr = dictGetVal(de); - if ((newptr = activeDefragLuaScript(ptr))) - dictSetVal(d, de, newptr); - } +void activeDefragSdsDictCallback(void *privdata, const dictEntry *de) { + UNUSED(privdata); + UNUSED(de); } /* Defrag a dict with sds key and optional value (either ptr, sds or robj string) */ void activeDefragSdsDict(dict* d, int val_type) { - activeDefragSdsDictData data = {d, val_type}; unsigned long cursor = 0; + dictDefragFunctions defragfns = { + .defragAlloc = activeDefragAlloc, + .defragKey = (dictDefragAllocFunction *)activeDefragSds, + .defragVal = (val_type == DEFRAG_SDS_DICT_VAL_IS_SDS ? (dictDefragAllocFunction *)activeDefragSds : + val_type == DEFRAG_SDS_DICT_VAL_IS_STROB ? (dictDefragAllocFunction *)activeDefragStringOb : + val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR ? (dictDefragAllocFunction *)activeDefragAlloc : + val_type == DEFRAG_SDS_DICT_VAL_LUA_SCRIPT ? (dictDefragAllocFunction *)activeDefragLuaScript : + NULL) + }; do { cursor = dictScanDefrag(d, cursor, activeDefragSdsDictCallback, - activeDefragAlloc, &data); + &defragfns, NULL); } while (cursor != 0); } @@ -407,19 +387,14 @@ void scanLaterZset(robj *ob, unsigned long *cursor) { zset *zs = (zset*)ob->ptr; dict *d = zs->dict; scanLaterZsetData data = {zs}; - *cursor = dictScanDefrag(d, *cursor, scanLaterZsetCallback, activeDefragAlloc, &data); + dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc}; + *cursor = dictScanDefrag(d, *cursor, scanLaterZsetCallback, &defragfns, &data); } -typedef struct { - dict *dict; -} scanLaterDictData; - -void scanLaterSetCallback(void *privdata, const dictEntry *_de) { - dictEntry *de = (dictEntry*)_de; - scanLaterDictData *data = privdata; - sds sdsele = dictGetKey(de), newsds; - if ((newsds = activeDefragSds(sdsele))) - dictSetKey(data->dict, de, newsds); +/* Used as scan callback when all the work is done in the dictDefragFunctions. */ +void scanCallbackCountScanned(void *privdata, const dictEntry *de) { + UNUSED(privdata); + UNUSED(de); server.stat_active_defrag_scanned++; } @@ -427,28 +402,23 @@ void scanLaterSet(robj *ob, unsigned long *cursor) { if (ob->type != OBJ_SET || ob->encoding != OBJ_ENCODING_HT) return; dict *d = ob->ptr; - scanLaterDictData data = {d}; - *cursor = dictScanDefrag(d, *cursor, scanLaterSetCallback, activeDefragAlloc, &data); -} - -void scanLaterHashCallback(void *privdata, const dictEntry *_de) { - dictEntry *de = (dictEntry*)_de; - scanLaterDictData *data = privdata; - sds sdsele = dictGetKey(de), newsds; - if ((newsds = activeDefragSds(sdsele))) - dictSetKey(data->dict, de, newsds); - sdsele = dictGetVal(de); - if ((newsds = activeDefragSds(sdsele))) - dictSetVal(data->dict, de, newsds); - server.stat_active_defrag_scanned++; + dictDefragFunctions defragfns = { + .defragAlloc = activeDefragAlloc, + .defragKey = (dictDefragAllocFunction *)activeDefragSds + }; + *cursor = dictScanDefrag(d, *cursor, scanCallbackCountScanned, &defragfns, NULL); } void scanLaterHash(robj *ob, unsigned long *cursor) { if (ob->type != OBJ_HASH || ob->encoding != OBJ_ENCODING_HT) return; dict *d = ob->ptr; - scanLaterDictData data = {d}; - *cursor = dictScanDefrag(d, *cursor, scanLaterHashCallback, activeDefragAlloc, &data); + dictDefragFunctions defragfns = { + .defragAlloc = activeDefragAlloc, + .defragKey = (dictDefragAllocFunction *)activeDefragSds, + .defragVal = (dictDefragAllocFunction *)activeDefragSds + }; + *cursor = dictScanDefrag(d, *cursor, scanCallbackCountScanned, &defragfns, NULL); } void defragQuicklist(redisDb *db, dictEntry *kde) { @@ -788,14 +758,6 @@ void defragScanCallback(void *privdata, const dictEntry *de) { server.stat_active_defrag_scanned++; } -/* Dummy scan callback used when defragging the expire dictionary. We only - * defrag the entries, which is done per bucket. */ -void defragExpireScanCallback(void *privdata, const dictEntry *de) { - UNUSED(privdata); - UNUSED(de); - server.stat_active_defrag_scanned++; -} - /* Utility function to get the fragmentation ratio from jemalloc. * It is critical to do that by comparing only heap maps that belong to * jemalloc, and skip ones the jemalloc keeps as spare. Since we use this @@ -1003,6 +965,7 @@ void activeDefragCycle(void) { endtime = start + timelimit; latencyStartMonitor(latency); + dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc}; do { /* if we're not continuing a scan from the last call or loop, start a new one */ if (!cursor && !expires_cursor) { @@ -1055,13 +1018,13 @@ void activeDefragCycle(void) { /* Scan the keyspace dict unless we're scanning the expire dict. */ if (!expires_cursor) cursor = dictScanDefrag(db->dict, cursor, defragScanCallback, - activeDefragAlloc, db); + &defragfns, db); /* When done scanning the keyspace dict, we scan the expire dict. */ if (!cursor) expires_cursor = dictScanDefrag(db->expires, expires_cursor, - defragExpireScanCallback, - activeDefragAlloc, NULL); + scanCallbackCountScanned, + &defragfns, NULL); /* Once in 16 scan iterations, 512 pointer reallocations. or 64 keys * (if we have a lot of pointers in one hash bucket or rehashing), diff --git a/src/dict.c b/src/dict.c index ec2d4dca0..d46e30588 100644 --- a/src/dict.c +++ b/src/dict.c @@ -74,12 +74,19 @@ struct dictEntry { * by dictType's dictEntryMetadataBytes(). */ }; +typedef struct { + void *key; + dictEntry *next; +} dictEntryNoValue; + /* -------------------------- private prototypes ---------------------------- */ static int _dictExpandIfNeeded(dict *d); static signed char _dictNextExp(unsigned long size); -static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing); static int _dictInit(dict *d, dictType *type); +static dictEntry *dictGetNext(const dictEntry *de); +static dictEntry **dictGetNextRef(dictEntry *de); +static void dictSetNext(dictEntry *de, dictEntry *next); /* -------------------------- hash functions -------------------------------- */ @@ -107,6 +114,63 @@ uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) { return siphash_nocase(buf,len,dict_hash_function_seed); } +/* --------------------- dictEntry pointer bit tricks ---------------------- */ + +/* The 3 least significant bits in a pointer to a dictEntry determines what the + * pointer actually points to. If the least bit is set, it's a key. Otherwise, + * the bit pattern of the least 3 significant bits mark the kind of entry. */ + +#define ENTRY_PTR_MASK 7 /* 111 */ +#define ENTRY_PTR_NORMAL 0 /* 000 */ +#define ENTRY_PTR_NO_VALUE 2 /* 010 */ + +/* Returns 1 if the entry pointer is a pointer to a key, rather than to an + * allocated entry. Returns 0 otherwise. */ +static inline int entryIsKey(const dictEntry *de) { + return (uintptr_t)(void *)de & 1; +} + +/* Returns 1 if the pointer is actually a pointer to a dictEntry struct. Returns + * 0 otherwise. */ +static inline int entryIsNormal(const dictEntry *de) { + return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_NORMAL; +} + +/* Returns 1 if the entry is a special entry with key and next, but without + * value. Returns 0 otherwise. */ +static inline int entryIsNoValue(const dictEntry *de) { + return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_NO_VALUE; +} + +/* Creates an entry without a value field. */ +static inline dictEntry *createEntryNoValue(void *key, dictEntry *next) { + dictEntryNoValue *entry = zmalloc(sizeof(*entry)); + entry->key = key; + entry->next = next; + return (dictEntry *)(void *)((uintptr_t)(void *)entry | ENTRY_PTR_NO_VALUE); +} + +static inline dictEntry *encodeMaskedPtr(const void *ptr, unsigned int bits) { + assert(((uintptr_t)ptr & ENTRY_PTR_MASK) == 0); + return (dictEntry *)(void *)((uintptr_t)ptr | bits); +} + +static inline void *decodeMaskedPtr(const dictEntry *de) { + assert(!entryIsKey(de)); + return (void *)((uintptr_t)(void *)de & ~ENTRY_PTR_MASK); +} + +/* Decodes the pointer to an entry without value, when you know it is an entry + * without value. Hint: Use entryIsNoValue to check. */ +static inline dictEntryNoValue *decodeEntryNoValue(const dictEntry *de) { + return decodeMaskedPtr(de); +} + +/* Returns 1 if the entry has a value field and 0 otherwise. */ +static inline int entryHasValue(const dictEntry *de) { + return entryIsNormal(de); +} + /* ----------------------------- API implementation ------------------------- */ /* Reset hash table parameters already initialized with _dictInit()*/ @@ -252,17 +316,42 @@ int dictRehash(dict *d, int n) { while(de) { uint64_t h; - nextde = de->next; + nextde = dictGetNext(de); + void *key = dictGetKey(de); /* Get the index in the new hash table */ if (d->ht_size_exp[1] > d->ht_size_exp[0]) { - h = dictHashKey(d, de->key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]); + h = dictHashKey(d, key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]); } else { /* We're shrinking the table. The tables sizes are powers of * two, so we simply mask the bucket index in the larger table * to get the bucket index in the smaller table. */ h = d->rehashidx & DICTHT_SIZE_MASK(d->ht_size_exp[1]); } - de->next = d->ht_table[1][h]; + if (d->type->no_value) { + if (d->type->keys_are_odd && !d->ht_table[1][h]) { + /* Destination bucket is empty and we can store the key + * directly without an allocated entry. Free the old entry + * if it's an allocated entry. + * + * TODO: Add a flag 'keys_are_even' and if set, we can use + * this optimization for these dicts too. We can set the LSB + * bit when stored as a dict entry and clear it again when + * we need the key back. */ + assert(entryIsKey(key)); + if (!entryIsKey(de)) zfree(decodeMaskedPtr(de)); + de = key; + } else if (entryIsKey(de)) { + /* We don't have an allocated entry but we need one. */ + de = createEntryNoValue(key, d->ht_table[1][h]); + } else { + /* Just move the existing entry to the destination table and + * update the 'next' field. */ + assert(entryIsNoValue(de)); + dictSetNext(de, d->ht_table[1][h]); + } + } else { + dictSetNext(de, d->ht_table[1][h]); + } d->ht_table[1][h] = de; d->ht_used[0]--; d->ht_used[1]++; @@ -334,7 +423,7 @@ int dictAdd(dict *d, void *key, void *val) dictEntry *entry = dictAddRaw(d,key,NULL); if (!entry) return DICT_ERR; - dictSetVal(d, entry, val); + if (!d->type->no_value) dictSetVal(d, entry, val); return DICT_OK; } @@ -358,33 +447,61 @@ int dictAdd(dict *d, void *key, void *val) */ dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) { - long index; + /* Get the position for the new key or NULL if the key already exists. */ + void *position = dictFindPositionForInsert(d, key, existing); + if (!position) return NULL; + + /* Dup the key if necessary. */ + if (d->type->keyDup) key = d->type->keyDup(d, key); + + return dictInsertAtPosition(d, key, position); +} + +/* Adds a key in the dict's hashtable at the position returned by a preceding + * call to dictFindPositionForInsert. This is a low level function which allows + * splitting dictAddRaw in two parts. Normally, dictAddRaw or dictAdd should be + * used instead. */ +dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) { + dictEntry **bucket = position; /* It's a bucket, but the API hides that. */ dictEntry *entry; - int htidx; - - if (dictIsRehashing(d)) _dictRehashStep(d); - - /* Get the index of the new element, or -1 if - * the element already exists. */ - if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1) - return NULL; - - /* Allocate the memory and store the new entry. - * Insert the element in top, with the assumption that in a database - * system it is more likely that recently added entries are accessed - * more frequently. */ - htidx = dictIsRehashing(d) ? 1 : 0; + /* If rehashing is ongoing, we insert in table 1, otherwise in table 0. + * Assert that the provided bucket is the right table. */ + int htidx = dictIsRehashing(d) ? 1 : 0; + assert(bucket >= &d->ht_table[htidx][0] && + bucket <= &d->ht_table[htidx][DICTHT_SIZE_MASK(d->ht_size_exp[htidx])]); size_t metasize = dictEntryMetadataSize(d); - entry = zmalloc(sizeof(*entry) + metasize); - if (metasize > 0) { - memset(dictEntryMetadata(entry), 0, metasize); + if (d->type->no_value) { + assert(!metasize); /* Entry metadata + no value not supported. */ + if (d->type->keys_are_odd && !*bucket) { + /* We can store the key directly in the destination bucket without the + * allocated entry. + * + * TODO: Add a flag 'keys_are_even' and if set, we can use this + * optimization for these dicts too. We can set the LSB bit when + * stored as a dict entry and clear it again when we need the key + * back. */ + entry = key; + assert(entryIsKey(entry)); + } else { + /* Allocate an entry without value. */ + entry = createEntryNoValue(key, *bucket); + } + } else { + /* Allocate the memory and store the new entry. + * Insert the element in top, with the assumption that in a database + * system it is more likely that recently added entries are accessed + * more frequently. */ + entry = zmalloc(sizeof(*entry) + metasize); + assert(entryIsNormal(entry)); /* Check alignment of allocation */ + if (metasize > 0) { + memset(dictEntryMetadata(entry), 0, metasize); + } + entry->key = key; + entry->next = *bucket; } - entry->next = d->ht_table[htidx][index]; - d->ht_table[htidx][index] = entry; + *bucket = entry; d->ht_used[htidx]++; - /* Set the hash entry fields. */ - dictSetKey(d, entry, key); return entry; } @@ -395,7 +512,7 @@ dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) * operation. */ int dictReplace(dict *d, void *key, void *val) { - dictEntry *entry, *existing, auxentry; + dictEntry *entry, *existing; /* Try to add the element. If the key * does not exists dictAdd will succeed. */ @@ -410,9 +527,10 @@ int dictReplace(dict *d, void *key, void *val) * as the previous one. In this context, think to reference counting, * you want to increment (set), and then decrement (free), and not the * reverse. */ - auxentry = *existing; + void *oldval = dictGetVal(existing); dictSetVal(d, existing, val); - dictFreeVal(d, &auxentry); + if (d->type->valDestructor) + d->type->valDestructor(d, oldval); return 0; } @@ -448,12 +566,13 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { he = d->ht_table[table][idx]; prevHe = NULL; while(he) { - if (key==he->key || dictCompareKeys(d, key, he->key)) { + void *he_key = dictGetKey(he); + if (key == he_key || dictCompareKeys(d, key, he_key)) { /* Unlink the element from the list */ if (prevHe) - prevHe->next = he->next; + dictSetNext(prevHe, dictGetNext(he)); else - d->ht_table[table][idx] = he->next; + d->ht_table[table][idx] = dictGetNext(he); if (!nofree) { dictFreeUnlinkedEntry(d, he); } @@ -461,7 +580,7 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { return he; } prevHe = he; - he = he->next; + he = dictGetNext(he); } if (!dictIsRehashing(d)) break; } @@ -505,7 +624,7 @@ void dictFreeUnlinkedEntry(dict *d, dictEntry *he) { if (he == NULL) return; dictFreeKey(d, he); dictFreeVal(d, he); - zfree(he); + if (!entryIsKey(he)) zfree(decodeMaskedPtr(he)); } /* Destroy an entire dictionary */ @@ -520,10 +639,10 @@ int _dictClear(dict *d, int htidx, void(callback)(dict*)) { if ((he = d->ht_table[htidx][i]) == NULL) continue; while(he) { - nextHe = he->next; + nextHe = dictGetNext(he); dictFreeKey(d, he); dictFreeVal(d, he); - zfree(he); + if (!entryIsKey(he)) zfree(decodeMaskedPtr(he)); d->ht_used[htidx]--; he = nextHe; } @@ -555,9 +674,10 @@ dictEntry *dictFind(dict *d, const void *key) idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]); he = d->ht_table[table][idx]; while(he) { - if (key==he->key || dictCompareKeys(d, key, he->key)) + void *he_key = dictGetKey(he); + if (key == he_key || dictCompareKeys(d, key, he_key)) return he; - he = he->next; + he = dictGetNext(he); } if (!dictIsRehashing(d)) return NULL; } @@ -597,14 +717,15 @@ dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink, for (table = 0; table <= 1; table++) { idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]); dictEntry **ref = &d->ht_table[table][idx]; - while(*ref) { - if (key==(*ref)->key || dictCompareKeys(d, key, (*ref)->key)) { + while (ref && *ref) { + void *de_key = dictGetKey(*ref); + if (key == de_key || dictCompareKeys(d, key, de_key)) { *table_index = table; *plink = ref; dictPauseRehashing(d); return *ref; } - ref = &(*ref)->next; + ref = dictGetNextRef(*ref); } if (!dictIsRehashing(d)) return NULL; } @@ -614,14 +735,15 @@ dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink, void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table_index) { if (he == NULL) return; d->ht_used[table_index]--; - *plink = he->next; + *plink = dictGetNext(he); dictFreeKey(d, he); dictFreeVal(d, he); - zfree(he); + if (!entryIsKey(he)) zfree(decodeMaskedPtr(he)); dictResumeRehashing(d); } void dictSetKey(dict *d, dictEntry* de, void *key) { + assert(!d->type->no_value); if (d->type->keyDup) de->key = d->type->keyDup(d, key); else @@ -629,63 +751,104 @@ void dictSetKey(dict *d, dictEntry* de, void *key) { } void dictSetVal(dict *d, dictEntry *de, void *val) { + assert(entryHasValue(de)); de->v.val = d->type->valDup ? d->type->valDup(d, val) : val; } void dictSetSignedIntegerVal(dictEntry *de, int64_t val) { + assert(entryHasValue(de)); de->v.s64 = val; } void dictSetUnsignedIntegerVal(dictEntry *de, uint64_t val) { + assert(entryHasValue(de)); de->v.u64 = val; } void dictSetDoubleVal(dictEntry *de, double val) { + assert(entryHasValue(de)); de->v.d = val; } int64_t dictIncrSignedIntegerVal(dictEntry *de, int64_t val) { + assert(entryHasValue(de)); return de->v.s64 += val; } uint64_t dictIncrUnsignedIntegerVal(dictEntry *de, uint64_t val) { + assert(entryHasValue(de)); return de->v.u64 += val; } double dictIncrDoubleVal(dictEntry *de, double val) { + assert(entryHasValue(de)); return de->v.d += val; } /* A pointer to the metadata section within the dict entry. */ void *dictEntryMetadata(dictEntry *de) { + assert(entryHasValue(de)); return &de->metadata; } void *dictGetKey(const dictEntry *de) { + if (entryIsKey(de)) return (void*)de; + if (entryIsNoValue(de)) return decodeEntryNoValue(de)->key; return de->key; } void *dictGetVal(const dictEntry *de) { + assert(entryHasValue(de)); return de->v.val; } int64_t dictGetSignedIntegerVal(const dictEntry *de) { + assert(entryHasValue(de)); return de->v.s64; } uint64_t dictGetUnsignedIntegerVal(const dictEntry *de) { + assert(entryHasValue(de)); return de->v.u64; } double dictGetDoubleVal(const dictEntry *de) { + assert(entryHasValue(de)); return de->v.d; } /* Returns a mutable reference to the value as a double within the entry. */ double *dictGetDoubleValPtr(dictEntry *de) { + assert(entryHasValue(de)); return &de->v.d; } +/* Returns the 'next' field of the entry or NULL if the entry doesn't have a + * 'next' field. */ +static dictEntry *dictGetNext(const dictEntry *de) { + if (entryIsKey(de)) return NULL; /* there's no next */ + if (entryIsNoValue(de)) return decodeEntryNoValue(de)->next; + return de->next; +} + +/* Returns a pointer to the 'next' field in the entry or NULL if the entry + * doesn't have a next field. */ +static dictEntry **dictGetNextRef(dictEntry *de) { + if (entryIsKey(de)) return NULL; + if (entryIsNoValue(de)) return &decodeEntryNoValue(de)->next; + return &de->next; +} + +static void dictSetNext(dictEntry *de, dictEntry *next) { + assert(!entryIsKey(de)); + if (entryIsNoValue(de)) { + dictEntryNoValue *entry = decodeEntryNoValue(de); + entry->next = next; + } else { + de->next = next; + } +} + /* Returns the memory usage in bytes of the dict, excluding the size of the keys * and values. */ size_t dictMemUsage(const dict *d) { @@ -801,7 +964,7 @@ dictEntry *dictNext(dictIterator *iter) if (iter->entry) { /* We need to save the 'next' here, the iterator user * may delete the entry we are returning. */ - iter->nextEntry = iter->entry->next; + iter->nextEntry = dictGetNext(iter->entry); return iter->entry; } } @@ -847,12 +1010,12 @@ dictEntry *dictGetRandomKey(dict *d) listlen = 0; orighe = he; while(he) { - he = he->next; + he = dictGetNext(he); listlen++; } listele = random() % listlen; he = orighe; - while(listele--) he = he->next; + while(listele--) he = dictGetNext(he); return he; } @@ -936,7 +1099,7 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) { * empty while iterating. */ *des = he; des++; - he = he->next; + he = dictGetNext(he); stored++; if (stored == count) return stored; } @@ -948,17 +1111,39 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) { } -/* Reallocate the dictEntry allocations in a bucket using the provided - * allocation function in order to defrag them. */ -static void dictDefragBucket(dict *d, dictEntry **bucketref, dictDefragAllocFunction *allocfn) { +/* Reallocate the dictEntry, key and value allocations in a bucket using the + * provided allocation functions in order to defrag them. */ +static void dictDefragBucket(dict *d, dictEntry **bucketref, dictDefragFunctions *defragfns) { + dictDefragAllocFunction *defragalloc = defragfns->defragAlloc; + dictDefragAllocFunction *defragkey = defragfns->defragKey; + dictDefragAllocFunction *defragval = defragfns->defragVal; while (bucketref && *bucketref) { - dictEntry *de = *bucketref, *newde; - if ((newde = allocfn(de))) { + dictEntry *de = *bucketref, *newde = NULL; + void *newkey = defragkey ? defragkey(dictGetKey(de)) : NULL; + void *newval = defragval ? defragval(dictGetVal(de)) : NULL; + if (entryIsKey(de)) { + if (newkey) *bucketref = newkey; + assert(entryIsKey(*bucketref)); + } else if (entryIsNoValue(de)) { + dictEntryNoValue *entry = decodeEntryNoValue(de), *newentry; + if ((newentry = defragalloc(entry))) { + newde = encodeMaskedPtr(newentry, ENTRY_PTR_NO_VALUE); + entry = newentry; + } + if (newkey) entry->key = newkey; + } else { + assert(entryIsNormal(de)); + newde = defragalloc(de); + if (newde) de = newde; + if (newkey) de->key = newkey; + if (newval) de->v.val = newval; + } + if (newde) { *bucketref = newde; if (d->type->afterReplaceEntry) d->type->afterReplaceEntry(d, newde); } - bucketref = &(*bucketref)->next; + bucketref = dictGetNextRef(*bucketref); } } @@ -1094,14 +1279,14 @@ unsigned long dictScan(dict *d, * entries using the provided allocation function. This feature was added for * the active defrag feature. * - * The 'defracallocfn' callback is called with a pointer to memory that callback - * can reallocate. The callback should return a new memory address or NULL, + * The 'defragfns' callbacks are called with a pointer to memory that callback + * can reallocate. The callbacks should return a new memory address or NULL, * where NULL means that no reallocation happened and the old memory is still * valid. */ unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, - dictDefragAllocFunction *defragallocfn, + dictDefragFunctions *defragfns, void *privdata) { int htidx0, htidx1; @@ -1118,12 +1303,12 @@ unsigned long dictScanDefrag(dict *d, m0 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx0]); /* Emit entries at cursor */ - if (defragallocfn) { - dictDefragBucket(d, &d->ht_table[htidx0][v & m0], defragallocfn); + if (defragfns) { + dictDefragBucket(d, &d->ht_table[htidx0][v & m0], defragfns); } de = d->ht_table[htidx0][v & m0]; while (de) { - next = de->next; + next = dictGetNext(de); fn(privdata, de); de = next; } @@ -1151,12 +1336,12 @@ unsigned long dictScanDefrag(dict *d, m1 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx1]); /* Emit entries at cursor */ - if (defragallocfn) { - dictDefragBucket(d, &d->ht_table[htidx0][v & m0], defragallocfn); + if (defragfns) { + dictDefragBucket(d, &d->ht_table[htidx0][v & m0], defragfns); } de = d->ht_table[htidx0][v & m0]; while (de) { - next = de->next; + next = dictGetNext(de); fn(privdata, de); de = next; } @@ -1165,12 +1350,12 @@ unsigned long dictScanDefrag(dict *d, * of the index pointed to by the cursor in the smaller table */ do { /* Emit entries at cursor */ - if (defragallocfn) { - dictDefragBucket(d, &d->ht_table[htidx1][v & m1], defragallocfn); + if (defragfns) { + dictDefragBucket(d, &d->ht_table[htidx1][v & m1], defragfns); } de = d->ht_table[htidx1][v & m1]; while (de) { - next = de->next; + next = dictGetNext(de); fn(privdata, de); de = next; } @@ -1241,36 +1426,39 @@ static signed char _dictNextExp(unsigned long size) } } -/* Returns the index of a free slot that can be populated with - * a hash entry for the given 'key'. - * If the key already exists, -1 is returned - * and the optional output parameter may be filled. - * - * Note that if we are in the process of rehashing the hash table, the - * index is always returned in the context of the second (new) hash table. */ -static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing) -{ +/* Finds and returns the position within the dict where the provided key should + * be inserted using dictInsertAtPosition if the key does not already exist in + * the dict. If the key exists in the dict, NULL is returned and the optional + * 'existing' entry pointer is populated, if provided. */ +void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing) { unsigned long idx, table; dictEntry *he; + uint64_t hash = dictHashKey(d, key); if (existing) *existing = NULL; + if (dictIsRehashing(d)) _dictRehashStep(d); /* Expand the hash table if needed */ if (_dictExpandIfNeeded(d) == DICT_ERR) - return -1; + return NULL; for (table = 0; table <= 1; table++) { idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]); /* Search if this slot does not already contain the given key */ he = d->ht_table[table][idx]; while(he) { - if (key==he->key || dictCompareKeys(d, key, he->key)) { + void *he_key = dictGetKey(he); + if (key == he_key || dictCompareKeys(d, key, he_key)) { if (existing) *existing = he; - return -1; + return NULL; } - he = he->next; + he = dictGetNext(he); } if (!dictIsRehashing(d)) break; } - return idx; + + /* If we are in the process of rehashing the hash table, the bucket is + * always returned in the context of the second (new) hash table. */ + dictEntry **bucket = &d->ht_table[dictIsRehashing(d) ? 1 : 0][idx]; + return bucket; } void dictEmpty(dict *d, void(callback)(dict*)) { @@ -1302,9 +1490,9 @@ dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]); he = d->ht_table[table][idx]; while(he) { - if (oldptr==he->key) + if (oldptr == dictGetKey(he)) return he; - he = he->next; + he = dictGetNext(he); } if (!dictIsRehashing(d)) return NULL; } @@ -1340,7 +1528,7 @@ size_t _dictGetStatsHt(char *buf, size_t bufsize, dict *d, int htidx) { he = d->ht_table[htidx][i]; while(he) { chainlen++; - he = he->next; + he = dictGetNext(he); } clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++; if (chainlen > maxchainlen) maxchainlen = chainlen; diff --git a/src/dict.h b/src/dict.h index a8a76e69c..06d4bff78 100644 --- a/src/dict.h +++ b/src/dict.h @@ -56,6 +56,19 @@ typedef struct dictType { void (*keyDestructor)(dict *d, void *key); void (*valDestructor)(dict *d, void *obj); int (*expandAllowed)(size_t moreMem, double usedRatio); + /* Flags */ + /* The 'no_value' flag, if set, indicates that values are not used, i.e. the + * dict is a set. When this flag is set, it's not possible to access the + * value of a dictEntry and it's also impossible to use dictSetKey(). Entry + * metadata can also not be used. */ + unsigned int no_value:1; + /* If no_value = 1 and all keys are odd (LSB=1), setting keys_are_odd = 1 + * enables one more optimization: to store a key without an allocated + * dictEntry. */ + unsigned int keys_are_odd:1; + /* TODO: Add a 'keys_are_even' flag and use a similar optimization if that + * flag is set. */ + /* Allow each dict and dictEntry to carry extra caller-defined metadata. The * extra memory is initialized to 0 when allocated. */ size_t (*dictEntryMetadataBytes)(dict *d); @@ -100,6 +113,11 @@ typedef struct dictIterator { typedef void (dictScanFunction)(void *privdata, const dictEntry *de); typedef void *(dictDefragAllocFunction)(void *ptr); +typedef struct { + dictDefragAllocFunction *defragAlloc; /* Used for entries etc. */ + dictDefragAllocFunction *defragKey; /* Defrag-realloc keys (optional) */ + dictDefragAllocFunction *defragVal; /* Defrag-realloc values (optional) */ +} dictDefragFunctions; /* This is the initial size of every hash table */ #define DICT_HT_INITIAL_EXP 2 @@ -152,6 +170,8 @@ int dictTryExpand(dict *d, unsigned long size); void *dictMetadata(dict *d); int dictAdd(dict *d, void *key, void *val); dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing); +void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing); +dictEntry *dictInsertAtPosition(dict *d, void *key, void *position); dictEntry *dictAddOrFind(dict *d, void *key); int dictReplace(dict *d, void *key, void *val); int dictDelete(dict *d, const void *key); @@ -200,7 +220,7 @@ int dictRehashMilliseconds(dict *d, int ms); void dictSetHashFunctionSeed(uint8_t *seed); uint8_t *dictGetHashFunctionSeed(void); unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata); -unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragAllocFunction *allocfn, void *privdata); +unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata); uint64_t dictGetHash(dict *d, const void *key); dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash); diff --git a/src/server.c b/src/server.c index 910f299f3..8b4ec28e4 100644 --- a/src/server.c +++ b/src/server.c @@ -448,7 +448,8 @@ dictType setDictType = { NULL, /* val dup */ dictSdsKeyCompare, /* key compare */ dictSdsDestructor, /* key destructor */ - NULL /* val destructor */ + .no_value = 1, /* no values in this dict */ + .keys_are_odd = 1 /* an SDS string is always an odd pointer */ }; /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ @@ -471,9 +472,9 @@ dictType dbDictType = { dictSdsDestructor, /* key destructor */ dictObjectDestructor, /* val destructor */ dictExpandAllowed, /* allow to expand */ - dbDictEntryMetadataSize, /* size of entry metadata in bytes */ - dbDictMetadataSize, /* size of dict metadata in bytes */ - dbDictAfterReplaceEntry /* notify entry moved/reallocated */ + .dictEntryMetadataBytes = dbDictEntryMetadataSize, + .dictMetadataBytes = dbDictMetadataSize, + .afterReplaceEntry = dbDictAfterReplaceEntry }; /* Db->expires */ diff --git a/src/t_set.c b/src/t_set.c index 6d8023f31..7b0dbed5a 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -122,17 +122,16 @@ int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sd /* Avoid duping the string if it is an sds string. */ sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len); dict *ht = set->ptr; - dictEntry *de = dictAddRaw(ht,sdsval,NULL); - if (de && sdsval == str) { - /* String was added but we don't own this sds string. Dup it and - * replace it in the dict entry. */ - dictSetKey(ht,de,sdsdup((sds)str)); - dictSetVal(ht,de,NULL); - } else if (!de && sdsval != str) { - /* String was already a member. Free our temporary sds copy. */ + void *position = dictFindPositionForInsert(ht, sdsval, NULL); + if (position) { + /* Key doesn't already exist in the set. Add it but dup the key. */ + if (sdsval == str) sdsval = sdsdup(sdsval); + dictInsertAtPosition(ht, sdsval, position); + } else if (sdsval != str) { + /* String is already a member. Free our temporary sds copy. */ sdsfree(sdsval); } - return (de != NULL); + return (position != NULL); } else if (set->encoding == OBJ_ENCODING_LISTPACK) { unsigned char *lp = set->ptr; unsigned char *p = lpFirst(lp); diff --git a/src/zmalloc.c b/src/zmalloc.c index 6d02604f4..e7b426277 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -60,8 +60,9 @@ void zlibc_free(void *ptr) { #ifdef HAVE_MALLOC_SIZE #define PREFIX_SIZE (0) #else -#if defined(__sun) || defined(__sparc) || defined(__sparc__) -#define PREFIX_SIZE (sizeof(long long)) +/* Use at least 8 bits alignment on all systems. */ +#if SIZE_MAX < 0xffffffffffffffffull +#define PREFIX_SIZE 8 #else #define PREFIX_SIZE (sizeof(size_t)) #endif