/* redict/src/dict.h */
/* Hash Tables Implementation.
*
* This file implements in-memory hash tables with insert/del/replace/find/
* get-random-element operations. Hash tables will auto-resize if needed;
* tables of power of two in size are used, and collisions are handled by
* chaining. See the source code for more information... :)
*
* Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <stddef.h>
#include <stdint.h>
#ifndef __DICT_H
#define __DICT_H
/* Status codes. NOTE(review): presumably what the int-returning API
 * functions declared in this header report (success / failure) - confirm
 * against their definitions. */
#define DICT_OK 0
#define DICT_ERR 1

/* Unused arguments generate annoying warnings...
 *
 * The argument is parenthesized so that any expression can be passed: without
 * the inner parentheses the cast would bind only to the first operand of
 * the argument (e.g. DICT_NOTUSED(x = 5) would not even compile). */
#define DICT_NOTUSED(V) ((void)(V))
/* One key/value node stored in a hash table.
 *
 * The value lives in the union 'v': the same storage is viewed either as a
 * generic pointer or as one of the inline numeric types, so only the member
 * that was written last is meaningful. The dictSet*Val / dictGet*Val macros
 * in this header access the individual members. */
typedef struct dictEntry {
    void *key;                  /* Key pointer, read through dictGetKey(). */
    union {
        void *val;              /* Generic pointer value. */
        uint64_t u64;           /* Inline unsigned 64 bit value. */
        int64_t s64;            /* Inline signed 64 bit value. */
        double d;               /* Inline double value. */
    } v;
    struct dictEntry *next;     /* Link to another entry; the file header
                                 * states collisions are handled by
                                 * chaining. */
} dictEntry;
/* Per-dictionary set of callbacks describing how keys and values are
 * hashed, copied, compared and released.
 *
 * Every callback except hashFunction receives the dictionary's 'privdata'
 * pointer as first argument (see the macros in this header). The macros
 * treat keyDup, valDup, keyCompare, keyDestructor and valDestructor as
 * optional (NULL is checked before the call); dictHashKey() calls
 * hashFunction unconditionally, so it must be set. */
typedef struct dictType {
    /* Hash of 'key' as a 64 bit value. */
    uint64_t (*hashFunction)(const void *key);
    /* When set, dictSetKey() stores the returned pointer instead of the
     * caller's key pointer. */
    void *(*keyDup)(void *privdata, const void *key);
    /* When set, dictSetVal() stores the returned pointer instead of the
     * caller's value pointer. */
    void *(*valDup)(void *privdata, const void *obj);
    /* When set, dictCompareKeys() uses its result as a truth value; when
     * NULL, keys are compared by pointer equality. */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    /* When set, called by dictFreeKey() with the entry's key. */
    void (*keyDestructor)(void *privdata, void *key);
    /* When set, called by dictFreeVal() with the entry's pointer value. */
    void (*valDestructor)(void *privdata, void *obj);
} dictType;
/* This is our hash table structure. Every dictionary has two of these, as
 * we implement incremental rehashing from the old table to the new one. */
typedef struct dictht {
    struct dictEntry **table;   /* Array of entry pointers. */
    unsigned long size;         /* Summed over both tables by dictSlots(). */
    unsigned long sizemask;     /* NOTE(review): presumably size-1, given the
                                 * power-of-two table sizes mentioned in the
                                 * file header - confirm in dict.c. */
    unsigned long used;         /* Summed over both tables by dictSize(). */
} dictht;
typedef struct dict {
dictType *type;
2009-03-22 05:30:00 -04:00
void *privdata;
dictht ht[2];
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
unsigned long iterators; /* number of iterators currently running */
2009-03-22 05:30:00 -04:00
} dict;
2012-10-03 00:58:36 -04:00
/* If safe is set to 1 this is a safe iterator, that means, you can call
 * dictAdd, dictFind, and other functions against the dictionary even while
 * iterating. Otherwise it is a non safe iterator, and only dictNext()
 * should be called while iterating. */
typedef struct dictIterator {
    struct dict *d;             /* Dictionary being iterated. */
    long index;                 /* NOTE(review): presumably the current
                                 * bucket index - confirm in dict.c. */
    int table, safe;            /* 'safe' as described above; 'table' is
                                 * presumably the index into d->ht[] -
                                 * confirm in dict.c. */
    struct dictEntry *entry, *nextEntry;
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint;
} dictIterator;
/* Callback types taken by dictScan(). Both are function types (not pointer
 * types), which is why dictScan() declares its parameters as
 * 'dictScanFunction *' and 'dictScanBucketFunction *'. Each callback
 * receives a privdata pointer plus, respectively, a read-only entry or a
 * reference to a bucket slot. */
typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
typedef void (dictScanBucketFunction)(void *privdata, dictEntry **bucketref);
/* This is the initial size of every hash table. Kept as a preprocessor
 * constant so it stays usable in #if expressions. */
#define DICT_HT_INITIAL_SIZE 4
/* ------------------------------- Macros ------------------------------------*/
/* Call the dictionary's value destructor, if one is configured, on the
 * pointer value stored in 'entry'. The destructor receives the dictionary's
 * privdata as first argument.
 *
 * Wrapped in do { } while(0) so the macro behaves as a single statement:
 * with a bare 'if', an 'else' following the macro call would silently bind
 * to the macro's hidden 'if'. Note that 'd' is evaluated more than once. */
#define dictFreeVal(d, entry) do { \
    if ((d)->type->valDestructor) \
        (d)->type->valDestructor((d)->privdata, (entry)->v.val); \
} while(0)
/* Store a pointer value into 'entry'. When the dictionary type provides a
 * valDup callback, the pointer returned by valDup(privdata, _val_) is stored;
 * otherwise _val_ itself is stored. 'd' and 'entry' may be evaluated more
 * than once. */
#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
        (entry)->v.val = (d)->type->valDup((d)->privdata, (_val_)); \
    else \
        (entry)->v.val = (_val_); \
} while(0)
/* Inline numeric setters: write 'newval' straight into the matching member
 * of the entry's value union. No dictionary callbacks are involved. */

/* Store a signed 64 bit integer in the entry. */
#define dictSetSignedIntegerVal(he, newval) do { \
    (he)->v.s64 = (newval); \
} while(0)

/* Store an unsigned 64 bit integer in the entry. */
#define dictSetUnsignedIntegerVal(he, newval) do { \
    (he)->v.u64 = (newval); \
} while(0)

/* Store a double in the entry. */
#define dictSetDoubleVal(he, newval) do { \
    (he)->v.d = (newval); \
} while(0)
/* Call the dictionary's key destructor, if one is configured, on the key
 * stored in 'entry'. The destructor receives the dictionary's privdata as
 * first argument.
 *
 * Wrapped in do { } while(0) so the macro behaves as a single statement:
 * with a bare 'if', an 'else' following the macro call would silently bind
 * to the macro's hidden 'if'. Note that 'd' is evaluated more than once. */
#define dictFreeKey(d, entry) do { \
    if ((d)->type->keyDestructor) \
        (d)->type->keyDestructor((d)->privdata, (entry)->key); \
} while(0)
/* Store a key pointer into 'entry'. When the dictionary type provides a
 * keyDup callback, the pointer returned by keyDup(privdata, _key_) is stored;
 * otherwise _key_ itself is stored. 'd' and 'entry' may be evaluated more
 * than once. */
#define dictSetKey(d, entry, _key_) do { \
    if ((d)->type->keyDup) \
        (entry)->key = (d)->type->keyDup((d)->privdata, (_key_)); \
    else \
        (entry)->key = (_key_); \
} while(0)
/* Compare two keys. When the dictionary type provides a keyCompare
 * callback, its result is the value of the expression; otherwise the two
 * keys are compared by pointer equality. 'd' is evaluated more than once. */
#define dictCompareKeys(d, key1, key2) \
    (((d)->type->keyCompare) ? \
        (d)->type->keyCompare((d)->privdata, key1, key2) : \
        (key1) == (key2))
/* Hash 'key' with the dictionary type's hash function. */
#define dictHashKey(dp, key) ((dp)->type->hashFunction(key))

/* Entry accessors: read the key, or one view of the value union. */
#define dictGetKey(entry)                ((entry)->key)
#define dictGetVal(entry)                ((entry)->v.val)
#define dictGetSignedIntegerVal(entry)   ((entry)->v.s64)
#define dictGetUnsignedIntegerVal(entry) ((entry)->v.u64)
#define dictGetDoubleVal(entry)          ((entry)->v.d)

/* Totals over both hash tables of the dictionary: the sum of the 'size'
 * fields and the sum of the 'used' fields respectively. */
#define dictSlots(dp) ((dp)->ht[0].size + (dp)->ht[1].size)
#define dictSize(dp)  ((dp)->ht[0].used + (dp)->ht[1].used)

/* True when rehashidx differs from -1, the "no rehash in progress" value. */
#define dictIsRehashing(dp) ((dp)->rehashidx != -1)
2009-03-22 05:30:00 -04:00
/* API */
dict *dictCreate(dictType *type, void *privDataPtr);
2010-04-15 12:07:57 -04:00
int dictExpand(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
dictEntry *dictAddOrFind(dict *d, void *key);
2010-04-15 12:07:57 -04:00
int dictReplace(dict *d, void *key, void *val);
int dictDelete(dict *d, const void *key);
dictEntry *dictUnlink(dict *ht, const void *key);
void dictFreeUnlinkedEntry(dict *d, dictEntry *he);
2010-04-15 12:07:57 -04:00
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
void *dictFetchValue(dict *d, const void *key);
2010-04-15 12:07:57 -04:00
int dictResize(dict *d);
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
2009-03-22 05:30:00 -04:00
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
2010-04-15 12:07:57 -04:00
dictEntry *dictGetRandomKey(dict *d);
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
void dictGetStats(char *buf, size_t bufsize, dict *d);
Use SipHash hash function to mitigate HashDos attempts. This change attempts to switch to an hash function which mitigates the effects of the HashDoS attack (denial of service attack trying to force data structures to worst case behavior) while at the same time providing Redis with an hash function that does not expect the input data to be word aligned, a condition no longer true now that sds.c strings have a varialbe length header. Note that it is possible sometimes that even using an hash function for which collisions cannot be generated without knowing the seed, special implementation details or the exposure of the seed in an indirect way (for example the ability to add elements to a Set and check the return in which Redis returns them with SMEMBERS) may make the attacker's life simpler in the process of trying to guess the correct seed, however the next step would be to switch to a log(N) data structure when too many items in a single bucket are detected: this seems like an overkill in the case of Redis. SPEED REGRESION TESTS: In order to verify that switching from MurmurHash to SipHash had no impact on speed, a set of benchmarks involving fast insertion of 5 million of keys were performed. The result shows Redis with SipHash in high pipelining conditions to be about 4% slower compared to using the previous hash function. However this could partially be related to the fact that the current implementation does not attempt to hash whole words at a time but reads single bytes, in order to have an output which is endian-netural and at the same time working on systems where unaligned memory accesses are a problem. Further X86 specific optimizations should be tested, the function may easily get at the same level of MurMurHash2 if a few optimizations are performed.
2017-02-20 10:09:54 -05:00
uint64_t dictGenHashFunction(const void *key, int len);
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d, void(callback)(void*));
void dictEnableResize(void);
void dictDisableResize(void);
int dictRehash(dict *d, int n);
2010-04-15 12:07:57 -04:00
int dictRehashMilliseconds(dict *d, int ms);
Use SipHash hash function to mitigate HashDos attempts. This change attempts to switch to an hash function which mitigates the effects of the HashDoS attack (denial of service attack trying to force data structures to worst case behavior) while at the same time providing Redis with an hash function that does not expect the input data to be word aligned, a condition no longer true now that sds.c strings have a varialbe length header. Note that it is possible sometimes that even using an hash function for which collisions cannot be generated without knowing the seed, special implementation details or the exposure of the seed in an indirect way (for example the ability to add elements to a Set and check the return in which Redis returns them with SMEMBERS) may make the attacker's life simpler in the process of trying to guess the correct seed, however the next step would be to switch to a log(N) data structure when too many items in a single bucket are detected: this seems like an overkill in the case of Redis. SPEED REGRESION TESTS: In order to verify that switching from MurmurHash to SipHash had no impact on speed, a set of benchmarks involving fast insertion of 5 million of keys were performed. The result shows Redis with SipHash in high pipelining conditions to be about 4% slower compared to using the previous hash function. However this could partially be related to the fact that the current implementation does not attempt to hash whole words at a time but reads single bytes, in order to have an output which is endian-netural and at the same time working on systems where unaligned memory accesses are a problem. Further X86 specific optimizations should be tested, the function may easily get at the same level of MurMurHash2 if a few optimizations are performed.
2017-02-20 10:09:54 -05:00
void dictSetHashFunctionSeed(uint8_t *seed);
uint8_t *dictGetHashFunctionSeed(void);
2016-12-29 20:37:52 -05:00
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, dictScanBucketFunction *bucketfn, void *privdata);
unsigned int dictGetHash(dict *d, const void *key);
2017-01-02 02:42:32 -05:00
dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, unsigned int hash);
2009-03-22 05:30:00 -04:00
/* Hash table types.
 *
 * Ready-made dictType descriptors. This header only declares them, so the
 * definitions must live in a translation unit (presumably dict.c - TODO
 * confirm). NOTE(review): the names suggest heap-allocated string keys with
 * different key/value copy policies; verify against the definitions. */
extern dictType dictTypeHeapStringCopyKey;
extern dictType dictTypeHeapStrings;
extern dictType dictTypeHeapStringCopyKeyValue;
#endif /* __DICT_H */