From 156929ee974ce968585144b0d0226e56622e6add Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 28 Mar 2014 16:24:35 +0100 Subject: [PATCH] HLLADD implemented. --- src/hyperloglog.c | 94 +++++++++++++++++++++++++++++++++++++++++++++-- src/redis.c | 1 + src/redis.h | 1 + 3 files changed, 93 insertions(+), 3 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 8874cde7d..b426efc51 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -29,8 +29,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <stdint.h> #include "redis.h" -#include "crc64.h" /* The Redis HyperLogLog implementation is based on the following ideas: * @@ -182,6 +182,42 @@ /* ========================= HyperLogLog algorithm ========================= */ +/* Our hash function is MurmurHash2, 64 bit version. */ +uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) { + const uint64_t m = 0xc6a4a7935bd1e995; + const int r = 47; + uint64_t h = seed ^ (len * m); + const uint64_t *data = (const uint64_t *)key; + const uint64_t *end = data + (len/8); + + while(data != end) { + uint64_t k = *data++; + k *= m; + k ^= k >> r; + k *= m; + h ^= k; + h *= m; + } + + const unsigned char *data2 = (const unsigned char*)data; + + switch(len & 7) { + case 7: h ^= (uint64_t)data2[6] << 48; + case 6: h ^= (uint64_t)data2[5] << 40; + case 5: h ^= (uint64_t)data2[4] << 32; + case 4: h ^= (uint64_t)data2[3] << 24; + case 3: h ^= (uint64_t)data2[2] << 16; + case 2: h ^= (uint64_t)data2[1] << 8; + case 1: h ^= (uint64_t)data2[0]; + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + return h; +} + /* "Add" the element in the hyperloglog data structure. * Actually nothing is added, but the max 0 pattern counter of the subset * the element belongs to is incremented if needed. @@ -193,7 +229,7 @@ * The function always succeed, however if as a result of the operation * the approximated cardinality changed, 1 is returned. Otherwise 0 * is returned.
*/ -int hllAdd(uint8_t *registers, uint8_t *ele, size_t elesize) { +int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { uint64_t hash, bit, index; uint8_t oldcount, count; @@ -203,7 +239,7 @@ int hllAdd(uint8_t *registers, uint8_t *ele, size_t elesize) { * * This may sound like inefficient, but actually in the average case * there are high probabilities to find a 1 after a few iterations. */ - hash = crc64(0,ele,elesize); + hash = MurmurHash64A(ele,elesize,0); bit = REDIS_HLL_REGISTERS; count = 0; while((hash & bit) == 0) { @@ -215,6 +251,7 @@ int hllAdd(uint8_t *registers, uint8_t *ele, size_t elesize) { bit <<= 1; } + /* Update the register if this element produced a longer run of zeroes. */ index = hash & REDIS_HLL_P_MASK; /* Index a register inside registers. */ HLL_GET_REGISTER(oldcount,registers,index); if (count > oldcount) { @@ -227,6 +264,57 @@ int hllAdd(uint8_t *registers, uint8_t *ele, size_t elesize) { /* ========================== HyperLogLog commands ========================== */ +/* HLLADD var ele ele ele ... ele => :0 or :1 */ +void hllAddCommand(redisClient *c) { + robj *o = lookupKeyWrite(c->db,c->argv[1]); + uint8_t *registers; + int updated = 0, j; + + if (o == NULL) { + /* Create the key with a string value of the exact length to + * hold our HLL data structure. sdsnewlen() when NULL is passed + * is guaranteed to return bytes initialized to zero. */ + o = createObject(REDIS_STRING,sdsnewlen(NULL,REDIS_HLL_SIZE)); + dbAdd(c->db,c->argv[1],o); + } else { + /* Key exists, check type */ + if (checkType(c,o,REDIS_STRING)) + return; + + /* If this is a string representing an HLL, the size should match + * exactly. */ + if (stringObjectLen(o) != REDIS_HLL_SIZE) { + addReplyErrorFormat(c, + "HLLADD target key must contain a %d bytes string.", + REDIS_HLL_SIZE); + return; + } + + /* If the object is shared or encoded, we have to make a copy. 
*/ + if (o->refcount != 1 || o->encoding != REDIS_ENCODING_RAW) { + robj *decoded = getDecodedObject(o); + o = createRawStringObject(decoded->ptr, sdslen(decoded->ptr)); + decrRefCount(decoded); + dbOverwrite(c->db,c->argv[1],o); + } + } + /* Perform the low level ADD operation for every element. */ + registers = (uint8_t*) o->ptr; + for (j = 2; j < c->argc; j++) { + if (hllAdd(registers, (unsigned char*)c->argv[j]->ptr, + sdslen(c->argv[j]->ptr))) + { + updated++; + } + } + if (updated) { + signalModifiedKey(c->db,c->argv[1]); + notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"hlladd",c->argv[1],c->db->id); + server.dirty++; + } + addReply(c, updated ? shared.cone : shared.czero); +} + /* This command performs a self-test of the HLL registers implementation. * Something that is not easy to test from within the outside. * diff --git a/src/redis.c b/src/redis.c index 07ff7c201..b2ddd8b2b 100644 --- a/src/redis.c +++ b/src/redis.c @@ -269,6 +269,7 @@ struct redisCommand redisCommandTable[] = { {"bitpos",bitposCommand,-3,"r",0,NULL,1,1,1,0,0}, {"wait",waitCommand,3,"rs",0,NULL,0,0,0,0,0}, {"hllselftest",hllSelftestCommand,1,"r",0,NULL,0,0,0,0,0}, + {"hlladd",hllAddCommand,-2,"wm",0,NULL,1,1,1,0,0} }; struct evictionPoolEntry *evictionPoolAlloc(void); diff --git a/src/redis.h b/src/redis.h index 106d22ba9..197793562 100644 --- a/src/redis.h +++ b/src/redis.h @@ -1448,6 +1448,7 @@ void bitposCommand(redisClient *c); void replconfCommand(redisClient *c); void waitCommand(redisClient *c); void hllSelftestCommand(redisClient *c); +void hllAddCommand(redisClient *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated));