mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-24 00:59:02 -05:00
894eba07c8
Previously two string encodings were used for string objects: 1) REDIS_ENCODING_RAW: a string object with obj->ptr pointing to an sds string. 2) REDIS_ENCODING_INT: a string object where the obj->ptr void pointer is cast to a long. This commit introduces an experimental new encoding called REDIS_ENCODING_EMBSTR that implements an object represented by an sds string that is not modifiable but allocated in the same memory chunk as the robj structure itself. The chunk looks like the following: +--------------+-----------+------------+--------+----+ | robj data... | robj->ptr | sds header | string | \0 | +--------------+-----+-----+------------+--------+----+ | ^ +-----------------------+ The robj->ptr points to the contiguous sds string data, so the object can be manipulated with the same functions used to manipulate plain string objects; however, we need just one malloc and one free in order to allocate or release this kind of object. Moreover it has better cache locality. This new allocation strategy should benefit both memory usage and performance. A performance gain between 60 and 70% was observed during micro-benchmarks; however, there is more work to do to evaluate the performance impact and the memory usage behavior.
413 lines
15 KiB
C
413 lines
15 KiB
C
/* Bit operations.
|
|
*
|
|
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Redis nor the names of its contributors may be used
|
|
* to endorse or promote products derived from this software without
|
|
* specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "redis.h"
|
|
|
|
/* -----------------------------------------------------------------------------
|
|
* Helpers and low level bit functions.
|
|
* -------------------------------------------------------------------------- */
|
|
|
|
/* This helper function used by GETBIT / SETBIT parses the bit offset argument
|
|
* making sure an error is returned if it is negative or if it overflows
|
|
* Redis 512 MB limit for the string value. */
|
|
static int getBitOffsetFromArgument(redisClient *c, robj *o, size_t *offset) {
|
|
long long loffset;
|
|
char *err = "bit offset is not an integer or out of range";
|
|
|
|
if (getLongLongFromObjectOrReply(c,o,&loffset,err) != REDIS_OK)
|
|
return REDIS_ERR;
|
|
|
|
/* Limit offset to 512MB in bytes */
|
|
if ((loffset < 0) || ((unsigned long long)loffset >> 3) >= (512*1024*1024))
|
|
{
|
|
addReplyError(c,err);
|
|
return REDIS_ERR;
|
|
}
|
|
|
|
*offset = (size_t)loffset;
|
|
return REDIS_OK;
|
|
}
|
|
|
|
/* Count the number of bits set in the 'count' bytes long binary array
 * pointed by 's'. The implementation is required to work with an input
 * length up to 512 MB.
 *
 * 's' may have any alignment: callers such as BITCOUNT pass a pointer into
 * the middle of a string, so the 32 bit words are loaded with memcpy()
 * instead of dereferencing a casted pointer (which would be a misaligned,
 * aliasing-violating access).
 *
 * Returns the number of set bits, or 0 when 'count' is zero or negative. */
size_t redisPopcount(void *s, long count) {
    size_t bits = 0;
    const unsigned char *p = s;
    static const unsigned char bitsinbyte[256] = {
        0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,
        1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
        1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
        2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
        1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
        2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
        2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
        3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
        1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
        2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
        2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
        3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
        2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
        3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
        3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
        4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
    };

    /* A negative length would make the trailing byte loop run past the
     * buffer: treat it as an empty input. */
    if (count <= 0) return 0;

    /* Count bits 16 bytes at a time. */
    while (count >= 16) {
        uint32_t words[4];
        int k;

        memcpy(words,p,sizeof(words));
        p += sizeof(words);
        count -= 16;

        for (k = 0; k < 4; k++) {
            uint32_t w = words[k];

            /* Parallel bit count: sum adjacent bits, then bit pairs, then
             * nibbles; the multiplication accumulates the four per-byte
             * sums into the most significant byte. */
            w = w - ((w >> 1) & 0x55555555);
            w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
            bits += (((w + (w >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
        }
    }

    /* Count the remaining bytes using the lookup table. */
    while (count--) bits += bitsinbyte[*p++];
    return bits;
}
|
|
|
|
/* -----------------------------------------------------------------------------
|
|
* Bits related string commands: GETBIT, SETBIT, BITCOUNT, BITOP.
|
|
* -------------------------------------------------------------------------- */
|
|
|
|
/* Operation codes used by bitopCommand() to identify the operation named
 * by the BITOP op_name argument. */
#define BITOP_AND   0   /* Bitwise AND of all the source strings. */
#define BITOP_OR    1   /* Bitwise OR of all the source strings. */
#define BITOP_XOR   2   /* Bitwise XOR of all the source strings. */
#define BITOP_NOT   3   /* Bitwise complement of a single source string. */
|
|
|
|
/* SETBIT key offset bitvalue */
|
|
void setbitCommand(redisClient *c) {
|
|
robj *o;
|
|
char *err = "bit is not an integer or out of range";
|
|
size_t bitoffset;
|
|
int byte, bit;
|
|
int byteval, bitval;
|
|
long on;
|
|
|
|
if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset) != REDIS_OK)
|
|
return;
|
|
|
|
if (getLongFromObjectOrReply(c,c->argv[3],&on,err) != REDIS_OK)
|
|
return;
|
|
|
|
/* Bits can only be set or cleared... */
|
|
if (on & ~1) {
|
|
addReplyError(c,err);
|
|
return;
|
|
}
|
|
|
|
o = lookupKeyWrite(c->db,c->argv[1]);
|
|
if (o == NULL) {
|
|
o = createObject(REDIS_STRING,sdsempty());
|
|
dbAdd(c->db,c->argv[1],o);
|
|
} else {
|
|
if (checkType(c,o,REDIS_STRING)) return;
|
|
|
|
/* Create a copy when the object is shared or encoded. */
|
|
if (o->refcount != 1 || o->encoding != REDIS_ENCODING_RAW) {
|
|
robj *decoded = getDecodedObject(o);
|
|
o = createRawStringObject(decoded->ptr, sdslen(decoded->ptr));
|
|
decrRefCount(decoded);
|
|
dbOverwrite(c->db,c->argv[1],o);
|
|
}
|
|
}
|
|
|
|
/* Grow sds value to the right length if necessary */
|
|
byte = bitoffset >> 3;
|
|
o->ptr = sdsgrowzero(o->ptr,byte+1);
|
|
|
|
/* Get current values */
|
|
byteval = ((uint8_t*)o->ptr)[byte];
|
|
bit = 7 - (bitoffset & 0x7);
|
|
bitval = byteval & (1 << bit);
|
|
|
|
/* Update byte with new bit value and return original value */
|
|
byteval &= ~(1 << bit);
|
|
byteval |= ((on & 0x1) << bit);
|
|
((uint8_t*)o->ptr)[byte] = byteval;
|
|
signalModifiedKey(c->db,c->argv[1]);
|
|
notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
|
|
server.dirty++;
|
|
addReply(c, bitval ? shared.cone : shared.czero);
|
|
}
|
|
|
|
/* GETBIT key offset */
|
|
void getbitCommand(redisClient *c) {
|
|
robj *o;
|
|
char llbuf[32];
|
|
size_t bitoffset;
|
|
size_t byte, bit;
|
|
size_t bitval = 0;
|
|
|
|
if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset) != REDIS_OK)
|
|
return;
|
|
|
|
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
|
|
checkType(c,o,REDIS_STRING)) return;
|
|
|
|
byte = bitoffset >> 3;
|
|
bit = 7 - (bitoffset & 0x7);
|
|
if (sdsEncodedObject(o)) {
|
|
if (byte < sdslen(o->ptr))
|
|
bitval = ((uint8_t*)o->ptr)[byte] & (1 << bit);
|
|
} else {
|
|
if (byte < (size_t)ll2string(llbuf,sizeof(llbuf),(long)o->ptr))
|
|
bitval = llbuf[byte] & (1 << bit);
|
|
}
|
|
|
|
addReply(c, bitval ? shared.cone : shared.czero);
|
|
}
|
|
|
|
/* BITOP op_name target_key src_key1 src_key2 src_key3 ... src_keyN */
|
|
void bitopCommand(redisClient *c) {
|
|
char *opname = c->argv[1]->ptr;
|
|
robj *o, *targetkey = c->argv[2];
|
|
long op, j, numkeys;
|
|
robj **objects; /* Array of source objects. */
|
|
unsigned char **src; /* Array of source strings pointers. */
|
|
long *len, maxlen = 0; /* Array of length of src strings, and max len. */
|
|
long minlen = 0; /* Min len among the input keys. */
|
|
unsigned char *res = NULL; /* Resulting string. */
|
|
|
|
/* Parse the operation name. */
|
|
if ((opname[0] == 'a' || opname[0] == 'A') && !strcasecmp(opname,"and"))
|
|
op = BITOP_AND;
|
|
else if((opname[0] == 'o' || opname[0] == 'O') && !strcasecmp(opname,"or"))
|
|
op = BITOP_OR;
|
|
else if((opname[0] == 'x' || opname[0] == 'X') && !strcasecmp(opname,"xor"))
|
|
op = BITOP_XOR;
|
|
else if((opname[0] == 'n' || opname[0] == 'N') && !strcasecmp(opname,"not"))
|
|
op = BITOP_NOT;
|
|
else {
|
|
addReply(c,shared.syntaxerr);
|
|
return;
|
|
}
|
|
|
|
/* Sanity check: NOT accepts only a single key argument. */
|
|
if (op == BITOP_NOT && c->argc != 4) {
|
|
addReplyError(c,"BITOP NOT must be called with a single source key.");
|
|
return;
|
|
}
|
|
|
|
/* Lookup keys, and store pointers to the string objects into an array. */
|
|
numkeys = c->argc - 3;
|
|
src = zmalloc(sizeof(unsigned char*) * numkeys);
|
|
len = zmalloc(sizeof(long) * numkeys);
|
|
objects = zmalloc(sizeof(robj*) * numkeys);
|
|
for (j = 0; j < numkeys; j++) {
|
|
o = lookupKeyRead(c->db,c->argv[j+3]);
|
|
/* Handle non-existing keys as empty strings. */
|
|
if (o == NULL) {
|
|
objects[j] = NULL;
|
|
src[j] = NULL;
|
|
len[j] = 0;
|
|
minlen = 0;
|
|
continue;
|
|
}
|
|
/* Return an error if one of the keys is not a string. */
|
|
if (checkType(c,o,REDIS_STRING)) {
|
|
for (j = j-1; j >= 0; j--) {
|
|
if (objects[j])
|
|
decrRefCount(objects[j]);
|
|
}
|
|
zfree(src);
|
|
zfree(len);
|
|
zfree(objects);
|
|
return;
|
|
}
|
|
objects[j] = getDecodedObject(o);
|
|
src[j] = objects[j]->ptr;
|
|
len[j] = sdslen(objects[j]->ptr);
|
|
if (len[j] > maxlen) maxlen = len[j];
|
|
if (j == 0 || len[j] < minlen) minlen = len[j];
|
|
}
|
|
|
|
/* Compute the bit operation, if at least one string is not empty. */
|
|
if (maxlen) {
|
|
res = (unsigned char*) sdsnewlen(NULL,maxlen);
|
|
unsigned char output, byte;
|
|
long i;
|
|
|
|
/* Fast path: as far as we have data for all the input bitmaps we
|
|
* can take a fast path that performs much better than the
|
|
* vanilla algorithm. */
|
|
j = 0;
|
|
if (minlen && numkeys <= 16) {
|
|
unsigned long *lp[16];
|
|
unsigned long *lres = (unsigned long*) res;
|
|
|
|
/* Note: sds pointer is always aligned to 8 byte boundary. */
|
|
memcpy(lp,src,sizeof(unsigned long*)*numkeys);
|
|
memcpy(res,src[0],minlen);
|
|
|
|
/* Different branches per different operations for speed (sorry). */
|
|
if (op == BITOP_AND) {
|
|
while(minlen >= sizeof(unsigned long)*4) {
|
|
for (i = 1; i < numkeys; i++) {
|
|
lres[0] &= lp[i][0];
|
|
lres[1] &= lp[i][1];
|
|
lres[2] &= lp[i][2];
|
|
lres[3] &= lp[i][3];
|
|
lp[i]+=4;
|
|
}
|
|
lres+=4;
|
|
j += sizeof(unsigned long)*4;
|
|
minlen -= sizeof(unsigned long)*4;
|
|
}
|
|
} else if (op == BITOP_OR) {
|
|
while(minlen >= sizeof(unsigned long)*4) {
|
|
for (i = 1; i < numkeys; i++) {
|
|
lres[0] |= lp[i][0];
|
|
lres[1] |= lp[i][1];
|
|
lres[2] |= lp[i][2];
|
|
lres[3] |= lp[i][3];
|
|
lp[i]+=4;
|
|
}
|
|
lres+=4;
|
|
j += sizeof(unsigned long)*4;
|
|
minlen -= sizeof(unsigned long)*4;
|
|
}
|
|
} else if (op == BITOP_XOR) {
|
|
while(minlen >= sizeof(unsigned long)*4) {
|
|
for (i = 1; i < numkeys; i++) {
|
|
lres[0] ^= lp[i][0];
|
|
lres[1] ^= lp[i][1];
|
|
lres[2] ^= lp[i][2];
|
|
lres[3] ^= lp[i][3];
|
|
lp[i]+=4;
|
|
}
|
|
lres+=4;
|
|
j += sizeof(unsigned long)*4;
|
|
minlen -= sizeof(unsigned long)*4;
|
|
}
|
|
} else if (op == BITOP_NOT) {
|
|
while(minlen >= sizeof(unsigned long)*4) {
|
|
lres[0] = ~lres[0];
|
|
lres[1] = ~lres[1];
|
|
lres[2] = ~lres[2];
|
|
lres[3] = ~lres[3];
|
|
lres+=4;
|
|
j += sizeof(unsigned long)*4;
|
|
minlen -= sizeof(unsigned long)*4;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* j is set to the next byte to process by the previous loop. */
|
|
for (; j < maxlen; j++) {
|
|
output = (len[0] <= j) ? 0 : src[0][j];
|
|
if (op == BITOP_NOT) output = ~output;
|
|
for (i = 1; i < numkeys; i++) {
|
|
byte = (len[i] <= j) ? 0 : src[i][j];
|
|
switch(op) {
|
|
case BITOP_AND: output &= byte; break;
|
|
case BITOP_OR: output |= byte; break;
|
|
case BITOP_XOR: output ^= byte; break;
|
|
}
|
|
}
|
|
res[j] = output;
|
|
}
|
|
}
|
|
for (j = 0; j < numkeys; j++) {
|
|
if (objects[j])
|
|
decrRefCount(objects[j]);
|
|
}
|
|
zfree(src);
|
|
zfree(len);
|
|
zfree(objects);
|
|
|
|
/* Store the computed value into the target key */
|
|
if (maxlen) {
|
|
o = createObject(REDIS_STRING,res);
|
|
setKey(c->db,targetkey,o);
|
|
notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"set",targetkey,c->db->id);
|
|
decrRefCount(o);
|
|
} else if (dbDelete(c->db,targetkey)) {
|
|
signalModifiedKey(c->db,targetkey);
|
|
notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",targetkey,c->db->id);
|
|
}
|
|
server.dirty++;
|
|
addReplyLongLong(c,maxlen); /* Return the output string length in bytes. */
|
|
}
|
|
|
|
/* BITCOUNT key [start end] */
|
|
void bitcountCommand(redisClient *c) {
|
|
robj *o;
|
|
long start, end, strlen;
|
|
unsigned char *p;
|
|
char llbuf[32];
|
|
|
|
/* Lookup, check for type, and return 0 for non existing keys. */
|
|
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
|
|
checkType(c,o,REDIS_STRING)) return;
|
|
|
|
/* Set the 'p' pointer to the string, that can be just a stack allocated
|
|
* array if our string was integer encoded. */
|
|
if (o->encoding == REDIS_ENCODING_INT) {
|
|
p = (unsigned char*) llbuf;
|
|
strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
|
|
} else {
|
|
p = (unsigned char*) o->ptr;
|
|
strlen = sdslen(o->ptr);
|
|
}
|
|
|
|
/* Parse start/end range if any. */
|
|
if (c->argc == 4) {
|
|
if (getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != REDIS_OK)
|
|
return;
|
|
if (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != REDIS_OK)
|
|
return;
|
|
/* Convert negative indexes */
|
|
if (start < 0) start = strlen+start;
|
|
if (end < 0) end = strlen+end;
|
|
if (start < 0) start = 0;
|
|
if (end < 0) end = 0;
|
|
if (end >= strlen) end = strlen-1;
|
|
} else if (c->argc == 2) {
|
|
/* The whole string. */
|
|
start = 0;
|
|
end = strlen-1;
|
|
} else {
|
|
/* Syntax error. */
|
|
addReply(c,shared.syntaxerr);
|
|
return;
|
|
}
|
|
|
|
/* Precondition: end >= 0 && end < strlen, so the only condition where
|
|
* zero can be returned is: start > end. */
|
|
if (start > end) {
|
|
addReply(c,shared.czero);
|
|
} else {
|
|
long bytes = end-start+1;
|
|
|
|
addReplyLongLong(c,redisPopcount(p+start,bytes));
|
|
}
|
|
}
|