bitpos/bitcount add bit index (#9324)

Make bitpos/bitcount support bit index:

```
BITPOS key bit [start [end [BIT|BYTE]]]
BITCOUNT key [start end [BIT|BYTE]]
```

The default unit is `BYTE`, so both commands remain backward compatible with existing callers.
This commit is contained in:
Huang Zhw 2021-09-12 16:31:22 +08:00 committed by GitHub
parent 418c2e7931
commit 75dd230994
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 290 additions and 45 deletions

View File

@ -789,12 +789,15 @@ void bitopCommand(client *c) {
addReplyLongLong(c,maxlen); /* Return the output string length in bytes. */
}
/* BITCOUNT key [start end] */
/* BITCOUNT key [start end [BIT|BYTE]] */
void bitcountCommand(client *c) {
robj *o;
long start, end, strlen;
long long start, end;
long strlen;
unsigned char *p;
char llbuf[LONG_STR_SIZE];
int isbit = 0;
unsigned char first_byte_neg_mask = 0, last_byte_neg_mask = 0;
/* Lookup, check for type, and return 0 for non existing keys. */
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
@ -802,21 +805,41 @@ void bitcountCommand(client *c) {
p = getObjectReadOnlyString(o,&strlen,llbuf);
/* Parse start/end range if any. */
if (c->argc == 4) {
if (getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK)
if (c->argc == 4 || c->argc == 5) {
long long totlen = strlen;
/* Make sure we will not overflow */
serverAssert(totlen <= LLONG_MAX >> 3);
if (getLongLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK)
return;
if (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK)
if (getLongLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK)
return;
/* Convert negative indexes */
if (start < 0 && end < 0 && start > end) {
addReply(c,shared.czero);
return;
}
if (start < 0) start = strlen+start;
if (end < 0) end = strlen+end;
if (c->argc == 5) {
if (!strcasecmp(c->argv[4]->ptr,"bit")) isbit = 1;
else if (!strcasecmp(c->argv[4]->ptr,"byte")) isbit = 0;
else {
addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
if (isbit) totlen <<= 3;
if (start < 0) start = totlen+start;
if (end < 0) end = totlen+end;
if (start < 0) start = 0;
if (end < 0) end = 0;
if (end >= strlen) end = strlen-1;
if (end >= totlen) end = totlen-1;
if (isbit && start <= end) {
/* Before converting bit offset to byte offset, create negative masks
* for the edges. */
first_byte_neg_mask = ~((1<<(8-(start&7)))-1) & 0xFF;
last_byte_neg_mask = (1<<(7-(end&7)))-1;
start >>= 3;
end >>= 3;
}
} else if (c->argc == 2) {
/* The whole string. */
start = 0;
@ -832,19 +855,30 @@ void bitcountCommand(client *c) {
if (start > end) {
addReply(c,shared.czero);
} else {
long bytes = end-start+1;
addReplyLongLong(c,redisPopcount(p+start,bytes));
long bytes = (long)(end-start+1);
long long count = redisPopcount(p+start,bytes);
if (first_byte_neg_mask != 0 || last_byte_neg_mask != 0) {
unsigned char firstlast[2] = {0, 0};
/* The popcount above may include bits of the first and last bytes that
* fall outside the requested range, so we need to subtract them. The trick:
* mask each edge byte so that its in-range bits become zero, leaving only
* the out-of-range bits to be counted and subtracted. */
if (first_byte_neg_mask != 0) firstlast[0] = p[start] & first_byte_neg_mask;
if (last_byte_neg_mask != 0) firstlast[1] = p[end] & last_byte_neg_mask;
count -= redisPopcount(firstlast,2);
}
addReplyLongLong(c,count);
}
}
/* BITPOS key bit [start [end]] */
/* BITPOS key bit [start [end [BIT|BYTE]]] */
void bitposCommand(client *c) {
robj *o;
long bit, start, end, strlen;
long long start, end;
long bit, strlen;
unsigned char *p;
char llbuf[LONG_STR_SIZE];
int end_given = 0;
int isbit = 0, end_given = 0;
unsigned char first_byte_neg_mask = 0, last_byte_neg_mask = 0;
/* Parse the bit argument to understand what we are looking for, set
* or clear bits. */
@ -856,7 +890,7 @@ void bitposCommand(client *c) {
}
/* If the key does not exist, from our point of view it is an infinite
* array of 0 bits. If the user is looking for the fist clear bit return 0,
* array of 0 bits. If the user is looking for the first clear bit return 0,
* If the user is looking for the first set bit, return -1. */
if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) {
addReplyLongLong(c, bit ? -1 : 0);
@ -866,22 +900,43 @@ void bitposCommand(client *c) {
p = getObjectReadOnlyString(o,&strlen,llbuf);
/* Parse start/end range if any. */
if (c->argc == 4 || c->argc == 5) {
if (getLongFromObjectOrReply(c,c->argv[3],&start,NULL) != C_OK)
if (c->argc == 4 || c->argc == 5 || c->argc == 6) {
long long totlen = strlen;
/* Make sure we will not overflow */
serverAssert(totlen <= LLONG_MAX >> 3);
if (getLongLongFromObjectOrReply(c,c->argv[3],&start,NULL) != C_OK)
return;
if (c->argc == 5) {
if (getLongFromObjectOrReply(c,c->argv[4],&end,NULL) != C_OK)
if (c->argc == 6) {
if (!strcasecmp(c->argv[5]->ptr,"bit")) isbit = 1;
else if (!strcasecmp(c->argv[5]->ptr,"byte")) isbit = 0;
else {
addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
if (c->argc >= 5) {
if (getLongLongFromObjectOrReply(c,c->argv[4],&end,NULL) != C_OK)
return;
end_given = 1;
} else {
end = strlen-1;
if (isbit) end = (totlen<<3) + 7;
else end = totlen-1;
}
if (isbit) totlen <<= 3;
/* Convert negative indexes */
if (start < 0) start = strlen+start;
if (end < 0) end = strlen+end;
if (start < 0) start = totlen+start;
if (end < 0) end = totlen+end;
if (start < 0) start = 0;
if (end < 0) end = 0;
if (end >= strlen) end = strlen-1;
if (end >= totlen) end = totlen-1;
if (isbit && start <= end) {
/* Before converting bit offset to byte offset, create negative masks
* for the edges. */
first_byte_neg_mask = ~((1<<(8-(start&7)))-1) & 0xFF;
last_byte_neg_mask = (1<<(7-(end&7)))-1;
start >>= 3;
end >>= 3;
}
} else if (c->argc == 3) {
/* The whole string. */
start = 0;
@ -898,8 +953,36 @@ void bitposCommand(client *c) {
addReplyLongLong(c, -1);
} else {
long bytes = end-start+1;
long long pos = redisBitpos(p+start,bytes,bit);
long long pos;
unsigned char tmpchar;
if (first_byte_neg_mask) {
if (bit) tmpchar = p[start] & ~first_byte_neg_mask;
else tmpchar = p[start] | first_byte_neg_mask;
/* Special case, there is only one byte */
if (last_byte_neg_mask && bytes == 1) {
if (bit) tmpchar = tmpchar & ~last_byte_neg_mask;
else tmpchar = tmpchar | last_byte_neg_mask;
}
pos = redisBitpos(&tmpchar,1,bit);
/* If there are no more bytes or we get valid pos, we can exit early */
if (bytes == 1 || (pos != -1 && pos != 8)) goto result;
start++;
bytes--;
}
/* If the last byte has bits that are out of range, exclude it from this
* pass; it is checked separately below with those bits masked. */
long curbytes = bytes - (last_byte_neg_mask ? 1 : 0);
if (curbytes > 0) {
pos = redisBitpos(p+start,curbytes,bit);
/* If there are no more bytes or we get a valid pos, we can exit early */
if (bytes == curbytes || (pos != -1 && pos != (long long)curbytes<<3)) goto result;
start += curbytes;
bytes -= curbytes;
}
if (bit) tmpchar = p[end] & ~last_byte_neg_mask;
else tmpchar = p[end] | last_byte_neg_mask;
pos = redisBitpos(&tmpchar,1,bit);
result:
/* If we are looking for clear bits, and the user specified an exact
* range with start-end, we can't consider the right of the range as
* zero padded (as we do when no explicit end is given).

View File

@ -187,6 +187,11 @@ proc randomInt {max} {
expr {int(rand()*$max)}
}
# Return a random integer in the half-open interval [min, max): min is
# included, max is excluded. When min equals max the result is always min.
proc randomRange {min max} {
    set span [expr {$max - $min}]
    return [expr {int(rand() * $span) + $min}]
}
# Random signed integer between -max and max (both extremes excluded).
proc randomSignedInt {max} {
set i [randomInt $max]

View File

@ -4,6 +4,12 @@ proc count_bits s {
string length [regsub -all {0} $bits {}]
}
# Count the set bits of string s between bit offsets start and end, both
# inclusive. Offsets index the bit string produced by "binary scan ... B*",
# i.e. offset 0 is the most significant bit of the first byte.
proc count_bits_start_end {s start end} {
    binary scan $s B* bitstring
    set window [string range $bitstring $start $end]
    # Dropping every "0" leaves one character per set bit.
    return [string length [string map {0 {}} $window]]
}
proc simulate_bit_op {op args} {
set maxlen 0
set j 0
@ -40,25 +46,30 @@ proc simulate_bit_op {op args} {
start_server {tags {"bitops"}} {
test {BITCOUNT returns 0 against non existing key} {
r bitcount no-key
} 0
assert {[r bitcount no-key] == 0}
assert {[r bitcount no-key 0 1000 bit] == 0}
}
test {BITCOUNT returns 0 with out of range indexes} {
r set str "xxxx"
r bitcount str 4 10
} 0
assert {[r bitcount str 4 10] == 0}
assert {[r bitcount str 32 87 bit] == 0}
}
test {BITCOUNT returns 0 with negative indexes where start > end} {
r set str "xxxx"
r bitcount str -6 -7
} 0
assert {[r bitcount str -6 -7] == 0}
assert {[r bitcount str -6 -15 bit] == 0}
}
catch {unset num}
foreach vec [list "" "\xaa" "\x00\x00\xff" "foobar" "123"] {
incr num
test "BITCOUNT against test vector #$num" {
r set str $vec
assert {[r bitcount str] == [count_bits $vec]}
set count [count_bits $vec]
assert {[r bitcount str] == $count}
assert {[r bitcount str 0 -1 bit] == $count}
}
}
@ -66,7 +77,9 @@ start_server {tags {"bitops"}} {
for {set j 0} {$j < 100} {incr j} {
set str [randstring 0 3000]
r set str $str
assert {[r bitcount str] == [count_bits $str]}
set count [count_bits $str]
assert {[r bitcount str] == $count}
assert {[r bitcount str 0 -1 bit] == $count}
}
}
@ -78,18 +91,43 @@ start_server {tags {"bitops"}} {
set start [randomInt $l]
set end [randomInt $l]
if {$start > $end} {
# Swap start and end
lassign [list $end $start] start end
}
assert {[r bitcount str $start $end] == [count_bits [string range $str $start $end]]}
}
for {set j 0} {$j < 100} {incr j} {
set str [randstring 0 3000]
r set str $str
set l [expr [string length $str] * 8]
set start [randomInt $l]
set end [randomInt $l]
if {$start > $end} {
# Swap start and end
lassign [list $end $start] start end
}
assert {[r bitcount str $start $end bit] == [count_bits_start_end $str $start $end]}
}
}
test {BITCOUNT with start, end} {
r set s "foobar"
set s "foobar"
r set s $s
assert_equal [r bitcount s 0 -1] [count_bits "foobar"]
assert_equal [r bitcount s 1 -2] [count_bits "ooba"]
assert_equal [r bitcount s -2 1] [count_bits ""]
assert_equal [r bitcount s 0 1000] [count_bits "foobar"]
assert_equal [r bitcount s 0 -1 bit] [count_bits $s]
assert_equal [r bitcount s 10 14 bit] [count_bits_start_end $s 10 14]
assert_equal [r bitcount s 3 14 bit] [count_bits_start_end $s 3 14]
assert_equal [r bitcount s 3 29 bit] [count_bits_start_end $s 3 29]
assert_equal [r bitcount s 10 -34 bit] [count_bits_start_end $s 10 14]
assert_equal [r bitcount s 3 -34 bit] [count_bits_start_end $s 3 14]
assert_equal [r bitcount s 3 -19 bit] [count_bits_start_end $s 3 29]
assert_equal [r bitcount s -2 1 bit] 0
assert_equal [r bitcount s 0 1000 bit] [count_bits $s]
}
test {BITCOUNT syntax error #1} {
@ -97,6 +135,11 @@ start_server {tags {"bitops"}} {
set e
} {ERR*syntax*}
test {BITCOUNT syntax error #2} {
catch {r bitcount s 0 1 hello} e
set e
} {ERR*syntax*}
test {BITCOUNT regression test for github issue #582} {
r del foo
r setbit foo 0 1
@ -216,33 +259,39 @@ start_server {tags {"bitops"}} {
test {BITPOS bit=0 with empty key returns 0} {
r del str
r bitpos str 0
} {0}
assert {[r bitpos str 0] == 0}
assert {[r bitpos str 0 0 -1 bit] == 0}
}
test {BITPOS bit=1 with empty key returns -1} {
r del str
r bitpos str 1
} {-1}
assert {[r bitpos str 1] == -1}
assert {[r bitpos str 1 0 -1] == -1}
}
test {BITPOS bit=0 with string less than 1 word works} {
r set str "\xff\xf0\x00"
r bitpos str 0
} {12}
assert {[r bitpos str 0] == 12}
assert {[r bitpos str 0 0 -1 bit] == 12}
}
test {BITPOS bit=1 with string less than 1 word works} {
r set str "\x00\x0f\x00"
r bitpos str 1
} {12}
assert {[r bitpos str 1] == 12}
assert {[r bitpos str 1 0 -1 bit] == 12}
}
test {BITPOS bit=0 starting at unaligned address} {
r set str "\xff\xf0\x00"
r bitpos str 0 1
} {12}
assert {[r bitpos str 0 1] == 12}
assert {[r bitpos str 0 1 -1 bit] == 12}
}
test {BITPOS bit=1 starting at unaligned address} {
r set str "\x00\x0f\xff"
r bitpos str 1 1
} {12}
assert {[r bitpos str 1 1] == 12}
assert {[r bitpos str 1 1 -1 bit] == 12}
}
test {BITPOS bit=0 unaligned+full word+reminder} {
r del str
@ -262,6 +311,16 @@ start_server {tags {"bitops"}} {
assert {[r bitpos str 0 6] == 216}
assert {[r bitpos str 0 7] == 216}
assert {[r bitpos str 0 8] == 216}
assert {[r bitpos str 0 1 -1 bit] == 216}
assert {[r bitpos str 0 9 -1 bit] == 216}
assert {[r bitpos str 0 17 -1 bit] == 216}
assert {[r bitpos str 0 25 -1 bit] == 216}
assert {[r bitpos str 0 33 -1 bit] == 216}
assert {[r bitpos str 0 41 -1 bit] == 216}
assert {[r bitpos str 0 49 -1 bit] == 216}
assert {[r bitpos str 0 57 -1 bit] == 216}
assert {[r bitpos str 0 65 -1 bit] == 216}
}
test {BITPOS bit=1 unaligned+full word+reminder} {
@ -282,12 +341,23 @@ start_server {tags {"bitops"}} {
assert {[r bitpos str 1 6] == 216}
assert {[r bitpos str 1 7] == 216}
assert {[r bitpos str 1 8] == 216}
assert {[r bitpos str 1 1 -1 bit] == 216}
assert {[r bitpos str 1 9 -1 bit] == 216}
assert {[r bitpos str 1 17 -1 bit] == 216}
assert {[r bitpos str 1 25 -1 bit] == 216}
assert {[r bitpos str 1 33 -1 bit] == 216}
assert {[r bitpos str 1 41 -1 bit] == 216}
assert {[r bitpos str 1 49 -1 bit] == 216}
assert {[r bitpos str 1 57 -1 bit] == 216}
assert {[r bitpos str 1 65 -1 bit] == 216}
}
test {BITPOS bit=1 returns -1 if string is all 0 bits} {
r set str ""
for {set j 0} {$j < 20} {incr j} {
assert {[r bitpos str 1] == -1}
assert {[r bitpos str 1 0 -1 bit] == -1}
r append str "\x00"
}
}
@ -299,6 +369,12 @@ start_server {tags {"bitops"}} {
assert {[r bitpos str 0 2 -1] == 16}
assert {[r bitpos str 0 2 200] == 16}
assert {[r bitpos str 0 1 1] == -1}
assert {[r bitpos str 0 0 -1 bit] == 0}
assert {[r bitpos str 0 8 -1 bit] == 16}
assert {[r bitpos str 0 16 -1 bit] == 16}
assert {[r bitpos str 0 16 200 bit] == 16}
assert {[r bitpos str 0 8 8 bit] == -1}
}
test {BITPOS bit=1 works with intervals} {
@ -308,6 +384,12 @@ start_server {tags {"bitops"}} {
assert {[r bitpos str 1 2 -1] == -1}
assert {[r bitpos str 1 2 200] == -1}
assert {[r bitpos str 1 1 1] == 8}
assert {[r bitpos str 1 0 -1 bit] == 8}
assert {[r bitpos str 1 8 -1 bit] == 8}
assert {[r bitpos str 1 16 -1 bit] == -1}
assert {[r bitpos str 1 16 200 bit] == -1}
assert {[r bitpos str 1 8 8 bit] == 8}
}
test {BITPOS bit=0 changes behavior if end is given} {
@ -315,6 +397,7 @@ start_server {tags {"bitops"}} {
assert {[r bitpos str 0] == 24}
assert {[r bitpos str 0 0] == 24}
assert {[r bitpos str 0 0 -1] == -1}
assert {[r bitpos str 0 0 -1 bit] == -1}
}
test {SETBIT/BITFIELD only increase dirty when the value changed} {
@ -358,6 +441,7 @@ start_server {tags {"bitops"}} {
set first_one_pos -1
for {set j 0} {$j < 1000} {incr j} {
assert {[r bitpos str 1] == $first_one_pos}
assert {[r bitpos str 1 0 -1 bit] == $first_one_pos}
set pos [randomInt $max]
r setbit str $pos 1
if {$first_one_pos == -1 || $first_one_pos > $pos} {
@ -374,6 +458,11 @@ start_server {tags {"bitops"}} {
r set str [string repeat "\xff" [expr $max/8]]
for {set j 0} {$j < 1000} {incr j} {
assert {[r bitpos str 0] == $first_zero_pos}
if {$first_zero_pos == $max} {
assert {[r bitpos str 0 0 -1 bit] == -1}
} else {
assert {[r bitpos str 0 0 -1 bit] == $first_zero_pos}
}
set pos [randomInt $max]
r setbit str $pos 0
if {$first_zero_pos > $pos} {
@ -384,6 +473,74 @@ start_server {tags {"bitops"}} {
}
}
# This test creates a string of 10 bytes and runs two iterations. One clears
# all the bits and sets just one bit, and the other sets all the bits and clears
# just one bit. Each iteration loops from bit offset 0 to 79, uses SETBIT to
# set the bit to 0 or 1, and then runs BITPOS and BITCOUNT on a few ranges.
test {BITPOS/BITCOUNT fuzzy testing using SETBIT} {
# Check BITPOS and BITCOUNT on one randomly chosen range.
#
# start is drawn with randomRange from (start1, end1) and end from
# (start2, end2); the two are swapped if they come out in the wrong order.
# pos is the offset of the single bit whose value is $bit (every other bit
# holds the opposite value), and pos_type is the unit (bit or byte) passed
# to both commands.
proc test_one {start1 end1 start2 end2 pos bit pos_type} {
    set start [randomRange $start1 $end1]
    set end [randomRange $start2 $end2]
    if {$start > $end} {
        # Swap start and end
        set tmp $start
        set start $end
        set end $tmp
    }

    # Translate the range into bit offsets. A byte range covers all eight
    # bits of its last byte.
    if {$pos_type eq "byte"} {
        set startbit [expr {$start << 3}]
        set endbit [expr {($end << 3) + 7}]
    } else {
        set startbit $start
        set endbit $end
    }

    # 1 when the special bit falls inside the range, 0 otherwise.
    set inrange [expr {$pos >= $startbit && $pos <= $endbit}]

    # Expected BITCOUNT result, four cases:
    #   inrange == 0, bit == 0: every bit in range is set   -> range length
    #   inrange == 0, bit == 1: every bit in range is clear -> 0
    #   inrange == 1, bit == 0: all set but one             -> range length - 1
    #   inrange == 1, bit == 1: all clear but one           -> 1
    set rangelen [expr {$endbit - $startbit + 1}]
    set res_count [expr {$rangelen * (1 - $bit) + $inrange * ($bit ? 1 : -1)}]

    assert {[r bitpos str $bit $start $end $pos_type] == [expr $inrange ? $pos : -1]}
    assert {[r bitcount str $start $end $pos_type] == $res_count}
}
r del str
set max 80;
r setbit str [expr $max - 1] 0
set bytes [expr $max >> 3]
# First iteration sets all bits to 1, then set bit to 0 from 0 to max - 1
# Second iteration sets all bits to 0, then set bit to 1 from 0 to max - 1
for {set bit 0} {$bit < 2} {incr bit} {
r bitop not str str
for {set j 0} {$j < $max} {incr j} {
r setbit str $j $bit
# First iteration tests byte index and second iteration tests bit index.
foreach {curr end pos_type} [list [expr $j >> 3] $bytes byte $j $max bit] {
# start==end set to bit position
test_one $curr $curr $curr $curr $j $bit $pos_type
# Both start and end are before bit position
if {$curr > 0} {
test_one 0 $curr 0 $curr $j $bit $pos_type
}
# Both start and end are after bit position
if {$curr < [expr $end - 1]} {
test_one [expr $curr + 1] $end [expr $curr + 1] $end $j $bit $pos_type
}
# start is before and end is after bit position
if {$curr > 0 && $curr < [expr $end - 1]} {
test_one 0 $curr [expr $curr +1] $end $j $bit $pos_type
}
}
# restore bit
r setbit str $j [expr 1 - $bit]
}
}
}
test "BIT pos larger than UINT_MAX" {
set bytes [expr (1 << 29) + 1]
set bitpos [expr (1 << 32)]