From fd566f405094c77f5b6b84195c93552b9e01d4a8 Mon Sep 17 00:00:00 2001 From: Binbin Date: Wed, 17 May 2023 02:32:21 +0800 Subject: [PATCH] Fix for set max entries edge case in setTypeCreate / setTypeMaybeConvert (#12183) In the judgment in setTypeCreate, we should judge size_hint <= max_entries. This results in the following inconsistencies: ``` 127.0.0.1:6379> config set set-max-intset-entries 5 set-max-listpack-entries 5 OK 127.0.0.1:6379> sadd intset_set1 1 2 3 4 5 (integer) 5 127.0.0.1:6379> object encoding intset_set1 "hashtable" 127.0.0.1:6379> sadd intset_set2 1 2 3 4 (integer) 4 127.0.0.1:6379> sadd intset_set2 5 (integer) 1 127.0.0.1:6379> object encoding intset_set2 "intset" 127.0.0.1:6379> sadd listpack_set1 a 1 2 3 4 (integer) 5 127.0.0.1:6379> object encoding listpack_set1 "hashtable" 127.0.0.1:6379> sadd listpack_set2 a 1 2 3 (integer) 4 127.0.0.1:6379> sadd listpack_set2 4 (integer) 1 127.0.0.1:6379> object encoding listpack_set2 "listpack" ``` This was introduced in #12019, added corresponding tests. --- src/t_hash.c | 2 +- src/t_set.c | 14 +++++------ tests/unit/type/set.tcl | 51 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index f7d5af649..2d50ca304 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -45,7 +45,7 @@ void hashTypeTryConversion(robj *o, robj **argv, int start, int end) { /* We guess that most of the values in the input are unique, so * if there are enough arguments we create a pre-sized hash, which - * might overallocate memory if their are duplicates. */ + * might over allocate memory if there are duplicates. */ size_t new_fields = (end - start + 1) / 2; if (new_fields > server.hash_max_listpack_entries) { hashTypeConvert(o, OBJ_ENCODING_HT); diff --git a/src/t_set.c b/src/t_set.c index 56ad95a53..c65203396 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -38,19 +38,19 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, robj *dstkey, int op); /* Factory method to return a set that *can* hold "value". When the object has - * an integer-encodable value, an intset will be returned. Otherwise a regular - * hash table. + * an integer-encodable value, an intset will be returned. Otherwise a listpack + * or a regular hash table. * * The size hint indicates approximately how many items will be added which is * used to determine the initial representation. */ robj *setTypeCreate(sds value, size_t size_hint) { - if (isSdsRepresentableAsLongLong(value,NULL) == C_OK && size_hint < server.set_max_intset_entries) + if (isSdsRepresentableAsLongLong(value,NULL) == C_OK && size_hint <= server.set_max_intset_entries) return createIntsetObject(); - if (size_hint < server.set_max_listpack_entries) + if (size_hint <= server.set_max_listpack_entries) return createSetListpackObject(); /* We may oversize the set by using the hint if the hint is not accurate, - * but we will assume this is accpetable to maximize performance. */ + * but we will assume this is acceptable to maximize performance. */ robj *o = createSetObject(); dictExpand(o->ptr, size_hint); return o; @@ -59,8 +59,8 @@ robj *setTypeCreate(sds value, size_t size_hint) { /* Check if the existing set should be converted to another encoding based off the * the size hint. */ void setTypeMaybeConvert(robj *set, size_t size_hint) { - if ((set->encoding == OBJ_ENCODING_LISTPACK && size_hint >= server.set_max_listpack_entries) - || (set->encoding == OBJ_ENCODING_INTSET && size_hint >= server.set_max_intset_entries)) + if ((set->encoding == OBJ_ENCODING_LISTPACK && size_hint > server.set_max_listpack_entries) + || (set->encoding == OBJ_ENCODING_INTSET && size_hint > server.set_max_intset_entries)) { setTypeConvertAndExpand(set, OBJ_ENCODING_HT, size_hint, 1); } diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index 2d005c675..768e5cfbf 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -113,14 +113,61 @@ start_server { assert_equal {1} [r smismember myset 213244124402402314402033402] } - test "SADD overflows the maximum allowed integers in an intset" { +foreach type {single multiple single_multiple} { + test "SADD overflows the maximum allowed integers in an intset - $type" { r del myset - for {set i 0} {$i < 512} {incr i} { r sadd myset $i } + + if {$type == "single"} { + # All are single sadd commands. + for {set i 0} {$i < 512} {incr i} { r sadd myset $i } + } elseif {$type == "multiple"} { + # One sadd command to add all elements. + set args {} + for {set i 0} {$i < 512} {incr i} { lappend args $i } + r sadd myset {*}$args + } elseif {$type == "single_multiple"} { + # First one sadd adds an element (creates a key) and then one sadd adds all elements. + r sadd myset 1 + set args {} + for {set i 0} {$i < 512} {incr i} { lappend args $i } + r sadd myset {*}$args + } + assert_encoding intset myset + assert_equal 512 [r scard myset] assert_equal 1 [r sadd myset 512] assert_encoding hashtable myset } + test "SADD overflows the maximum allowed elements in a listpack - $type" { + r del myset + + if {$type == "single"} { + # All are single sadd commands. + r sadd myset a + for {set i 0} {$i < 127} {incr i} { r sadd myset $i } + } elseif {$type == "multiple"} { + # One sadd command to add all elements. + set args {} + lappend args a + for {set i 0} {$i < 127} {incr i} { lappend args $i } + r sadd myset {*}$args + } elseif {$type == "single_multiple"} { + # First one sadd adds an element (creates a key) and then one sadd adds all elements. + r sadd myset a + set args {} + lappend args a + for {set i 0} {$i < 127} {incr i} { lappend args $i } + r sadd myset {*}$args + } + + assert_encoding listpack myset + assert_equal 128 [r scard myset] + assert_equal 1 [r sadd myset b] + assert_encoding hashtable myset + } +} + test {Variadic SADD} { r del myset assert_equal 3 [r sadd myset a b c]