Optimize HRANDFIELD and ZRANDMEMBER case 3 when listpack encoded (#12205)

Optimized HRANDFIELD and ZRANDMEMBER commands as in #8444,
CASE 3 under listpack encoding. Boost optimization to CASE 2.5. 

CASE 2.5 listpack only. Sampling unique elements, in non-random order.
Listpack encoded hashes / zsets are meant to be relatively small, so
HRANDFIELD_SUB_STRATEGY_MUL / ZRANDMEMBER_SUB_STRATEGY_MUL
isn't necessary and we rather not make copies of the entries. Instead, we
emit them directly to the output buffer.

Simple benchmarks shows it provides some 400% improvement in HRANDFIELD
and ZRANGESTORE both in CASE 3.

Unrelated changes: remove useless setTypeRandomElements and fix a typo.
This commit is contained in:
Binbin 2023-05-22 20:48:32 +08:00 committed by GitHub
parent 38e284f106
commit 006ab26c37
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 48 additions and 32 deletions

View File

@ -3092,7 +3092,6 @@ void setTypeReleaseIterator(setTypeIterator *si);
int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele);
sds setTypeNextObject(setTypeIterator *si);
int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele);
unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set);
unsigned long setTypeSize(const robj *subject);
void setTypeConvert(robj *subject, int enc);
int setTypeConvertAndExpand(robj *setobj, int enc, unsigned long cap, int panic);

View File

@ -1014,6 +1014,26 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
return;
}
/* CASE 2.5 listpack only. Sampling unique elements, in non-random order.
* Listpack encoded hashes are meant to be relatively small, so
* HRANDFIELD_SUB_STRATEGY_MUL isn't necessary and we rather not make
* copies of the entries. Instead, we emit them directly to the output
* buffer.
*
* And it is inefficient to repeatedly pick one random element from a
* listpack in CASE 4. So we use this instead. */
if (hash->encoding == OBJ_ENCODING_LISTPACK) {
listpackEntry *keys, *vals = NULL;
keys = zmalloc(sizeof(listpackEntry)*count);
if (withvalues)
vals = zmalloc(sizeof(listpackEntry)*count);
serverAssert(lpRandomPairsUnique(hash->ptr, count, keys, vals) == count);
hrandfieldReplyWithListpack(c, count, keys, vals);
zfree(keys);
zfree(vals);
return;
}
/* CASE 3:
* The number of elements inside the hash is not greater than
* HRANDFIELD_SUB_STRATEGY_MUL times the number of requested elements.
@ -1024,6 +1044,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
* a bit less than the number of elements in the hash, the natural approach
* used into CASE 4 is highly inefficient. */
if (count*HRANDFIELD_SUB_STRATEGY_MUL > size) {
/* Hashtable encoding (generic implementation) */
dict *d = dictCreate(&sdsReplyDictType);
dictExpand(d, size);
hashTypeIterator *hi = hashTypeInitIterator(hash);
@ -1077,20 +1098,6 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
* to the temporary hash, trying to eventually get enough unique elements
* to reach the specified count. */
else {
if (hash->encoding == OBJ_ENCODING_LISTPACK) {
/* it is inefficient to repeatedly pick one random element from a
* listpack. so we use this instead: */
listpackEntry *keys, *vals = NULL;
keys = zmalloc(sizeof(listpackEntry)*count);
if (withvalues)
vals = zmalloc(sizeof(listpackEntry)*count);
serverAssert(lpRandomPairsUnique(hash->ptr, count, keys, vals) == count);
hrandfieldReplyWithListpack(c, count, keys, vals);
zfree(keys);
zfree(vals);
return;
}
/* Hashtable encoding (generic implementation) */
unsigned long added = 0;
listpackEntry key, value;

View File

@ -1114,7 +1114,10 @@ void srandmemberWithCountCommand(client *c) {
* Listpack encoded sets are meant to be relatively small, so
* SRANDMEMBER_SUB_STRATEGY_MUL isn't necessary and we rather not make
* copies of the entries. Instead, we emit them directly to the output
* buffer. */
* buffer.
*
* And it is inefficient to repeatedly pick one random element from a
* listpack in CASE 4. So we use this instead. */
if (set->encoding == OBJ_ENCODING_LISTPACK) {
unsigned char *lp = set->ptr;
unsigned char *p = lpFirst(lp);
@ -1337,7 +1340,7 @@ void sinterGenericCommand(client *c, robj **setkeys,
} else if (sets[0]->encoding == OBJ_ENCODING_LISTPACK) {
/* To avoid many reallocs, we estimate that the result is a listpack
* of approximately the same size as the first set. Then we shrink
* it or possibly convert it to intset it in the end. */
* it or possibly convert it to intset in the end. */
unsigned char *lp = lpNew(lpBytes(sets[0]->ptr));
dstset = createObject(OBJ_SET, lp);
dstset->encoding = OBJ_ENCODING_LISTPACK;

View File

@ -4243,6 +4243,27 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
return;
}
/* CASE 2.5 listpack only. Sampling unique elements, in non-random order.
* Listpack encoded zsets are meant to be relatively small, so
* ZRANDMEMBER_SUB_STRATEGY_MUL isn't necessary and we rather not make
* copies of the entries. Instead, we emit them directly to the output
* buffer.
*
* And it is inefficient to repeatedly pick one random element from a
* listpack in CASE 4. So we use this instead. */
if (zsetobj->encoding == OBJ_ENCODING_LISTPACK) {
listpackEntry *keys, *vals = NULL;
keys = zmalloc(sizeof(listpackEntry)*count);
if (withscores)
vals = zmalloc(sizeof(listpackEntry)*count);
serverAssert(lpRandomPairsUnique(zsetobj->ptr, count, keys, vals) == count);
zrandmemberReplyWithListpack(c, count, keys, vals);
zfree(keys);
zfree(vals);
zuiClearIterator(&src);
return;
}
/* CASE 3:
* The number of elements inside the zset is not greater than
* ZRANDMEMBER_SUB_STRATEGY_MUL times the number of requested elements.
@ -4253,6 +4274,7 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
* a bit less than the number of elements in the set, the natural approach
* used into CASE 4 is highly inefficient. */
if (count*ZRANDMEMBER_SUB_STRATEGY_MUL > size) {
/* Hashtable encoding (generic implementation) */
dict *d = dictCreate(&sdsReplyDictType);
dictExpand(d, size);
/* Add all the elements into the temporary dictionary. */
@ -4296,21 +4318,6 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
* to the temporary set, trying to eventually get enough unique elements
* to reach the specified count. */
else {
if (zsetobj->encoding == OBJ_ENCODING_LISTPACK) {
/* it is inefficient to repeatedly pick one random element from a
* listpack. so we use this instead: */
listpackEntry *keys, *vals = NULL;
keys = zmalloc(sizeof(listpackEntry)*count);
if (withscores)
vals = zmalloc(sizeof(listpackEntry)*count);
serverAssert(lpRandomPairsUnique(zsetobj->ptr, count, keys, vals) == count);
zrandmemberReplyWithListpack(c, count, keys, vals);
zfree(keys);
zfree(vals);
zuiClearIterator(&src);
return;
}
/* Hashtable encoding (generic implementation) */
unsigned long added = 0;
dict *d = dictCreate(&hashDictType);