redict/src/listpack.h

107 lines
5.4 KiB
C
Raw Normal View History

/* Listpack -- A lists of strings serialization format
*
* This file implements the specification you can find at:
*
* https://github.com/antirez/listpack
*
* Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __LISTPACK_H
#define __LISTPACK_H
#include <stdlib.h>
#include <stdint.h>
#define LP_INTBUF_SIZE 21 /* 20 digits of -2^63 + 1 null term = 21. */
/* lpInsert() where argument possible values: */
#define LP_BEFORE 0
#define LP_AFTER 1
#define LP_REPLACE 2
/* Each entry in the listpack is either a string or an integer. */
typedef struct {
/* When string is used, it is provided with the length (slen). */
unsigned char *sval;
uint32_t slen;
/* When integer is used, 'sval' is NULL, and lval holds the value. */
long long lval;
} listpackEntry;
unsigned char *lpNew(size_t capacity);
void lpFree(unsigned char *lp);
unsigned char* lpShrinkToFit(unsigned char *lp);
Replace all usage of ziplist with listpack for t_zset (#9366) Part two of implementing #8702 (zset), after #8887. ## Description of the feature Replaced all uses of ziplist with listpack in t_zset, and optimized some of the code to optimize performance. ## Rdb format changes New `RDB_TYPE_ZSET_LISTPACK` rdb type. ## Rdb loading improvements: 1) Pre-expansion of dict for validation of duplicate data for listpack and ziplist. 2) Simplifying the release of empty key objects when RDB loading. 3) Unify ziplist and listpack data verify methods for zset and hash, and move code to rdb.c. ## Interface changes 1) New `zset-max-listpack-entries` config is an alias for `zset-max-ziplist-entries` (same with `zset-max-listpack-value`). 2) OBJECT ENCODING will return listpack instead of ziplist. ## Listpack improvements: 1) Add `lpDeleteRange` and `lpDeleteRangeWithEntry` functions to delete a range of entries from listpack. 2) Improve the performance of `lpCompare`, converting from string to integer is faster than converting from integer to string. 3) Replace `snprintf` with `ll2string` to improve performance in converting numbers to strings in `lpGet()`. ## Zset improvements: 1) Improve the performance of `zzlFind` method, use `lpFind` instead of `lpCompare` in a loop. 2) Use `lpDeleteRangeWithEntry` instead of `lpDelete` twice to delete a element of zset. ## Tests 1) Add some unittests for `lpDeleteRange` and `lpDeleteRangeWithEntry` function. 2) Add zset RDB loading test. 3) Add benchmark test for `lpCompare` and `ziplsitCompare`. 4) Add empty listpack zset corrupt dump test.
2021-09-09 11:18:53 -04:00
unsigned char *lpInsertString(unsigned char *lp, unsigned char *s, uint32_t slen,
unsigned char *p, int where, unsigned char **newp);
unsigned char *lpInsertInteger(unsigned char *lp, long long lval,
unsigned char *p, int where, unsigned char **newp);
unsigned char *lpPrepend(unsigned char *lp, unsigned char *s, uint32_t slen);
unsigned char *lpPrependInteger(unsigned char *lp, long long lval);
unsigned char *lpAppend(unsigned char *lp, unsigned char *s, uint32_t slen);
unsigned char *lpAppendInteger(unsigned char *lp, long long lval);
unsigned char *lpReplace(unsigned char *lp, unsigned char **p, unsigned char *s, uint32_t slen);
unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long lval);
unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp);
Replace all usage of ziplist with listpack for t_zset (#9366) Part two of implementing #8702 (zset), after #8887. ## Description of the feature Replaced all uses of ziplist with listpack in t_zset, and optimized some of the code to optimize performance. ## Rdb format changes New `RDB_TYPE_ZSET_LISTPACK` rdb type. ## Rdb loading improvements: 1) Pre-expansion of dict for validation of duplicate data for listpack and ziplist. 2) Simplifying the release of empty key objects when RDB loading. 3) Unify ziplist and listpack data verify methods for zset and hash, and move code to rdb.c. ## Interface changes 1) New `zset-max-listpack-entries` config is an alias for `zset-max-ziplist-entries` (same with `zset-max-listpack-value`). 2) OBJECT ENCODING will return listpack instead of ziplist. ## Listpack improvements: 1) Add `lpDeleteRange` and `lpDeleteRangeWithEntry` functions to delete a range of entries from listpack. 2) Improve the performance of `lpCompare`, converting from string to integer is faster than converting from integer to string. 3) Replace `snprintf` with `ll2string` to improve performance in converting numbers to strings in `lpGet()`. ## Zset improvements: 1) Improve the performance of `zzlFind` method, use `lpFind` instead of `lpCompare` in a loop. 2) Use `lpDeleteRangeWithEntry` instead of `lpDelete` twice to delete a element of zset. ## Tests 1) Add some unittests for `lpDeleteRange` and `lpDeleteRangeWithEntry` function. 2) Add zset RDB loading test. 3) Add benchmark test for `lpCompare` and `ziplsitCompare`. 4) Add empty listpack zset corrupt dump test.
2021-09-09 11:18:53 -04:00
unsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num);
unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num);
unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count);
2021-11-24 06:34:13 -05:00
unsigned char *lpMerge(unsigned char **first, unsigned char **second);
Add listpack encoding for list (#11303) Improve memory efficiency of list keys ## Description of the feature The new listpack encoding uses the old `list-max-listpack-size` config to perform the conversion, which we can think it of as a node inside a quicklist, but without 80 bytes overhead (internal fragmentation included) of quicklist and quicklistNode structs. For example, a list key with 5 items of 10 chars each, now takes 128 bytes instead of 208 it used to take. ## Conversion rules * Convert listpack to quicklist When the listpack length or size reaches the `list-max-listpack-size` limit, it will be converted to a quicklist. * Convert quicklist to listpack When a quicklist has only one node, and its length or size is reduced to half of the `list-max-listpack-size` limit, it will be converted to a listpack. This is done to avoid frequent conversions when we add or remove at the bounding size or length. ## Interface changes 1. add list entry param to listTypeSetIteratorDirection When list encoding is listpack, `listTypeIterator->lpi` points to the next entry of current entry, so when changing the direction, we need to use the current node (listTypeEntry->p) to update `listTypeIterator->lpi` to the next node in the reverse direction. ## Benchmark ### Listpack VS Quicklist with one node * LPUSH - roughly 0.3% improvement * LRANGE - roughly 13% improvement ### Both are quicklist * LRANGE - roughly 3% improvement * LRANGE without pipeline - roughly 3% improvement From the benchmark, as we can see from the results 1. When list is quicklist encoding, LRANGE improves performance by <5%. 2. When list is listpack encoding, LRANGE improves performance by ~13%, the main enhancement is brought by `addListListpackRangeReply()`. ## Memory usage 1M lists(key:0~key:1000000) with 5 items of 10 chars ("hellohello") each. shows memory usage down by 35.49%, from 214MB to 138MB. ## Note 1. Add conversion callback to support doing some work before conversion Since the quicklist iterator decompresses the current node when it is released, we can no longer decompress the quicklist after we convert the list.
2022-11-16 13:29:46 -05:00
unsigned char *lpDup(unsigned char *lp);
unsigned long lpLength(unsigned char *lp);
unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf);
unsigned char *lpGetValue(unsigned char *p, unsigned int *slen, long long *lval);
unsigned char *lpFind(unsigned char *lp, unsigned char *p, unsigned char *s, uint32_t slen, unsigned int skip);
unsigned char *lpFirst(unsigned char *lp);
unsigned char *lpLast(unsigned char *lp);
unsigned char *lpNext(unsigned char *lp, unsigned char *p);
unsigned char *lpPrev(unsigned char *lp, unsigned char *p);
size_t lpBytes(unsigned char *lp);
size_t lpEstimateBytesRepeatedInteger(long long lval, unsigned long rep);
unsigned char *lpSeek(unsigned char *lp, long index);
Replace all usage of ziplist with listpack for t_zset (#9366) Part two of implementing #8702 (zset), after #8887. ## Description of the feature Replaced all uses of ziplist with listpack in t_zset, and optimized some of the code to optimize performance. ## Rdb format changes New `RDB_TYPE_ZSET_LISTPACK` rdb type. ## Rdb loading improvements: 1) Pre-expansion of dict for validation of duplicate data for listpack and ziplist. 2) Simplifying the release of empty key objects when RDB loading. 3) Unify ziplist and listpack data verify methods for zset and hash, and move code to rdb.c. ## Interface changes 1) New `zset-max-listpack-entries` config is an alias for `zset-max-ziplist-entries` (same with `zset-max-listpack-value`). 2) OBJECT ENCODING will return listpack instead of ziplist. ## Listpack improvements: 1) Add `lpDeleteRange` and `lpDeleteRangeWithEntry` functions to delete a range of entries from listpack. 2) Improve the performance of `lpCompare`, converting from string to integer is faster than converting from integer to string. 3) Replace `snprintf` with `ll2string` to improve performance in converting numbers to strings in `lpGet()`. ## Zset improvements: 1) Improve the performance of `zzlFind` method, use `lpFind` instead of `lpCompare` in a loop. 2) Use `lpDeleteRangeWithEntry` instead of `lpDelete` twice to delete a element of zset. ## Tests 1) Add some unittests for `lpDeleteRange` and `lpDeleteRangeWithEntry` function. 2) Add zset RDB loading test. 3) Add benchmark test for `lpCompare` and `ziplsitCompare`. 4) Add empty listpack zset corrupt dump test.
2021-09-09 11:18:53 -04:00
typedef int (*listpackValidateEntryCB)(unsigned char *p, unsigned int head_count, void *userdata);
int lpValidateIntegrity(unsigned char *lp, size_t size, int deep,
listpackValidateEntryCB entry_cb, void *cb_userdata);
unsigned char *lpValidateFirst(unsigned char *lp);
int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes);
unsigned int lpCompare(unsigned char *p, unsigned char *s, uint32_t slen);
void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val);
void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries);
unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index,
unsigned int remaining, int even_only);
int lpSafeToAdd(unsigned char* lp, size_t add);
2021-11-24 06:34:13 -05:00
void lpRepr(unsigned char *lp);
#ifdef REDIS_TEST
int listpackTest(int argc, char *argv[], int flags);
#endif
#endif