/* Listpack -- A list of strings serialization format
 *
 * This file implements the specification you can find at:
 *
 * https://github.com/antirez/listpack
 *
 * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __LISTPACK_H
#define __LISTPACK_H

#include <stdlib.h>
#include <stdint.h>

#define LP_INTBUF_SIZE 21 /* 20 digits of -2^63 + 1 null term = 21. */

/* Possible values for the 'where' argument of lpInsert(): */
#define LP_BEFORE 0
#define LP_AFTER 1
#define LP_REPLACE 2

/* Each entry in the listpack is either a string or an integer. */
typedef struct {
    /* When string is used, it is provided with the length (slen). */
    unsigned char *sval;
    uint32_t slen;
    /* When integer is used, 'sval' is NULL, and lval holds the value. */
    long long lval;
} listpackEntry;
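
/* Illustrative sketch (not part of the API): a listpackEntry populated by one
 * of the lpRandom*() helpers declared below holds either a string or an
 * integer, distinguished by whether 'sval' is set:
 *
 *   listpackEntry e;
 *   ...
 *   if (e.sval != NULL)
 *       printf("string: %.*s\n", (int)e.slen, e.sval);
 *   else
 *       printf("integer: %lld\n", e.lval);
 */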

unsigned char *lpNew(size_t capacity);
void lpFree(unsigned char *lp);
unsigned char *lpShrinkToFit(unsigned char *lp);
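
/* Minimal usage sketch (illustrative, error handling omitted). Calls that may
 * grow the listpack can reallocate it, so the returned pointer must replace
 * the old one:
 *
 *   unsigned char *lp = lpNew(0);
 *   lp = lpAppend(lp, (unsigned char*)"hello", 5);
 *   lp = lpAppendInteger(lp, 42);
 *   ...
 *   lpFree(lp);
 */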
unsigned char *lpInsertString(unsigned char *lp, unsigned char *s, uint32_t slen,
                              unsigned char *p, int where, unsigned char **newp);
unsigned char *lpInsertInteger(unsigned char *lp, long long lval,
                               unsigned char *p, int where, unsigned char **newp);
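
/* Sketch of the insertion modes (assumes 'p' points at an existing entry,
 * e.g. obtained from lpFirst() or lpSeek(); 'newp' receives the position of
 * the newly written entry):
 *
 *   lp = lpInsertString(lp, (unsigned char*)"foo", 3, p, LP_BEFORE, &p);
 *   lp = lpInsertString(lp, (unsigned char*)"bar", 3, p, LP_AFTER, &p);
 *   lp = lpInsertInteger(lp, 42, p, LP_REPLACE, &p);
 */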
unsigned char *lpPrepend(unsigned char *lp, unsigned char *s, uint32_t slen);
unsigned char *lpPrependInteger(unsigned char *lp, long long lval);
unsigned char *lpAppend(unsigned char *lp, unsigned char *s, uint32_t slen);
unsigned char *lpAppendInteger(unsigned char *lp, long long lval);
unsigned char *lpReplace(unsigned char *lp, unsigned char **p, unsigned char *s, uint32_t slen);
unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long lval);
unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp);
unsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num);
unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num);
unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count);
unsigned char *lpMerge(unsigned char **first, unsigned char **second);
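
/* Sketch: deleting the entry at 'p' while keeping a usable cursor. 'newp' is
 * set to the entry that follows the deleted one, or NULL if it was the last:
 *
 *   lp = lpDelete(lp, p, &p);
 */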
unsigned char *lpDup(unsigned char *lp);
unsigned long lpLength(unsigned char *lp);
unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf);
unsigned char *lpGetValue(unsigned char *p, unsigned int *slen, long long *lval);
unsigned char *lpFind(unsigned char *lp, unsigned char *p, unsigned char *s, uint32_t slen, unsigned int skip);
unsigned char *lpFirst(unsigned char *lp);
unsigned char *lpLast(unsigned char *lp);
unsigned char *lpNext(unsigned char *lp, unsigned char *p);
unsigned char *lpPrev(unsigned char *lp, unsigned char *p);
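
/* Iteration sketch: walking all entries and printing them. lpGet() returns a
 * pointer to 'len' bytes; for integer entries the value is rendered as text
 * into the caller-supplied buffer, which is why LP_INTBUF_SIZE bytes are
 * needed:
 *
 *   int64_t len;
 *   unsigned char buf[LP_INTBUF_SIZE];
 *   unsigned char *p = lpFirst(lp);
 *   while (p != NULL) {
 *       unsigned char *ele = lpGet(p, &len, buf);
 *       printf("%.*s\n", (int)len, ele);
 *       p = lpNext(lp, p);
 *   }
 */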
size_t lpBytes(unsigned char *lp);
size_t lpEstimateBytesRepeatedInteger(long long lval, unsigned long rep);
unsigned char *lpSeek(unsigned char *lp, long index);
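
/* Sketch: lpSeek() also accepts negative indexes counting from the tail,
 * e.g. lpSeek(lp, 0) is the first entry and lpSeek(lp, -1) the last;
 * out-of-range indexes yield NULL. */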
typedef int (*listpackValidateEntryCB)(unsigned char *p, unsigned int head_count, void *userdata);
int lpValidateIntegrity(unsigned char *lp, size_t size, int deep,
                        listpackValidateEntryCB entry_cb, void *cb_userdata);
unsigned char *lpValidateFirst(unsigned char *lp);
int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes);
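
/* Sketch: validating an untrusted serialized listpack before using it
 * (assumes 'buf' and 'buflen' describe the received payload; passing 1 as the
 * third argument requests a deep check that also walks every entry):
 *
 *   if (!lpValidateIntegrity(buf, buflen, 1, NULL, NULL)) {
 *       ... reject the payload ...
 *   }
 */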
unsigned int lpCompare(unsigned char *p, unsigned char *s, uint32_t slen);
void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val);
void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries);
unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index,
                            unsigned int remaining, int even_only);
int lpSafeToAdd(unsigned char *lp, size_t add);
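
/* Sketch: checking the size limit before growing the listpack (lpSafeToAdd()
 * returns 1 when adding 'add' more bytes keeps the listpack within the
 * allowed size, 0 otherwise):
 *
 *   if (lpSafeToAdd(lp, slen))
 *       lp = lpAppend(lp, s, slen);
 */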
void lpRepr(unsigned char *lp);

#ifdef REDIS_TEST
int listpackTest(int argc, char *argv[], int flags);
#endif

#endif