mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-23 00:28:26 -05:00
985430b4fc
Redis supports inserting data over 4GB into string (and recently for lists too, see #9357), But LZF compression used in RDB files (see `rdbcompression` config), and in quicklist (see `list-compress-depth` config) does not support compress/decompress data over UINT32_MAX, which will result in corrupting the rdb after compression. Internal changes: 1. Modify the `unsigned int` parameter of `lzf_compress/lzf_decompress` to `size_t`. 2. Modify the variable types in `lzf_compress` involving offsets and lengths to `size_t`. 3. Set LZF_USE_OFFSETS to 0. When LZF_USE_OFFSETS is 1, lzf store offset into `LZF_HSLOT`(32bit). Even in 64-bit, `LZF_USE_OFFSETS` defaults to 1, because lzf assumes that it only compresses and decompresses data smaller than UINT32_MAX. But now we need to make lzf support 64-bit, turning on `LZF_USE_OFFSETS` will make it impossible to store 64-bit offsets or pointers. BTW, disable LZF_USE_OFFSETS also brings a few performance improvements. Tests: 1. Add test for compress/decompress string large than UINT32_MAX. 2. Add unittest for compress/decompress quicklistNode.
191 lines
5.8 KiB
C++
191 lines
5.8 KiB
C++
/*
|
|
* Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp@schmorp.de>
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without modifica-
|
|
* tion, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
|
|
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
|
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
|
|
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
|
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
|
|
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* the GNU General Public License ("GPL") version 2 or any later version,
|
|
* in which case the provisions of the GPL are applicable instead of
|
|
* the above. If you wish to allow the use of your version of this file
|
|
* only under the terms of the GPL and not to allow others to use your
|
|
* version of this file under the BSD license, indicate your decision
|
|
* by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL. If you do not delete the
|
|
* provisions above, a recipient may use your version of this file under
|
|
* either the BSD or the GPL.
|
|
*/
|
|
|
|
#ifndef LZFP_h
|
|
#define LZFP_h
|
|
|
|
#define STANDALONE 1 /* at the moment, this is ok. */
|
|
|
|
#ifndef STANDALONE
|
|
# include "lzf.h"
|
|
#endif
|
|
|
|
/*
|
|
* Size of hashtable is (1 << HLOG) * sizeof (char *)
|
|
* decompression is independent of the hash table size
|
|
* the difference between 15 and 14 is very small
|
|
* for small blocks (and 14 is usually a bit faster).
|
|
* For a low-memory/faster configuration, use HLOG == 13;
|
|
* For best compression, use 15 or 16 (or more, up to 22).
|
|
*/
|
|
#ifndef HLOG
|
|
# define HLOG 16
|
|
#endif
|
|
|
|
/*
|
|
* Sacrifice very little compression quality in favour of compression speed.
|
|
* This gives almost the same compression as the default code, and is
|
|
* (very roughly) 15% faster. This is the preferred mode of operation.
|
|
*/
|
|
#ifndef VERY_FAST
|
|
# define VERY_FAST 1
|
|
#endif
|
|
|
|
/*
|
|
* Sacrifice some more compression quality in favour of compression speed.
|
|
* (roughly 1-2% worse compression for large blocks and
|
|
* 9-10% for small, redundant, blocks and >>20% better speed in both cases)
|
|
* In short: when in need for speed, enable this for binary data,
|
|
* possibly disable this for text data.
|
|
*/
|
|
#ifndef ULTRA_FAST
|
|
# define ULTRA_FAST 0
|
|
#endif
|
|
|
|
/*
|
|
* Unconditionally aligning does not cost very much, so do it if unsure
|
|
*/
|
|
#ifndef STRICT_ALIGN
|
|
# if !(defined(__i386) || defined (__amd64))
|
|
# define STRICT_ALIGN 1
|
|
# else
|
|
# define STRICT_ALIGN 0
|
|
# endif
|
|
#endif
|
|
|
|
/*
|
|
* You may choose to pre-set the hash table (might be faster on some
|
|
* modern cpus and large (>>64k) blocks, and also makes compression
|
|
* deterministic/repeatable when the configuration otherwise is the same).
|
|
*/
|
|
#ifndef INIT_HTAB
|
|
# define INIT_HTAB 0
|
|
#endif
|
|
|
|
/*
|
|
* Avoid assigning values to errno variable? for some embedding purposes
|
|
* (linux kernel for example), this is necessary. NOTE: this breaks
|
|
* the documentation in lzf.h. Avoiding errno has no speed impact.
|
|
*/
|
|
#ifndef AVOID_ERRNO
|
|
# define AVOID_ERRNO 0
|
|
#endif
|
|
|
|
/*
|
|
* Whether to pass the LZF_STATE variable as argument, or allocate it
|
|
* on the stack. For small-stack environments, define this to 1.
|
|
* NOTE: this breaks the prototype in lzf.h.
|
|
*/
|
|
#ifndef LZF_STATE_ARG
|
|
# define LZF_STATE_ARG 0
|
|
#endif
|
|
|
|
/*
|
|
* Whether to add extra checks for input validity in lzf_decompress
|
|
* and return EINVAL if the input stream has been corrupted. This
|
|
* only shields against overflowing the input buffer and will not
|
|
* detect most corrupted streams.
|
|
* This check is not normally noticeable on modern hardware
|
|
* (<1% slowdown), but might slow down older cpus considerably.
|
|
*/
|
|
#ifndef CHECK_INPUT
|
|
# define CHECK_INPUT 1
|
|
#endif
|
|
|
|
/*
|
|
* Whether to store pointers or offsets inside the hash table. On
|
|
* 64 bit architectures, pointers take up twice as much space,
|
|
* and might also be slower. Default is to autodetect.
|
|
* Notice: Don't set this value to 1, it will result in 'LZF_HSLOT'
|
|
* not being able to store offset above UINT32_MAX in 64bit. */
|
|
#define LZF_USE_OFFSETS 0
|
|
|
|
/*****************************************************************************/
|
|
/* nothing should be changed below */
|
|
|
|
#ifdef __cplusplus
|
|
# include <cstring>
|
|
# include <climits>
|
|
using namespace std;
|
|
#else
|
|
# include <string.h>
|
|
# include <limits.h>
|
|
#endif
|
|
|
|
#ifndef LZF_USE_OFFSETS
|
|
# if defined (WIN32)
|
|
# define LZF_USE_OFFSETS defined(_M_X64)
|
|
# else
|
|
# if __cplusplus > 199711L
|
|
# include <cstdint>
|
|
# else
|
|
# include <stdint.h>
|
|
# endif
|
|
# define LZF_USE_OFFSETS (UINTPTR_MAX > 0xffffffffU)
|
|
# endif
|
|
#endif
|
|
|
|
typedef unsigned char u8;
|
|
|
|
#if LZF_USE_OFFSETS
|
|
# define LZF_HSLOT_BIAS ((const u8 *)in_data)
|
|
typedef unsigned int LZF_HSLOT;
|
|
#else
|
|
# define LZF_HSLOT_BIAS 0
|
|
typedef const u8 *LZF_HSLOT;
|
|
#endif
|
|
|
|
typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
|
|
|
|
#if !STRICT_ALIGN
|
|
/* for unaligned accesses we need a 16 bit datatype. */
|
|
# if USHRT_MAX == 65535
|
|
typedef unsigned short u16;
|
|
# elif UINT_MAX == 65535
|
|
typedef unsigned int u16;
|
|
# else
|
|
# undef STRICT_ALIGN
|
|
# define STRICT_ALIGN 1
|
|
# endif
|
|
#endif
|
|
|
|
#if ULTRA_FAST
|
|
# undef VERY_FAST
|
|
#endif
|
|
|
|
#endif
|
|
|