redict/src/lzfP.h
sundb 985430b4fc
Change lzf to handle values larger than UINT32_MAX (#9776)
Redis supports inserting data over 4GB into string (and recently for lists too, see #9357),
But LZF compression used in RDB files (see `rdbcompression` config), and in quicklist
(see `list-compress-depth` config) does not support compress/decompress data over
UINT32_MAX, which will result in corrupting the rdb after compression.

Internal changes:
1. Modify the `unsigned int` parameter of `lzf_compress/lzf_decompress` to `size_t`.
2. Modify the variable types in `lzf_compress` involving offsets and lengths to `size_t`.
3. Set LZF_USE_OFFSETS to 0.
    When LZF_USE_OFFSETS is 1, lzf store offset into `LZF_HSLOT`(32bit). 
    Even in 64-bit, `LZF_USE_OFFSETS` defaults to 1, because lzf assumes that it only
    compresses and decompresses data smaller than UINT32_MAX.
    But now we need to make lzf support 64-bit, turning on `LZF_USE_OFFSETS` will make
    it impossible to store 64-bit offsets or pointers.
    BTW, disable LZF_USE_OFFSETS also brings a few performance improvements.

Tests:
1. Add test for compress/decompress string large than UINT32_MAX.
2. Add unittest for compress/decompress quicklistNode.
2021-11-16 13:12:25 +02:00

191 lines
5.8 KiB
C++

/*
* Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU General Public License ("GPL") version 2 or any later version,
* in which case the provisions of the GPL are applicable instead of
* the above. If you wish to allow the use of your version of this file
* only under the terms of the GPL and not to allow others to use your
* version of this file under the BSD license, indicate your decision
* by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL. If you do not delete the
* provisions above, a recipient may use your version of this file under
* either the BSD or the GPL.
*/
#ifndef LZFP_h
#define LZFP_h
#define STANDALONE 1 /* at the moment, this is ok. */
#ifndef STANDALONE
# include "lzf.h"
#endif
/*
* Size of hashtable is (1 << HLOG) * sizeof (char *)
* decompression is independent of the hash table size
* the difference between 15 and 14 is very small
* for small blocks (and 14 is usually a bit faster).
* For a low-memory/faster configuration, use HLOG == 13;
* For best compression, use 15 or 16 (or more, up to 22).
*/
#ifndef HLOG
# define HLOG 16
#endif
/*
* Sacrifice very little compression quality in favour of compression speed.
* This gives almost the same compression as the default code, and is
* (very roughly) 15% faster. This is the preferred mode of operation.
*/
#ifndef VERY_FAST
# define VERY_FAST 1
#endif
/*
* Sacrifice some more compression quality in favour of compression speed.
* (roughly 1-2% worse compression for large blocks and
* 9-10% for small, redundant, blocks and >>20% better speed in both cases)
* In short: when in need for speed, enable this for binary data,
* possibly disable this for text data.
*/
#ifndef ULTRA_FAST
# define ULTRA_FAST 0
#endif
/*
* Unconditionally aligning does not cost very much, so do it if unsure
*/
#ifndef STRICT_ALIGN
# if !(defined(__i386) || defined (__amd64))
# define STRICT_ALIGN 1
# else
# define STRICT_ALIGN 0
# endif
#endif
/*
* You may choose to pre-set the hash table (might be faster on some
* modern cpus and large (>>64k) blocks, and also makes compression
* deterministic/repeatable when the configuration otherwise is the same).
*/
#ifndef INIT_HTAB
# define INIT_HTAB 0
#endif
/*
* Avoid assigning values to errno variable? for some embedding purposes
* (linux kernel for example), this is necessary. NOTE: this breaks
* the documentation in lzf.h. Avoiding errno has no speed impact.
*/
#ifndef AVOID_ERRNO
# define AVOID_ERRNO 0
#endif
/*
* Whether to pass the LZF_STATE variable as argument, or allocate it
* on the stack. For small-stack environments, define this to 1.
* NOTE: this breaks the prototype in lzf.h.
*/
#ifndef LZF_STATE_ARG
# define LZF_STATE_ARG 0
#endif
/*
* Whether to add extra checks for input validity in lzf_decompress
* and return EINVAL if the input stream has been corrupted. This
* only shields against overflowing the input buffer and will not
* detect most corrupted streams.
* This check is not normally noticeable on modern hardware
* (<1% slowdown), but might slow down older cpus considerably.
*/
#ifndef CHECK_INPUT
# define CHECK_INPUT 1
#endif
/*
* Whether to store pointers or offsets inside the hash table. On
* 64 bit architectures, pointers take up twice as much space,
* and might also be slower. Default is to autodetect.
* Notice: Don't set this value to 1, it will result in 'LZF_HSLOT'
* not being able to store offset above UINT32_MAX in 64bit. */
#define LZF_USE_OFFSETS 0
/*****************************************************************************/
/* nothing should be changed below */
#ifdef __cplusplus
# include <cstring>
# include <climits>
using namespace std;
#else
# include <string.h>
# include <limits.h>
#endif
#ifndef LZF_USE_OFFSETS
# if defined (WIN32)
# define LZF_USE_OFFSETS defined(_M_X64)
# else
# if __cplusplus > 199711L
# include <cstdint>
# else
# include <stdint.h>
# endif
# define LZF_USE_OFFSETS (UINTPTR_MAX > 0xffffffffU)
# endif
#endif
typedef unsigned char u8;
#if LZF_USE_OFFSETS
# define LZF_HSLOT_BIAS ((const u8 *)in_data)
typedef unsigned int LZF_HSLOT;
#else
# define LZF_HSLOT_BIAS 0
typedef const u8 *LZF_HSLOT;
#endif
typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
#if !STRICT_ALIGN
/* for unaligned accesses we need a 16 bit datatype. */
# if USHRT_MAX == 65535
typedef unsigned short u16;
# elif UINT_MAX == 65535
typedef unsigned int u16;
# else
# undef STRICT_ALIGN
# define STRICT_ALIGN 1
# endif
#endif
#if ULTRA_FAST
# undef VERY_FAST
#endif
#endif