# SPDX-FileCopyrightText: 2024 Redict Contributors
# SPDX-FileCopyrightText: 2024 Salvatore Sanfilippo <antirez at gmail dot com>
#
# SPDX-License-Identifier: BSD-3-Clause
# SPDX-License-Identifier: LGPL-3.0-only

start_server {tags {"expire"}} {
    test {EXPIRE - set timeouts multiple times} {
        r set x foobar
        set v1 [r expire x 5]
        set v2 [r ttl x]
        set v3 [r expire x 10]
        set v4 [r ttl x]
        r expire x 2
        list $v1 $v2 $v3 $v4
    } {1 [45] 1 10}

    test {EXPIRE - It should still be possible to read 'x'} {
        r get x
    } {foobar}

    tags {"slow"} {
        test {EXPIRE - After 2.1 seconds the key should no longer be here} {
            after 2100
            list [r get x] [r exists x]
        } {{} 0}
    }

    test {EXPIRE - write on expire should work} {
        r del x
        r lpush x foo
        r expire x 1000
        r lpush x bar
        r lrange x 0 -1
    } {bar foo}

    test {EXPIREAT - Check for EXPIRE-like behavior} {
        r del x
        r set x foo
        r expireat x [expr [clock seconds]+15]
        r ttl x
    } {1[345]}

    test {SETEX - Set + Expire combo operation. Check for TTL} {
        r setex x 12 test
        r ttl x
    } {1[012]}

    test {SETEX - Check value} {
        r get x
    } {test}

    test {SETEX - Overwrite old key} {
        r setex y 1 foo
        r get y
    } {foo}

    tags {"slow"} {
        test {SETEX - Wait for the key to expire} {
            after 1100
            r get y
        } {}
    }

    test {SETEX - Wrong time parameter} {
        catch {r setex z -10 foo} e
        set _ $e
    } {*invalid expire*}

    test {PERSIST can undo an EXPIRE} {
        r set x foo
        r expire x 50
        list [r ttl x] [r persist x] [r ttl x] [r get x]
    } {50 1 -1 foo}

    test {PERSIST returns 0 against non-existing or non-volatile keys} {
        r set x foo
        list [r persist foo] [r persist nokeyatall]
    } {0 0}
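
    # A minimal illustrative sketch, assuming the server supports the
    # EXPIRE NX/XX/GT/LT condition flags introduced in Redis 7.0; the
    # key name `flagkey` is hypothetical and not used by other tests.
    test {EXPIRE with NX / GT / LT flags (illustrative sketch)} {
        r set flagkey foo
        assert_equal 1 [r expire flagkey 100 NX] ;# no TTL yet, so NX applies
        assert_equal 0 [r expire flagkey 200 NX] ;# TTL already set, NX refuses
        assert_equal 1 [r expire flagkey 200 GT] ;# 200 > 100, GT applies
        assert_equal 0 [r expire flagkey 100 GT] ;# 100 < 200, GT refuses
        assert_equal 1 [r expire flagkey 100 LT] ;# 100 < 200, LT applies
        r del flagkey ; # Do not leak volatile keys to other tests
    }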

    test {EXPIRE precision is now the millisecond} {
        # This test is very likely to produce a false positive if the
        # server is under pressure, so if it does not work give it a few
        # more chances.
        for {set j 0} {$j < 30} {incr j} {
            r del x
            r setex x 1 somevalue
            after 800
            set a [r get x]
            if {$a ne {somevalue}} continue
            after 300
            set b [r get x]
            if {$b eq {}} break
        }
        if {$::verbose} {
            puts "millisecond expire test attempts: $j"
        }
        assert_equal $a {somevalue}
        assert_equal $b {}
    }

    test "PSETEX can set sub-second expires" {
        # This test is very likely to produce a false positive if the server
        # is under pressure, so if it does not work give it a few more chances.
        for {set j 0} {$j < 50} {incr j} {
            r del x
            r psetex x 100 somevalue
            set a [r get x]
            after 101
            set b [r get x]
            if {$a eq {somevalue} && $b eq {}} break
        }
        if {$::verbose} { puts "PSETEX sub-second expire test attempts: $j" }
        list $a $b
    } {somevalue {}}

    test "PEXPIRE can set sub-second expires" {
        # This test is very likely to produce a false positive if the server
        # is under pressure, so if it does not work give it a few more chances.
        for {set j 0} {$j < 50} {incr j} {
            r set x somevalue
            r pexpire x 100
            set c [r get x]
            after 101
            set d [r get x]
            if {$c eq {somevalue} && $d eq {}} break
        }
        if {$::verbose} { puts "PEXPIRE sub-second expire test attempts: $j" }
        list $c $d
    } {somevalue {}}

    test "PEXPIREAT can set sub-second expires" {
        # This test is very likely to produce a false positive if the server
        # is under pressure, so if it does not work give it a few more chances.
        for {set j 0} {$j < 50} {incr j} {
            r set x somevalue
            set now [r time]
            r pexpireat x [expr ([lindex $now 0]*1000)+([lindex $now 1]/1000)+200]
            set e [r get x]
            after 201
            set f [r get x]
            if {$e eq {somevalue} && $f eq {}} break
        }
        if {$::verbose} { puts "PEXPIREAT sub-second expire test attempts: $j" }
        list $e $f
    } {somevalue {}}
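
    # The PEXPIREAT test above derives its deadline from the server's own
    # TIME reply, a two-element list of unix seconds and microseconds
    # within the current second. A small sketch of that conversion as a
    # helper; the proc name `server_time_ms` is hypothetical.
    proc server_time_ms {client} {
        set now [$client time]
        return [expr {[lindex $now 0]*1000 + [lindex $now 1]/1000}]
    }
    # Example usage: r pexpireat x [expr [server_time_ms r]+200]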

    test {TTL returns time to live in seconds} {
        r del x
        r setex x 10 somevalue
        set ttl [r ttl x]
        assert {$ttl > 8 && $ttl <= 10}
    }

    test {PTTL returns time to live in milliseconds} {
        r del x
        r setex x 1 somevalue
        set ttl [r pttl x]
        assert {$ttl > 500 && $ttl <= 1000}
    }

    test {TTL / PTTL / EXPIRETIME / PEXPIRETIME return -1 if key has no expire} {
        r del x
        r set x hello
        list [r ttl x] [r pttl x] [r expiretime x] [r pexpiretime x]
    } {-1 -1 -1 -1}

    test {TTL / PTTL / EXPIRETIME / PEXPIRETIME return -2 if key does not exist} {
        r del x
        list [r ttl x] [r pttl x] [r expiretime x] [r pexpiretime x]
    } {-2 -2 -2 -2}

    test {EXPIRETIME returns absolute expiration time in seconds} {
        r del x
        set abs_expire [expr [clock seconds] + 100]
        r set x somevalue exat $abs_expire
        assert_equal [r expiretime x] $abs_expire
    }

    test {PEXPIRETIME returns absolute expiration time in milliseconds} {
        r del x
        set abs_expire [expr [clock milliseconds] + 100000]
        r set x somevalue pxat $abs_expire
        assert_equal [r pexpiretime x] $abs_expire
    }
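
    # A minimal sketch of how the two commands relate (hypothetical key
    # `reltest`): for an expiry set with second resolution, the
    # millisecond form is exactly the second form multiplied by 1000.
    test {PEXPIRETIME is EXPIRETIME * 1000 for second-resolution expires (sketch)} {
        r set reltest somevalue exat [expr [clock seconds]+100]
        assert_equal [expr [r expiretime reltest]*1000] [r pexpiretime reltest]
        r del reltest ; # Do not leak volatile keys to other tests
    }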

    test {Redict should actively expire keys incrementally} {
        r flushdb
        r psetex key1 500 a
        r psetex key2 500 a
        r psetex key3 500 a
        assert_equal 3 [r dbsize]
        # Redict expires random keys ten times every second, so we are
        # fairly sure that all three keys should be evicted after
        # two seconds.
        wait_for_condition 20 100 {
            [r dbsize] eq 0
        } else {
            fail "Keys did not actively expire."
        }
    }
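
    # Another way to observe active expiration, sketched here under the
    # assumption that the server exports the cumulative `expired_keys`
    # counter in INFO stats (read through the suite's `s` helper); the
    # key name `obskey` is hypothetical.
    test {Actively expired keys increment the expired_keys counter (sketch)} {
        set before [s expired_keys]
        r psetex obskey 100 a
        wait_for_condition 20 100 {
            [s expired_keys] > $before
        } else {
            fail "expired_keys counter did not increase"
        }
    }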

    test {Redict should lazy expire keys} {
        r flushdb
        r debug set-active-expire 0
        r psetex key1{t} 500 a
        r psetex key2{t} 500 a
        r psetex key3{t} 500 a
        set size1 [r dbsize]
        # Redict expires random keys ten times every second, so we are
        # fairly sure that all three keys should be evicted after
        # one second.
        after 1000
        set size2 [r dbsize]
        r mget key1{t} key2{t} key3{t}
        set size3 [r dbsize]
        r debug set-active-expire 1
        list $size1 $size2 $size3
    } {3 3 0} {needs:debug}
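
    # Lazy expiration is triggered by any lookup, not only MGET; a minimal
    # sketch using EXISTS, assuming the same DEBUG facility as the test
    # above (hypothetical key `lazykey`).
    test {Lazy expire is also triggered by EXISTS (sketch)} {
        r debug set-active-expire 0
        r psetex lazykey 100 a
        after 200
        set e [r exists lazykey]
        r debug set-active-expire 1
        assert_equal 0 $e ;# the lookup itself reclaimed the expired key
    } {} {needs:debug}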

    test {EXPIRE should not resurrect keys (issue #1026)} {
        r debug set-active-expire 0
        r set foo bar
        r pexpire foo 500
        after 1000
        r expire foo 10
        r debug set-active-expire 1
        r exists foo
    } {0} {needs:debug}

    test {5 keys in, 5 keys out} {
        r flushdb
        r set a c
        r expire a 5
        r set t c
        r set e c
        r set s c
        r set foo b
        assert_equal [lsort [r keys *]] {a e foo s t}
        r del a ; # Do not leak volatile keys to other tests
    }

    test {EXPIRE with empty string as TTL should report an error} {
        r set foo bar
        catch {r expire foo ""} e
        set e
    } {*not an integer*}

    test {SET with EX with big integer should report an error} {
        catch {r set foo bar EX 10000000000000000} e
        set e
    } {ERR invalid expire time in 'set' command}

    test {SET with EX with smallest integer should report an error} {
        catch {r SET foo bar EX -9999999999999999} e
        set e
    } {ERR invalid expire time in 'set' command}

    test {GETEX with big integer should report an error} {
        r set foo bar
        catch {r GETEX foo EX 10000000000000000} e
        set e
    } {ERR invalid expire time in 'getex' command}

    test {GETEX with smallest integer should report an error} {
        r set foo bar
        catch {r GETEX foo EX -9999999999999999} e
        set e
    } {ERR invalid expire time in 'getex' command}

    test {EXPIRE with big integer overflows when converted to milliseconds} {
        r set foo bar

        # Hit `when > LLONG_MAX - basetime`
        assert_error "ERR invalid expire time in 'expire' command" {r EXPIRE foo 9223370399119966}

        # Hit `when > LLONG_MAX / 1000`
        assert_error "ERR invalid expire time in 'expire' command" {r EXPIRE foo 9223372036854776}
        assert_error "ERR invalid expire time in 'expire' command" {r EXPIRE foo 10000000000000000}
        assert_error "ERR invalid expire time in 'expire' command" {r EXPIRE foo 18446744073709561}

        assert_equal {-1} [r ttl foo]
    }
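
    # A sanity sketch of the boundary arithmetic used above: seconds are
    # converted to milliseconds before the base time is added, so any
    # value above LLONG_MAX / 1000 must be rejected outright.
    test {EXPIRE overflow boundary arithmetic (sketch)} {
        set llong_max 9223372036854775807
        assert {9223372036854776 > $llong_max / 1000}  ;# overflows as milliseconds
        assert {9223372036854775 <= $llong_max / 1000} ;# largest convertible value
    }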

    test {PEXPIRE with big integer overflow when basetime is added} {
        r set foo bar
        catch {r PEXPIRE foo 9223372036854770000} e
        set e
    } {ERR invalid expire time in 'pexpire' command}

    test {EXPIRE with big negative integer} {
        r set foo bar

        # Hit `when < LLONG_MIN / 1000`
        assert_error "ERR invalid expire time in 'expire' command" {r EXPIRE foo -9223372036854776}
        assert_error "ERR invalid expire time in 'expire' command" {r EXPIRE foo -9999999999999999}

        r ttl foo
    } {-1}

    test {PEXPIREAT with big integer works} {
        r set foo bar
        r PEXPIREAT foo 9223372036854770000
    } {1}

    test {PEXPIREAT with big negative integer works} {
        r set foo bar
        r PEXPIREAT foo -9223372036854770000
        r ttl foo
    } {-2}
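
    # A companion sketch: a PEXPIREAT in the past deletes the key at once,
    # which is why TTL reports -2 (no such key) above; EXISTS confirms it.
    test {PEXPIREAT in the past removes the key (sketch)} {
        r set foo bar
        r pexpireat foo 1
        assert_equal 0 [r exists foo]
    }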

    # Start a new server with empty data and AOF file.
start_server {overrides {appendonly {yes} appendfsync always} tags {external:skip}} {
        test {All time-to-live (TTL) in commands are propagated as absolute timestamps in milliseconds in AOF} {
            # This test makes sure that expire times are propagated as absolute
            # times to the AOF file and not as relative times, so that when the
            # AOF is reloaded the TTLs are not shifted forward into the future.
            # We want the time to logically pass while the server is restarted!
            set aof [get_last_incr_aof_path r]

            # Apply each TTL-related command to a unique key
            # SET commands
            r set foo1 bar ex 100
            r set foo2 bar px 100000
            r set foo3 bar exat [expr [clock seconds]+100]
            r set foo4 bar PXAT [expr [clock milliseconds]+100000]
            r setex foo5 100 bar
            r psetex foo6 100000 bar
            # EXPIRE-family commands
            r set foo7 bar
            r expire foo7 100
            r set foo8 bar
            r pexpire foo8 100000
            r set foo9 bar
            r expireat foo9 [expr [clock seconds]+100]
            r set foo10 bar
            r pexpireat foo10 [expr [clock seconds]*1000+100000]
            r set foo11 bar
            r expireat foo11 [expr [clock seconds]-100]
            # GETEX commands
            r set foo12 bar
            r getex foo12 ex 100
            r set foo13 bar
            r getex foo13 px 100000
            r set foo14 bar
            r getex foo14 exat [expr [clock seconds]+100]
            r set foo15 bar
            r getex foo15 pxat [expr [clock milliseconds]+100000]
            # RESTORE commands
            r set foo16 bar
            set encoded [r dump foo16]
            r restore foo17 100000 $encoded
            r restore foo18 [expr [clock milliseconds]+100000] $encoded absttl

            # Assert that each TTL-related command is persisted with an absolute timestamp in the AOF
            assert_aof_content $aof {
                {select *}
                {set foo1 bar PXAT *}
                {set foo2 bar PXAT *}
                {set foo3 bar PXAT *}
                {set foo4 bar PXAT *}
                {set foo5 bar PXAT *}
                {set foo6 bar PXAT *}
                {set foo7 bar}
                {pexpireat foo7 *}
                {set foo8 bar}
                {pexpireat foo8 *}
                {set foo9 bar}
                {pexpireat foo9 *}
                {set foo10 bar}
                {pexpireat foo10 *}
                {set foo11 bar}
                {del foo11}
                {set foo12 bar}
                {pexpireat foo12 *}
                {set foo13 bar}
                {pexpireat foo13 *}
                {set foo14 bar}
                {pexpireat foo14 *}
                {set foo15 bar}
                {pexpireat foo15 *}
                {set foo16 bar}
                {restore foo17 * * ABSTTL}
                {restore foo18 * * absttl}
            }
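
            # In the patterns above, `*` glob-matches the runtime-dependent
            # absolute millisecond timestamps; note that foo11, whose expire
            # time was already in the past, is propagated as a DEL instead.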

            # Remember the absolute TTLs of all the keys
            set ttl1 [r pexpiretime foo1]
            set ttl2 [r pexpiretime foo2]
            set ttl3 [r pexpiretime foo3]
            set ttl4 [r pexpiretime foo4]
            set ttl5 [r pexpiretime foo5]
            set ttl6 [r pexpiretime foo6]
            set ttl7 [r pexpiretime foo7]
            set ttl8 [r pexpiretime foo8]
            set ttl9 [r pexpiretime foo9]
            set ttl10 [r pexpiretime foo10]
            assert_equal "-2" [r pexpiretime foo11] ; # foo11 is gone
            set ttl12 [r pexpiretime foo12]
            set ttl13 [r pexpiretime foo13]
            set ttl14 [r pexpiretime foo14]
            set ttl15 [r pexpiretime foo15]
            assert_equal "-1" [r pexpiretime foo16] ; # foo16 has no TTL
            set ttl17 [r pexpiretime foo17]
            set ttl18 [r pexpiretime foo18]

            # Let some time pass and reload data from AOF
            after 2000
            r debug loadaof

            # Assert that relative TTLs are roughly the same
            assert_range [r ttl foo1] 90 98
            assert_range [r ttl foo2] 90 98
            assert_range [r ttl foo3] 90 98
            assert_range [r ttl foo4] 90 98
            assert_range [r ttl foo5] 90 98
            assert_range [r ttl foo6] 90 98
            assert_range [r ttl foo7] 90 98
            assert_range [r ttl foo8] 90 98
            assert_range [r ttl foo9] 90 98
            assert_range [r ttl foo10] 90 98
            assert_equal [r ttl foo11] "-2" ; # foo11 is gone
            assert_range [r ttl foo12] 90 98
            assert_range [r ttl foo13] 90 98
            assert_range [r ttl foo14] 90 98
            assert_range [r ttl foo15] 90 98
            assert_equal [r ttl foo16] "-1" ; # foo16 has no TTL
            assert_range [r ttl foo17] 90 98
            assert_range [r ttl foo18] 90 98

            # Assert that all keys have restored the same absolute TTLs from AOF
            assert_equal [r pexpiretime foo1] $ttl1
            assert_equal [r pexpiretime foo2] $ttl2
            assert_equal [r pexpiretime foo3] $ttl3
            assert_equal [r pexpiretime foo4] $ttl4
            assert_equal [r pexpiretime foo5] $ttl5
            assert_equal [r pexpiretime foo6] $ttl6
            assert_equal [r pexpiretime foo7] $ttl7
            assert_equal [r pexpiretime foo8] $ttl8
            assert_equal [r pexpiretime foo9] $ttl9
            assert_equal [r pexpiretime foo10] $ttl10
            assert_equal [r pexpiretime foo11] "-2" ; # foo11 is gone
            assert_equal [r pexpiretime foo12] $ttl12
            assert_equal [r pexpiretime foo13] $ttl13
            assert_equal [r pexpiretime foo14] $ttl14
            assert_equal [r pexpiretime foo15] $ttl15
            assert_equal [r pexpiretime foo16] "-1" ; # foo16 has no TTL
            assert_equal [r pexpiretime foo17] $ttl17
            assert_equal [r pexpiretime foo18] $ttl18
        } {} {needs:debug}
|
2021-01-19 11:49:26 -05:00
|
|
|
}
|
2017-06-16 05:51:38 -04:00
|
|
|
|
2021-05-30 02:20:32 -04:00
|
|
|
test {All TTLs in commands are propagated as absolute timestamps in the replication stream} {
|
|
|
|
# Make sure that both relative and absolute expire commands are propagated
|
|
|
|
# as absolute to replicas for two reasons:
|
|
|
|
# 1) We want to avoid replicas retaining data much longer than primary due
|
|
|
|
# to replication lag.
|
|
|
|
# 2) We want to unify the way TTLs are replicated in both RDB and replication
|
|
|
|
# stream, which is as absolute timestamps.
|
2024-03-21 05:56:59 -04:00
|
|
|
# See: https://github.com/redict/redict/issues/8433
|
2021-01-19 11:49:26 -05:00
|
|
|
|
2021-06-13 01:42:20 -04:00
|
|
|
r flushall ; # Clean up keyspace to avoid interference by keys from other tests
|
2021-01-19 11:49:26 -05:00
|
|
|
set repl [attach_to_replication_stream]
|
2021-05-30 02:20:32 -04:00
|
|
|
# SET commands
|
2021-01-27 12:47:26 -05:00
|
|
|
r set foo1 bar ex 200
|
2021-01-19 11:49:26 -05:00
|
|
|
r set foo1 bar px 100000
|
2021-01-27 12:47:26 -05:00
|
|
|
r set foo1 bar exat [expr [clock seconds]+100]
|
2021-06-01 01:01:10 -04:00
|
|
|
r set foo1 bar pxat [expr [clock milliseconds]+100000]
|
2021-01-19 11:49:26 -05:00
|
|
|
r setex foo1 100 bar
|
|
|
|
r psetex foo1 100000 bar
|
|
|
|
r set foo2 bar
|
2021-05-30 02:20:32 -04:00
|
|
|
# EXPIRE-family commands
|
2021-01-19 11:49:26 -05:00
|
|
|
r expire foo2 100
|
|
|
|
r pexpire foo2 100000
|
|
|
|
r set foo3 bar
|
|
|
|
r expireat foo3 [expr [clock seconds]+100]
|
|
|
|
r pexpireat foo3 [expr [clock seconds]*1000+100000]
|
|
|
|
r expireat foo3 [expr [clock seconds]-100]
|
2021-05-30 02:20:32 -04:00
|
|
|
# GETEX-family commands
|
2021-01-27 12:47:26 -05:00
|
|
|
r set foo4 bar
|
|
|
|
r getex foo4 ex 200
|
|
|
|
r getex foo4 px 200000
|
|
|
|
r getex foo4 exat [expr [clock seconds]+100]
|
2021-06-01 01:01:10 -04:00
|
|
|
r getex foo4 pxat [expr [clock milliseconds]+100000]
|
2021-05-30 02:20:32 -04:00
|
|
|
# RESTORE commands
|
|
|
|
r set foo5 bar
|
|
|
|
set encoded [r dump foo5]
|
|
|
|
r restore foo6 100000 $encoded
|
|
|
|
r restore foo7 [expr [clock milliseconds]+100000] $encoded absttl
|
|
|
|
|
2021-01-19 11:49:26 -05:00
|
|
|
assert_replication_stream $repl {
|
|
|
|
{select *}
|
2021-01-27 12:47:26 -05:00
|
|
|
{set foo1 bar PXAT *}
|
|
|
|
{set foo1 bar PXAT *}
|
2021-05-30 02:20:32 -04:00
|
|
|
{set foo1 bar PXAT *}
|
2023-06-15 03:07:47 -04:00
|
|
|
{set foo1 bar pxat *}
|
2021-05-30 02:20:32 -04:00
|
|
|
{set foo1 bar PXAT *}
|
|
|
|
{set foo1 bar PXAT *}
|
2021-01-19 11:49:26 -05:00
|
|
|
{set foo2 bar}
|
2021-05-30 02:20:32 -04:00
|
|
|
{pexpireat foo2 *}
|
|
|
|
{pexpireat foo2 *}
|
2021-01-19 11:49:26 -05:00
|
|
|
{set foo3 bar}
|
2021-05-30 02:20:32 -04:00
|
|
|
{pexpireat foo3 *}
|
2021-01-19 11:49:26 -05:00
|
|
|
{pexpireat foo3 *}
|
|
|
|
{del foo3}
|
2021-01-27 12:47:26 -05:00
|
|
|
{set foo4 bar}
|
|
|
|
{pexpireat foo4 *}
|
|
|
|
{pexpireat foo4 *}
|
2021-05-30 02:20:32 -04:00
|
|
|
{pexpireat foo4 *}
|
|
|
|
{pexpireat foo4 *}
|
|
|
|
{set foo5 bar}
|
2023-10-15 02:58:26 -04:00
|
|
|
{restore foo6 * * ABSTTL}
|
|
|
|
{restore foo7 * * absttl}
|
2021-05-30 02:20:32 -04:00
|
|
|
}
|
2021-12-19 10:41:51 -05:00
|
|
|
close_replication_stream $repl
|
2021-06-29 09:48:52 -04:00
|
|
|
} {} {needs:repl}
|
2021-05-30 02:20:32 -04:00
|
|
|
|
|
|
|
# Start another server to test replication of TTLs
|
2021-06-09 08:13:24 -04:00
|
|
|
start_server {tags {needs:repl external:skip}} {
|
2021-05-30 02:20:32 -04:00
|
|
|
# Set the outer layer server as primary
|
|
|
|
set primary [srv -1 client]
|
|
|
|
set primary_host [srv -1 host]
|
|
|
|
set primary_port [srv -1 port]
|
|
|
|
# Set this inner layer server as replica
|
|
|
|
set replica [srv 0 client]
|
|
|
|
|
|
|
|
test {First server should have role slave after REPLICAOF} {
|
|
|
|
$replica replicaof $primary_host $primary_port
|
|
|
|
wait_for_condition 50 100 {
|
|
|
|
[s 0 role] eq {slave}
|
|
|
|
} else {
|
|
|
|
fail "Replication not started."
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
test {For all replicated TTL-related commands, absolute expire times are identical on primary and replica} {
|
|
|
|
# Apply each TTL-related command to a unique key on primary
|
|
|
|
# SET commands
|
|
|
|
$primary set foo1 bar ex 100
|
|
|
|
$primary set foo2 bar px 100000
|
|
|
|
$primary set foo3 bar exat [expr [clock seconds]+100]
|
|
|
|
$primary set foo4 bar pxat [expr [clock milliseconds]+100000]
|
|
|
|
$primary setex foo5 100 bar
|
|
|
|
$primary psetex foo6 100000 bar
|
|
|
|
# EXPIRE-family commands
|
|
|
|
$primary set foo7 bar
|
|
|
|
$primary expire foo7 100
|
|
|
|
$primary set foo8 bar
|
|
|
|
$primary pexpire foo8 100000
|
|
|
|
$primary set foo9 bar
|
|
|
|
$primary expireat foo9 [expr [clock seconds]+100]
|
|
|
|
$primary set foo10 bar
|
|
|
|
$primary pexpireat foo10 [expr [clock milliseconds]+100000]
|
|
|
|
# GETEX commands
|
|
|
|
$primary set foo11 bar
|
|
|
|
$primary getex foo11 ex 100
|
|
|
|
$primary set foo12 bar
|
|
|
|
$primary getex foo12 px 100000
|
|
|
|
$primary set foo13 bar
|
|
|
|
$primary getex foo13 exat [expr [clock seconds]+100]
|
|
|
|
$primary set foo14 bar
|
|
|
|
$primary getex foo14 pxat [expr [clock milliseconds]+100000]
|
|
|
|
# RESTORE commands
|
|
|
|
$primary set foo15 bar
|
|
|
|
set encoded [$primary dump foo15]
|
|
|
|
$primary restore foo16 100000 $encoded
|
|
|
|
$primary restore foo17 [expr [clock milliseconds]+100000] $encoded absttl
|
|
|
|
|
|
|
|
# Wait for replica to get the keys and TTLs
|
|
|
|
assert {[$primary wait 1 0] == 1}
|
|
|
|
|
|
|
|
# Verify absolute TTLs are identical on primary and replica for all keys
|
|
|
|
# This is because TTLs are always replicated as absolute values
|
|
|
|
foreach key [$primary keys *] {
|
|
|
|
assert_equal [$primary pexpiretime $key] [$replica pexpiretime $key]
|
|
|
|
}
|
2021-01-19 11:49:26 -05:00
|
|
|
}
|
2023-02-20 03:23:25 -05:00
|
|
|
|
|
|
|
test {expired key which is created in writeable replicas should be deleted by active expiry} {
|
|
|
|
$primary flushall
|
|
|
|
$replica config set replica-read-only no
|
|
|
|
foreach {yes_or_no} {yes no} {
|
|
|
|
$replica config set appendonly $yes_or_no
|
|
|
|
waitForBgrewriteaof $replica
|
|
|
|
set prev_expired [s expired_keys]
|
|
|
|
$replica set foo bar PX 1
|
|
|
|
wait_for_condition 100 10 {
|
|
|
|
[s expired_keys] eq $prev_expired + 1
|
|
|
|
} else {
|
|
|
|
fail "key not expired"
|
|
|
|
}
|
|
|
|
assert_equal {} [$replica get foo]
|
|
|
|
}
|
|
|
|
}
|
2017-06-16 05:51:38 -04:00
|
|
|
}
|
2019-12-18 01:49:38 -05:00
|
|
|
|
|
|
|
test {SET command will remove expire} {
|
|
|
|
r set foo bar EX 100
|
|
|
|
r set foo bar
|
|
|
|
r ttl foo
|
|
|
|
} {-1}
|
|
|
|
|
|
|
|
test {SET - use KEEPTTL option, TTL should not be removed} {
|
|
|
|
r set foo bar EX 100
|
|
|
|
r set foo bar KEEPTTL
|
|
|
|
set ttl [r ttl foo]
|
|
|
|
assert {$ttl <= 100 && $ttl > 90}
|
|
|
|
}
|
2020-06-03 05:55:18 -04:00
|
|
|
|
|
|
|
test {SET - use KEEPTTL option, TTL should not be removed after loadaof} {
|
|
|
|
r config set appendonly yes
|
|
|
|
r set foo bar EX 100
|
|
|
|
r set foo bar2 KEEPTTL
|
|
|
|
after 2000
|
|
|
|
r debug loadaof
|
|
|
|
set ttl [r ttl foo]
|
|
|
|
assert {$ttl <= 98 && $ttl > 90}
|
2021-06-09 08:13:24 -04:00
|
|
|
} {} {needs:debug}
|
2021-01-27 12:47:26 -05:00
|
|
|
|
|
|
|
test {GETEX use of PERSIST option should remove TTL} {
|
|
|
|
r set foo bar EX 100
|
|
|
|
r getex foo PERSIST
|
|
|
|
r ttl foo
|
|
|
|
} {-1}
|
|
|
|
|
|
|
|
test {GETEX use of PERSIST option should remove TTL after loadaof} {
|
2023-02-09 00:57:19 -05:00
|
|
|
r config set appendonly yes
|
2021-01-27 12:47:26 -05:00
|
|
|
r set foo bar EX 100
|
|
|
|
r getex foo PERSIST
|
|
|
|
r debug loadaof
|
|
|
|
r ttl foo
|
2021-06-09 08:13:24 -04:00
|
|
|
} {-1} {needs:debug}
|
2021-01-27 12:47:26 -05:00
|
|
|
|
|
|
|
test {GETEX propagates to replica as PERSIST, DEL, or nothing} {
|
2023-03-08 15:39:54 -05:00
|
|
|
# In the above tests, many keys with random expiration times are set. Flush
|
|
|
|
# the DBs to avoid active expiry kicking in and messing up the replication streams.
|
|
|
|
r flushall
|
2021-01-27 12:47:26 -05:00
|
|
|
set repl [attach_to_replication_stream]
|
|
|
|
r set foo bar EX 100
|
|
|
|
r getex foo PERSIST
|
|
|
|
r getex foo
|
|
|
|
r getex foo exat [expr [clock seconds]-100]
|
|
|
|
assert_replication_stream $repl {
|
|
|
|
{select *}
|
2021-05-30 02:20:32 -04:00
|
|
|
{set foo bar PXAT *}
|
2021-01-27 12:47:26 -05:00
|
|
|
{persist foo}
|
|
|
|
{del foo}
|
|
|
|
}
|
2021-12-19 10:41:51 -05:00
|
|
|
close_replication_stream $repl
|
2021-06-09 08:13:24 -04:00
|
|
|
} {} {needs:repl}
|
2021-08-02 01:57:49 -04:00
|
|
|
|
|
|
|
test {EXPIRE with NX option on a key with ttl} {
|
|
|
|
r SET foo bar EX 100
|
|
|
|
assert_equal [r EXPIRE foo 200 NX] 0
|
|
|
|
assert_range [r TTL foo] 50 100
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with NX option on a key without ttl} {
|
|
|
|
r SET foo bar
|
|
|
|
assert_equal [r EXPIRE foo 200 NX] 1
|
|
|
|
assert_range [r TTL foo] 100 200
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with XX option on a key with ttl} {
|
|
|
|
r SET foo bar EX 100
|
|
|
|
assert_equal [r EXPIRE foo 200 XX] 1
|
|
|
|
assert_range [r TTL foo] 100 200
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with XX option on a key without ttl} {
|
|
|
|
r SET foo bar
|
|
|
|
assert_equal [r EXPIRE foo 200 XX] 0
|
|
|
|
assert_equal [r TTL foo] -1
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with GT option on a key with lower ttl} {
|
|
|
|
r SET foo bar EX 100
|
|
|
|
assert_equal [r EXPIRE foo 200 GT] 1
|
|
|
|
assert_range [r TTL foo] 100 200
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with GT option on a key with higher ttl} {
|
|
|
|
r SET foo bar EX 200
|
|
|
|
assert_equal [r EXPIRE foo 100 GT] 0
|
|
|
|
assert_range [r TTL foo] 100 200
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with GT option on a key without ttl} {
|
|
|
|
r SET foo bar
|
|
|
|
assert_equal [r EXPIRE foo 200 GT] 0
|
|
|
|
assert_equal [r TTL foo] -1
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with LT option on a key with higher ttl} {
|
|
|
|
r SET foo bar EX 100
|
|
|
|
assert_equal [r EXPIRE foo 200 LT] 0
|
|
|
|
assert_range [r TTL foo] 50 100
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with LT option on a key with lower ttl} {
|
|
|
|
r SET foo bar EX 200
|
|
|
|
assert_equal [r EXPIRE foo 100 LT] 1
|
|
|
|
assert_range [r TTL foo] 50 100
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with LT option on a key without ttl} {
|
|
|
|
r SET foo bar
|
|
|
|
assert_equal [r EXPIRE foo 100 LT] 1
|
|
|
|
assert_range [r TTL foo] 50 100
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with LT and XX option on a key with ttl} {
|
|
|
|
r SET foo bar EX 200
|
|
|
|
assert_equal [r EXPIRE foo 100 LT XX] 1
|
|
|
|
assert_range [r TTL foo] 50 100
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with LT and XX option on a key without ttl} {
|
|
|
|
r SET foo bar
|
|
|
|
assert_equal [r EXPIRE foo 200 LT XX] 0
|
|
|
|
assert_equal [r TTL foo] -1
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with conflicting options: LT GT} {
|
|
|
|
catch {r EXPIRE foo 200 LT GT} e
|
|
|
|
set e
|
|
|
|
} {ERR GT and LT options at the same time are not compatible}
|
|
|
|
|
|
|
|
test {EXPIRE with conflicting options: NX GT} {
|
|
|
|
catch {r EXPIRE foo 200 NX GT} e
|
|
|
|
set e
|
|
|
|
} {ERR NX and XX, GT or LT options at the same time are not compatible}
|
|
|
|
|
|
|
|
test {EXPIRE with conflicting options: NX LT} {
|
|
|
|
catch {r EXPIRE foo 200 NX LT} e
|
|
|
|
set e
|
|
|
|
} {ERR NX and XX, GT or LT options at the same time are not compatible}
|
|
|
|
|
|
|
|
test {EXPIRE with conflicting options: NX XX} {
|
|
|
|
catch {r EXPIRE foo 200 NX XX} e
|
|
|
|
set e
|
|
|
|
} {ERR NX and XX, GT or LT options at the same time are not compatible}
|
|
|
|
|
|
|
|
test {EXPIRE with unsupported options} {
|
|
|
|
catch {r EXPIRE foo 200 AB} e
|
|
|
|
set e
|
|
|
|
} {ERR Unsupported option AB}
|
|
|
|
|
|
|
|
test {EXPIRE with unsupported options} {
|
|
|
|
catch {r EXPIRE foo 200 XX AB} e
|
|
|
|
set e
|
|
|
|
} {ERR Unsupported option AB}
|
|
|
|
|
|
|
|
test {EXPIRE with negative expiry} {
|
|
|
|
r SET foo bar EX 100
|
|
|
|
assert_equal [r EXPIRE foo -10 LT] 1
|
|
|
|
assert_equal [r TTL foo] -2
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with negative expiry on a non-volatile key} {
|
|
|
|
r SET foo bar
|
|
|
|
assert_equal [r EXPIRE foo -10 LT] 1
|
|
|
|
assert_equal [r TTL foo] -2
|
|
|
|
} {}
|
|
|
|
|
|
|
|
test {EXPIRE with a nonexistent key} {
|
|
|
|
assert_equal [r EXPIRE none 100 NX] 0
|
|
|
|
assert_equal [r EXPIRE none 100 XX] 0
|
|
|
|
assert_equal [r EXPIRE none 100 GT] 0
|
|
|
|
assert_equal [r EXPIRE none 100 LT] 0
|
|
|
|
} {}
|
Fix replication inconsistency on modules that use key space notifications (#10969)
Fix replication inconsistency on modules that use key space notifications.
### The Problem
In general, key space notifications are invoked after the command logic has
executed (this is not always the case; we discuss specific commands that do
not follow this rule below). For example, `set x 1` will trigger
a `set` notification that will be invoked after the `set` logic was performed, so
if the notification logic tries to fetch `x`, it will see the new data that was written.
Consider a scenario in which the notification logic performs some write
commands. For example, the notification logic increments some counter,
`incr x{counter}`, indicating how many times `x` was changed.
The logical order in which the logic was executed is as follows:
```
set x 1
incr x{counter}
```
The issue is that the `set x 1` command is added to the replication buffer
at the end of the command invocation (specifically after the key space
notification logic was invoked and performed the `incr` command).
The replication/aof sees the commands in the wrong order:
```
incr x{counter}
set x 1
```
In this specific example the order is less important.
But if, for example, the notification had deleted `x`, then we would
end up with primary-replica inconsistency.
### The Solution
Put the command that caused the notification in its rightful place. In the
above example, the `set x 1` command logic was executed before the
notification logic, so it should be added to the replication buffer before
the commands that are invoked by the notification logic. To achieve this,
without a major code refactoring, we save a placeholder in the replication
buffer; when the command logic finishes, we check whether the command
needs to be replicated, and if it does, we use the placeholder to add it to the
replication buffer instead of appending it to the end.
To be efficient and avoid allocating memory for each command just to save the
placeholder, the replication buffer array was modified to reuse memory
(instead of allocating it each time we want to replicate commands).
Also, to avoid saving a placeholder when not needed, we do it only for
WRITE or MAY_REPLICATE commands.
#### Additional Fixes
* Expire and Eviction notifications:
  * The logical order was to first perform the Expire/Eviction and then the
    notification logic. The replication buffer got this the other way around
    (first the notification effect and then the `del` command). The PR fixes
    this issue.
  * The notification effect and the `del` command were not wrapped in
    `multi-exec` (if needed). The PR fixes this issue as well.
* SPOP command:
  * On SPOP, the `spop` notification was fired before the command logic was
    executed. The change in this PR would have caused the replication order
    to change (first the `spop` command and then the notification logic),
    although the logical order is first the notification logic and then the
    `spop` logic. The right fix would have been to move the notification to
    fire after the command was executed (like all the other commands),
    but this can be considered a breaking change. To overcome this, the PR
    keeps the current behavior and changes the `spop` code to keep the right
    logical order when pushing commands to the replication buffer. Another PR
    will follow to fix SPOP properly and match it to the other commands (we
    split it into 2 separate PRs so it will be easy to cherry-pick this PR to
    7.0 if we choose to).
#### Unhandled Known Limitations
* Key miss event:
  * On a key miss event, if a module performed some write command on the
    event (using `RM_Call`), the `dirty` counter would increase and the read
    command that caused the key miss event would be replicated to the
    replication stream and aof. This problem can also happen with a write
    command that opens some keys but eventually decides not to perform any
    action. We decided not to handle this problem in this PR because the
    solution is complex and would add risk if we want to cherry-pick this PR.
    We should decide whether to handle it in future PRs. For now, module
    writers are advised not to perform any write commands on a key miss event.
#### Testing
* We already have tests covering cases where a notification invokes write
  commands that are also added to the replication buffer; the tests were
  modified to verify that the replica gets the commands in the correct
  logical order.
* A test was added to verify that `spop` behavior was kept unchanged.
* A test was added to verify the key miss event behaves as expected.
* A test was added to verify the changes do not break lazy expiration.
#### Additional Changes
* The `propagateNow` function can accept a special dbid, -1, indicating not
  to replicate `select`. We use this to replicate `multi/exec` in the
  `propagatePendingCommands` function. A side effect of this change is that
  the `select` command will now appear inside the `multi/exec` block in the
  replication stream (instead of outside of it). Tests were modified to match
  this new behavior.
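A minimal sketch of the placeholder mechanism described in the solution above (hypothetical names and a fixed-size buffer for brevity; the real replication buffer logic is more involved):
```
/* Sketch only: reserve an entry before running the command, let
 * notification handlers append their effects, then fill the reserved
 * entry so the triggering command is replicated first. */
typedef struct replEntry { const char *cmd; } replEntry;

#define REPL_MAX 128
static replEntry replBuf[REPL_MAX];
static int replLen = 0;

/* Reserve an (empty) entry for the command about to execute. */
static int replReservePlaceholder(void) {
    replBuf[replLen].cmd = 0;
    return replLen++;
}

/* Used by effects generated while the command runs, e.g. key space
 * notification handlers issuing their own writes. */
static void replAppend(const char *cmd) {
    replBuf[replLen++].cmd = cmd;
}

/* After the command logic ran: fill the reserved entry if the command
 * turned out to need replication (only WRITE / MAY_REPLICATE commands
 * reserve one in the first place). */
static void replFillPlaceholder(int idx, const char *cmd) {
    replBuf[idx].cmd = cmd;
}

/* "set x 1" ends up before the notification's "incr x{counter}",
 * matching the logical execution order. */
void example(void) {
    int ph = replReservePlaceholder();
    /* ... set logic runs, fires the keyspace notification ... */
    replAppend("incr x{counter}");      /* written by the notification */
    replFillPlaceholder(ph, "set x 1"); /* triggering command, in order */
}
```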
2022-08-18 03:16:32 -04:00
|
|
|
|
2024-03-21 05:56:59 -04:00
|
|
|
test {Redict should not propagate the read command on lazy expire} {
|
2022-08-18 03:16:32 -04:00
|
|
|
r debug set-active-expire 0
|
|
|
|
r flushall ; # Clean up keyspace to avoid interference by keys from other tests
|
|
|
|
r set foo bar PX 1
|
|
|
|
set repl [attach_to_replication_stream]
|
|
|
|
wait_for_condition 50 100 {
|
|
|
|
[r get foo] eq {}
|
|
|
|
} else {
|
|
|
|
fail "Replication not started."
|
|
|
|
}
|
|
|
|
|
|
|
|
# dummy command to verify nothing else gets into the replication stream.
|
|
|
|
r set x 1
|
|
|
|
|
|
|
|
assert_replication_stream $repl {
|
|
|
|
{select *}
|
|
|
|
{del foo}
|
|
|
|
{set x 1}
|
|
|
|
}
|
|
|
|
close_replication_stream $repl
|
|
|
|
assert_equal [r debug set-active-expire 1] {OK}
|
|
|
|
} {} {needs:debug}
|
2023-02-14 02:33:21 -05:00
|
|
|
|
|
|
|
test {SCAN: Lazy-expire should not be wrapped in MULTI/EXEC} {
|
|
|
|
r debug set-active-expire 0
|
|
|
|
r flushall
|
|
|
|
|
|
|
|
r set foo1 bar PX 1
|
|
|
|
r set foo2 bar PX 1
|
|
|
|
after 2
|
|
|
|
|
|
|
|
set repl [attach_to_replication_stream]
|
|
|
|
|
|
|
|
r scan 0
|
|
|
|
|
|
|
|
assert_replication_stream $repl {
|
|
|
|
{select *}
|
|
|
|
{del foo*}
|
|
|
|
{del foo*}
|
|
|
|
}
|
|
|
|
close_replication_stream $repl
|
|
|
|
assert_equal [r debug set-active-expire 1] {OK}
|
|
|
|
} {} {needs:debug}
|
|
|
|
|
|
|
|
test {RANDOMKEY: Lazy-expire should not be wrapped in MULTI/EXEC} {
|
|
|
|
r debug set-active-expire 0
|
|
|
|
r flushall
|
|
|
|
|
|
|
|
r set foo1 bar PX 1
|
|
|
|
r set foo2 bar PX 1
|
|
|
|
after 2
|
|
|
|
|
|
|
|
set repl [attach_to_replication_stream]
|
|
|
|
|
|
|
|
r randomkey
|
|
|
|
|
|
|
|
assert_replication_stream $repl {
|
|
|
|
{select *}
|
|
|
|
{del foo*}
|
|
|
|
{del foo*}
|
|
|
|
}
|
|
|
|
close_replication_stream $repl
|
|
|
|
assert_equal [r debug set-active-expire 1] {OK}
|
|
|
|
} {} {needs:debug}
|
2010-05-14 11:31:11 -04:00
|
|
|
}
|
2023-10-15 02:58:26 -04:00
|
|
|
|
Fix dictionary use-after-free in active expire and make kvstore iter respect EMPTY flag (#13135)
After #13072, there is a use-after-free error. In expireScanCallback we
delete the dict, and then in dictScan we continue to use it, e.g. by calling
`dictResumeRehashing(d)` at the end; this caused the error.
In this PR, in freeDictIfNeeded, if the dict's pauserehash is set, we don't
delete the dict yet, and instead try to delete it again when the scan returns.
At the same time, we noticed similar problems with iterators: we may also
delete elements during the iteration process, causing the dict to be deleted,
so the iterator-related parts of the PR have also been modified.
dictResetIterator was also missing from the previous kvstoreIteratorNextDict;
we currently have no scenario in which elements are deleted during a
kvstoreIterator run, but we handle it together to avoid future problems.
Added some simple tests to verify the changes.
In addition, the modification in #13072 omitted initTempDb and emptyDbAsync,
and they were also added. This PR also removes the slow flag from the expire
test (it consumes 1.3s) so that problems can be found in CI in the future.
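A minimal sketch of the deferred-free pattern this fix describes (hypothetical names; not the actual kvstore code): when rehashing is paused because a scan or safe iterator is active, the dict is only marked for deletion, and the free is retried once the scan returns.
```
/* Sketch only: defer freeing a dict while a scan/iterator holds it. */
typedef struct dict {
    int pauserehash;    /* >0 while a safe iterator or scan is active */
    int delete_pending; /* set when a free was requested mid-scan */
} dict;

static void dictFree(dict *d) { /* ... release memory ... */ (void)d; }

/* Called when a dict becomes empty, e.g. from a scan callback. */
void freeDictIfNeeded(dict *d) {
    if (d->pauserehash > 0) {
        d->delete_pending = 1; /* unsafe to free now: scan still uses it */
        return;
    }
    dictFree(d);
}

/* Called by the scan/iterator once it is done with the dict. */
void scanDone(dict *d) {
    d->pauserehash--;
    if (d->pauserehash == 0 && d->delete_pending) dictFree(d);
}
```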
2024-03-18 11:41:54 -04:00
|
|
|
start_cluster 1 0 {tags {"expire external:skip cluster"}} {
|
2023-10-15 02:58:26 -04:00
|
|
|
test "expire scan should skip dictionaries with lot's of empty buckets" {
|
2023-10-24 04:29:40 -04:00
|
|
|
r debug set-active-expire 0
|
|
|
|
|
2023-10-15 02:58:26 -04:00
|
|
|
# Collect two slots to help verify that the expiry scan logic is able
|
|
|
|
# to go past certain slots which aren't valid for scanning at a given point in time,
|
2023-11-14 07:28:46 -05:00
|
|
|
# and that the next non-empty slot after them still gets scanned and expiration happens.
|
2023-10-15 02:58:26 -04:00
|
|
|
|
|
|
|
# hashslot(alice) is 749
|
|
|
|
r psetex alice 500 val
|
|
|
|
|
|
|
|
# hashslot(foo) is 12182
|
|
|
|
# fill data across different slots with expiration
|
|
|
|
for {set j 1} {$j <= 100} {incr j} {
|
|
|
|
r psetex "{foo}$j" 500 a
|
|
|
|
}
|
|
|
|
# hashslot(key) is 12539
|
|
|
|
r psetex key 500 val
|
|
|
|
|
2024-02-08 09:39:58 -05:00
|
|
|
# Disable resizing; the reason for not using a slow bgsave is that
|
|
|
|
# it would hit the dict_force_resize_ratio.
|
|
|
|
r debug dict-resizing 0
|
Replace cluster metadata with slot specific dictionaries (#11695)
This is an implementation of https://github.com/redis/redis/issues/10589 that eliminates 16 bytes per entry in cluster mode, that are currently used to create a linked list between entries in the same slot. Main idea is splitting main dictionary into 16k smaller dictionaries (one per slot), so we can perform all slot specific operations, such as iteration, without any additional info in the `dictEntry`. For Redis cluster, the expectation is that there will be a larger number of keys, so the fixed overhead of 16k dictionaries will be The expire dictionary is also split up so that each slot is logically decoupled, so that in subsequent revisions we will be able to atomically flush a slot of data.
## Important changes
* Incremental rehashing - one big change here is that it's not one, but rather up to 16k dictionaries that can be rehashing at the same time, in order to keep track of them, we introduce a separate queue for dictionaries that are rehashing. Also instead of rehashing a single dictionary, cron job will now try to rehash as many as it can in 1ms.
* getRandomKey - now needs to not only select a random key, from the random bucket, but also needs to select a random dictionary. Fairness is a major concern here, as it's possible that keys can be unevenly distributed across the slots. In order to address this search we introduced binary index tree). With that data structure we are able to efficiently find a random slot using binary search in O(log^2(slot count)) time.
* Iteration efficiency - when iterating dictionary with a lot of empty slots, we want to skip them efficiently. We can do this using same binary index that is used for random key selection, this index allows us to find a slot for a specific key index. For example if there are 10 keys in the slot 0, then we can quickly find a slot that contains 11th key using binary search on top of the binary index tree.
* scan API - in order to perform a scan across the entire DB, the cursor now needs to not only save position within the dictionary but also the slot id. In this change we append slot id into LSB of the cursor so it can be passed around between client and the server. This has interesting side effect, now you'll be able to start scanning specific slot by simply providing slot id as a cursor value. The plan is to not document this as defined behavior, however. It's also worth nothing the SCAN API is now technically incompatible with previous versions, although practically we don't believe it's an issue.
* Checksum calculation optimizations - During command execution, we know that all of the keys are from the same slot (outside of a few notable exceptions such as cross slot scripts and modules). We don't want to compute the checksum multiple multiple times, hence we are relying on cached slot id in the client during the command executions. All operations that access random keys, either should pass in the known slot or recompute the slot.
* Slot info in RDB - in order to resize individual dictionaries correctly, while loading RDB, it's not enough to know total number of keys (of course we could approximate number of keys per slot, but it won't be precise). To address this issue, we've added additional metadata into RDB that contains number of keys in each slot, which can be used as a hint during loading.
* DB size - besides `DBSIZE` API, we need to know size of the DB in many places want, in order to avoid scanning all dictionaries and summing up their sizes in a loop, we've introduced a new field into `redisDb` that keeps track of `key_count`. This way we can keep DBSIZE operation O(1). This is also kept for O(1) expires computation as well.
## Performance
This change improves SET performance in cluster mode by ~5%, most of the gains come from us not having to maintain linked lists for keys in slot, non-cluster mode has same performance. For workloads that rely on evictions, the performance is similar because of the extra overhead for finding keys to evict.
RDB loading performance is slightly reduced, as the slot of each key needs to be computed during the load.
## Interface changes
* Removed `overhead.hashtable.slot-to-keys` from `MEMORY STATS`
* Scan API will now require 64 bits to store the cursor, even on 32 bit systems, as the slot information will be stored.
* New RDB version to support the new op code for SLOT information.
---------
Co-authored-by: Vitaly Arbuzov <arvit@amazon.com>
Co-authored-by: Harkrishn Patro <harkrisp@amazon.com>
Co-authored-by: Roshan Khatri <rvkhatri@amazon.com>
Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-10-15 02:58:26 -04:00
# delete data to have lots (99%) of empty buckets (slot 12182 should be skipped)
for {set j 1} {$j <= 99} {incr j} {
    r del "{foo}$j"
}
# Trigger a full traversal of all dictionaries.
r keys *

r debug set-active-expire 1

# Verify {foo}100 still exists and the remaining keys got cleaned up
wait_for_condition 20 100 {
    [r dbsize] eq 1
} else {
    if {[r dbsize] eq 0} {
        puts [r debug htstats 0]
        fail "scan didn't handle slot skipping logic."
    } else {
        puts [r debug htstats 0]
        fail "scan didn't process all valid slots."
    }
}

# Enable resizing
r debug dict-resizing 1
# put some data into slot 12182 and trigger the resize
r psetex "{foo}0" 500 a

# Verify all keys have expired
wait_for_condition 400 100 {
    [r dbsize] eq 0
} else {
    puts [r dbsize]
    flush stdout
    fail "Keys did not actively expire."
}
Fix active expire timeout when the db is done scanning (#13030)
When db->expires_cursor==0, it means the DB has finished the scan,
so we should exit the loop to avoid useless scanning.
It is easy to see the active expire timeout in the modified test.
For example, assume there is only 1 expired key in the DB, and the
size / buckets ratio is less than 1%, which means that we will skip
it in isExpiryDictValidForSamplingCb, and the return value of
expires_cursor is 0.
Because `data.sampled == 0` is always true, `repeat` is also always
true, so we keep scanning the DB, but every time the bucket is
skipped by the previous check (expires_cursor = 0), until the
timelimit is finally exhausted.
2024-02-05 09:56:46 -05:00
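To make the failure mode concrete, here is a small, self-contained C simulation of the loop shape described above. The scaffolding (`fakeScan`, `SampleData`, the iteration cap) is hypothetical, not the actual activeExpireCycle() code; only `expires_cursor`, `data.sampled`, and the repeat condition mirror this message:

```c
#include <stdio.h>

typedef struct { long sampled, expired; } SampleData;

/* Simulates a scan where every bucket is skipped (size / buckets ratio
 * below 1%): nothing is sampled and the returned cursor is 0, meaning
 * the table has been fully traversed. */
static unsigned long fakeScan(unsigned long cursor, SampleData *data) {
    (void)cursor;
    data->sampled = 0;
    data->expired = 0;
    return 0;
}

int main(void) {
    SampleData data = {0, 0};
    unsigned long expires_cursor = 0;
    long iterations = 0;
    int repeat;
    do {
        expires_cursor = fakeScan(expires_cursor, &data);
        /* Before the fix: repeat stayed true whenever data.sampled == 0,
         * so the loop spun until the time limit was exhausted (modeled
         * here by the iteration cap). */
        repeat = (data.sampled == 0);
        /* The fix: a zero cursor means the scan is done, so stop. */
        if (expires_cursor == 0) repeat = 0;
        iterations++;
    } while (repeat && iterations < 1000000);
    printf("loop iterations: %ld\n", iterations); /* prints 1 with the fix */
    return 0;
}
```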
# Make sure we don't have any timeouts.
assert_equal 0 [s 0 expired_time_cap_reached_count]
} {} {needs:debug}
}