mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-22 08:08:53 -05:00
50ee0f5be8
Based on feedback from interested parties
278 lines
8.8 KiB
Tcl
278 lines
8.8 KiB
Tcl
# SPDX-FileCopyrightText: 2024 Redict Contributors
|
|
# SPDX-FileCopyrightText: 2024 Salvatore Sanfilippo <antirez at gmail dot com>
|
|
#
|
|
# SPDX-License-Identifier: BSD-3-Clause
|
|
# SPDX-License-Identifier: LGPL-3.0-only
|
|
|
|
start_server {tags {"hll"}} {
|
|
test {HyperLogLog self test passes} {
|
|
catch {r pfselftest} e
|
|
set e
|
|
} {OK} {needs:pfdebug}
|
|
|
|
test {PFADD without arguments creates an HLL value} {
|
|
r pfadd hll
|
|
r exists hll
|
|
} {1}
|
|
|
|
test {Approximated cardinality after creation is zero} {
|
|
r pfcount hll
|
|
} {0}
|
|
|
|
test {PFADD returns 1 when at least 1 reg was modified} {
|
|
r pfadd hll a b c
|
|
} {1}
|
|
|
|
test {PFADD returns 0 when no reg was modified} {
|
|
r pfadd hll a b c
|
|
} {0}
|
|
|
|
test {PFADD works with empty string (regression)} {
|
|
r pfadd hll ""
|
|
}
|
|
|
|
# Note that the self test stresses much better the
|
|
# cardinality estimation error. We are testing just the
|
|
# command implementation itself here.
|
|
test {PFCOUNT returns approximated cardinality of set} {
|
|
r del hll
|
|
set res {}
|
|
r pfadd hll 1 2 3 4 5
|
|
lappend res [r pfcount hll]
|
|
# Call it again to test cached value invalidation.
|
|
r pfadd hll 6 7 8 8 9 10
|
|
lappend res [r pfcount hll]
|
|
set res
|
|
} {5 10}
|
|
|
|
test {HyperLogLogs are promote from sparse to dense} {
|
|
r del hll
|
|
r config set hll-sparse-max-bytes 3000
|
|
set n 0
|
|
while {$n < 100000} {
|
|
set elements {}
|
|
for {set j 0} {$j < 100} {incr j} {lappend elements [expr rand()]}
|
|
incr n 100
|
|
r pfadd hll {*}$elements
|
|
set card [r pfcount hll]
|
|
set err [expr {abs($card-$n)}]
|
|
assert {$err < (double($card)/100)*5}
|
|
if {$n < 1000} {
|
|
assert {[r pfdebug encoding hll] eq {sparse}}
|
|
} elseif {$n > 10000} {
|
|
assert {[r pfdebug encoding hll] eq {dense}}
|
|
}
|
|
}
|
|
} {} {needs:pfdebug}
|
|
|
|
test {Change hll-sparse-max-bytes} {
|
|
r config set hll-sparse-max-bytes 3000
|
|
r del hll
|
|
r pfadd hll a b c d e d g h i j k
|
|
assert {[r pfdebug encoding hll] eq {sparse}}
|
|
r config set hll-sparse-max-bytes 30
|
|
r pfadd hll new_element
|
|
assert {[r pfdebug encoding hll] eq {dense}}
|
|
} {} {needs:pfdebug}
|
|
|
|
test {Hyperloglog promote to dense well in different hll-sparse-max-bytes} {
|
|
set max(0) 100
|
|
set max(1) 500
|
|
set max(2) 3000
|
|
for {set i 0} {$i < [array size max]} {incr i} {
|
|
r config set hll-sparse-max-bytes $max($i)
|
|
r del hll
|
|
r pfadd hll
|
|
set len [r strlen hll]
|
|
while {$len <= $max($i)} {
|
|
assert {[r pfdebug encoding hll] eq {sparse}}
|
|
set elements {}
|
|
for {set j 0} {$j < 10} {incr j} { lappend elements [expr rand()]}
|
|
r pfadd hll {*}$elements
|
|
set len [r strlen hll]
|
|
}
|
|
assert {[r pfdebug encoding hll] eq {dense}}
|
|
}
|
|
} {} {needs:pfdebug}
|
|
|
|
test {HyperLogLog sparse encoding stress test} {
|
|
for {set x 0} {$x < 1000} {incr x} {
|
|
r del hll1
|
|
r del hll2
|
|
set numele [randomInt 100]
|
|
set elements {}
|
|
for {set j 0} {$j < $numele} {incr j} {
|
|
lappend elements [expr rand()]
|
|
}
|
|
# Force dense representation of hll2
|
|
r pfadd hll2
|
|
r pfdebug todense hll2
|
|
r pfadd hll1 {*}$elements
|
|
r pfadd hll2 {*}$elements
|
|
assert {[r pfdebug encoding hll1] eq {sparse}}
|
|
assert {[r pfdebug encoding hll2] eq {dense}}
|
|
# Cardinality estimated should match exactly.
|
|
assert {[r pfcount hll1] eq [r pfcount hll2]}
|
|
}
|
|
} {} {needs:pfdebug}
|
|
|
|
test {Corrupted sparse HyperLogLogs are detected: Additional at tail} {
|
|
r del hll
|
|
r pfadd hll a b c
|
|
r append hll "hello"
|
|
set e {}
|
|
catch {r pfcount hll} e
|
|
set e
|
|
} {*INVALIDOBJ*}
|
|
|
|
test {Corrupted sparse HyperLogLogs are detected: Broken magic} {
|
|
r del hll
|
|
r pfadd hll a b c
|
|
r setrange hll 0 "0123"
|
|
set e {}
|
|
catch {r pfcount hll} e
|
|
set e
|
|
} {*WRONGTYPE*}
|
|
|
|
test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} {
|
|
r del hll
|
|
r pfadd hll a b c
|
|
r setrange hll 4 "x"
|
|
set e {}
|
|
catch {r pfcount hll} e
|
|
set e
|
|
} {*WRONGTYPE*}
|
|
|
|
test {Corrupted dense HyperLogLogs are detected: Wrong length} {
|
|
r del hll
|
|
r pfadd hll a b c
|
|
r setrange hll 4 "\x00"
|
|
set e {}
|
|
catch {r pfcount hll} e
|
|
set e
|
|
} {*WRONGTYPE*}
|
|
|
|
test {Fuzzing dense/sparse encoding: Redict should always detect errors} {
|
|
for {set j 0} {$j < 1000} {incr j} {
|
|
r del hll
|
|
set items {}
|
|
set numitems [randomInt 3000]
|
|
for {set i 0} {$i < $numitems} {incr i} {
|
|
lappend items [expr {rand()}]
|
|
}
|
|
r pfadd hll {*}$items
|
|
|
|
# Corrupt it in some random way.
|
|
for {set i 0} {$i < 5} {incr i} {
|
|
set len [r strlen hll]
|
|
set pos [randomInt $len]
|
|
set byte [randstring 1 1 binary]
|
|
r setrange hll $pos $byte
|
|
# Don't modify more bytes 50% of times
|
|
if {rand() < 0.5} break
|
|
}
|
|
|
|
# Use the hyperloglog to check if it crashes
|
|
# Redict in some way.
|
|
catch {
|
|
r pfcount hll
|
|
}
|
|
}
|
|
}
|
|
|
|
test {PFADD, PFCOUNT, PFMERGE type checking works} {
|
|
r set foo{t} bar
|
|
catch {r pfadd foo{t} 1} e
|
|
assert_match {*WRONGTYPE*} $e
|
|
catch {r pfcount foo{t}} e
|
|
assert_match {*WRONGTYPE*} $e
|
|
catch {r pfmerge bar{t} foo{t}} e
|
|
assert_match {*WRONGTYPE*} $e
|
|
catch {r pfmerge foo{t} bar{t}} e
|
|
assert_match {*WRONGTYPE*} $e
|
|
}
|
|
|
|
test {PFMERGE results on the cardinality of union of sets} {
|
|
r del hll{t} hll1{t} hll2{t} hll3{t}
|
|
r pfadd hll1{t} a b c
|
|
r pfadd hll2{t} b c d
|
|
r pfadd hll3{t} c d e
|
|
r pfmerge hll{t} hll1{t} hll2{t} hll3{t}
|
|
r pfcount hll{t}
|
|
} {5}
|
|
|
|
test {PFMERGE on missing source keys will create an empty destkey} {
|
|
r del sourcekey{t} sourcekey2{t} destkey{t} destkey2{t}
|
|
|
|
assert_equal {OK} [r pfmerge destkey{t} sourcekey{t}]
|
|
assert_equal 1 [r exists destkey{t}]
|
|
assert_equal 0 [r pfcount destkey{t}]
|
|
|
|
assert_equal {OK} [r pfmerge destkey2{t} sourcekey{t} sourcekey2{t}]
|
|
assert_equal 1 [r exists destkey2{t}]
|
|
assert_equal 0 [r pfcount destkey{t}]
|
|
}
|
|
|
|
test {PFMERGE with one empty input key, create an empty destkey} {
|
|
r del destkey
|
|
assert_equal {OK} [r pfmerge destkey]
|
|
assert_equal 1 [r exists destkey]
|
|
assert_equal 0 [r pfcount destkey]
|
|
}
|
|
|
|
test {PFMERGE with one non-empty input key, dest key is actually one of the source keys} {
|
|
r del destkey
|
|
assert_equal 1 [r pfadd destkey a b c]
|
|
assert_equal {OK} [r pfmerge destkey]
|
|
assert_equal 1 [r exists destkey]
|
|
assert_equal 3 [r pfcount destkey]
|
|
}
|
|
|
|
test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
|
|
r del hll1{t} hll2{t} hll3{t}
|
|
for {set x 1} {$x < 10000} {incr x} {
|
|
r pfadd hll1{t} "foo-$x"
|
|
r pfadd hll2{t} "bar-$x"
|
|
r pfadd hll3{t} "zap-$x"
|
|
|
|
set card [r pfcount hll1{t} hll2{t} hll3{t}]
|
|
set realcard [expr {$x*3}]
|
|
set err [expr {abs($card-$realcard)}]
|
|
assert {$err < (double($card)/100)*5}
|
|
}
|
|
}
|
|
|
|
test {PFCOUNT multiple-keys merge returns cardinality of union #2} {
|
|
r del hll1{t} hll2{t} hll3{t}
|
|
set elements {}
|
|
for {set x 1} {$x < 10000} {incr x} {
|
|
for {set j 1} {$j <= 3} {incr j} {
|
|
set rint [randomInt 20000]
|
|
r pfadd hll$j{t} $rint
|
|
lappend elements $rint
|
|
}
|
|
}
|
|
set realcard [llength [lsort -unique $elements]]
|
|
set card [r pfcount hll1{t} hll2{t} hll3{t}]
|
|
set err [expr {abs($card-$realcard)}]
|
|
assert {$err < (double($card)/100)*5}
|
|
}
|
|
|
|
test {PFDEBUG GETREG returns the HyperLogLog raw registers} {
|
|
r del hll
|
|
r pfadd hll 1 2 3
|
|
llength [r pfdebug getreg hll]
|
|
} {16384} {needs:pfdebug}
|
|
|
|
test {PFADD / PFCOUNT cache invalidation works} {
|
|
r del hll
|
|
r pfadd hll a b c
|
|
r pfcount hll
|
|
assert {[r getrange hll 15 15] eq "\x00"}
|
|
r pfadd hll a b c
|
|
assert {[r getrange hll 15 15] eq "\x00"}
|
|
r pfadd hll 1 2 3
|
|
assert {[r getrange hll 15 15] eq "\x80"}
|
|
}
|
|
}
|