mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-22 16:18:28 -05:00
Switch PFCOUNT to LogLog-Beta algorithm.
The new algorithm provides the same speed with a smaller error for cardinalities in the range 0-100k. Before switching, the behavior of the new and old algorithms was studied in detail in the context of issue #3677. You can find a few graphs and motivations there.
This commit is contained in:
parent
0224be8811
commit
87538cb7fe
@ -688,10 +688,6 @@ void loadServerConfigFromString(char *config) {
|
|||||||
err = sentinelHandleConfiguration(argv+1,argc-1);
|
err = sentinelHandleConfiguration(argv+1,argc-1);
|
||||||
if (err) goto loaderr;
|
if (err) goto loaderr;
|
||||||
}
|
}
|
||||||
} else if (!strcasecmp(argv[0],"hll-use-loglogbeta") && argc == 2) {
|
|
||||||
if ((server.hll_use_loglogbeta = yesnotoi(argv[1])) == -1) {
|
|
||||||
err = "argument must be 'yes' or 'no'"; goto loaderr;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
err = "Bad directive or wrong number of arguments"; goto loaderr;
|
err = "Bad directive or wrong number of arguments"; goto loaderr;
|
||||||
}
|
}
|
||||||
@ -985,8 +981,6 @@ void configSetCommand(client *c) {
|
|||||||
"slave-lazy-flush",server.repl_slave_lazy_flush) {
|
"slave-lazy-flush",server.repl_slave_lazy_flush) {
|
||||||
} config_set_bool_field(
|
} config_set_bool_field(
|
||||||
"no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) {
|
"no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) {
|
||||||
} config_set_bool_field(
|
|
||||||
"hll-use-loglogbeta",server.hll_use_loglogbeta) {
|
|
||||||
|
|
||||||
/* Numerical fields.
|
/* Numerical fields.
|
||||||
* config_set_numerical_field(name,var,min,max) */
|
* config_set_numerical_field(name,var,min,max) */
|
||||||
@ -1251,8 +1245,6 @@ void configGetCommand(client *c) {
|
|||||||
server.lazyfree_lazy_server_del);
|
server.lazyfree_lazy_server_del);
|
||||||
config_get_bool_field("slave-lazy-flush",
|
config_get_bool_field("slave-lazy-flush",
|
||||||
server.repl_slave_lazy_flush);
|
server.repl_slave_lazy_flush);
|
||||||
config_get_bool_field("hll-use-loglogbeta",
|
|
||||||
server.hll_use_loglogbeta);
|
|
||||||
|
|
||||||
/* Enum values */
|
/* Enum values */
|
||||||
config_get_enum_field("maxmemory-policy",
|
config_get_enum_field("maxmemory-policy",
|
||||||
@ -1971,7 +1963,6 @@ int rewriteConfig(char *path) {
|
|||||||
rewriteConfigYesNoOption(state,"lazyfree-lazy-expire",server.lazyfree_lazy_expire,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE);
|
rewriteConfigYesNoOption(state,"lazyfree-lazy-expire",server.lazyfree_lazy_expire,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE);
|
||||||
rewriteConfigYesNoOption(state,"lazyfree-lazy-server-del",server.lazyfree_lazy_server_del,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL);
|
rewriteConfigYesNoOption(state,"lazyfree-lazy-server-del",server.lazyfree_lazy_server_del,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL);
|
||||||
rewriteConfigYesNoOption(state,"slave-lazy-flush",server.repl_slave_lazy_flush,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH);
|
rewriteConfigYesNoOption(state,"slave-lazy-flush",server.repl_slave_lazy_flush,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH);
|
||||||
rewriteConfigYesNoOption(state,"hll-use-loglogbeta",server.hll_use_loglogbeta,CONFIG_DEFAULT_HLL_USE_LOGLOGBETA);
|
|
||||||
|
|
||||||
/* Rewrite Sentinel config if in Sentinel mode. */
|
/* Rewrite Sentinel config if in Sentinel mode. */
|
||||||
if (server.sentinel_mode) rewriteConfigSentinelOption(state);
|
if (server.sentinel_mode) rewriteConfigSentinelOption(state);
|
||||||
|
@ -994,50 +994,21 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) {
|
|||||||
serverPanic("Unknown HyperLogLog encoding in hllCount()");
|
serverPanic("Unknown HyperLogLog encoding in hllCount()");
|
||||||
}
|
}
|
||||||
|
|
||||||
if(server.hll_use_loglogbeta) {
|
/* Apply loglog-beta to the raw estimate. See:
|
||||||
/* For loglog-beta there is a single formula to compute
|
* "LogLog-Beta and More: A New Algorithm for Cardinality Estimation
|
||||||
* cardinality for the enture range
|
* Based on LogLog Counting" Jason Qin, Denys Kim, Yumei Tung
|
||||||
*/
|
* arXiv:1612.02284 */
|
||||||
|
double zl = log(ez + 1);
|
||||||
|
double beta = -0.370393911*ez +
|
||||||
|
0.070471823*zl +
|
||||||
|
0.17393686*pow(zl,2) +
|
||||||
|
0.16339839*pow(zl,3) +
|
||||||
|
-0.09237745*pow(zl,4) +
|
||||||
|
0.03738027*pow(zl,5) +
|
||||||
|
-0.005384159*pow(zl,6) +
|
||||||
|
0.00042419*pow(zl,7);
|
||||||
|
|
||||||
double zl = log(ez + 1);
|
E = llroundl(alpha*m*(m-ez)*(1/(E+beta)));
|
||||||
double beta = -0.370393911*ez +
|
|
||||||
0.070471823*zl +
|
|
||||||
0.17393686*pow(zl,2) +
|
|
||||||
0.16339839*pow(zl,3) +
|
|
||||||
-0.09237745*pow(zl,4) +
|
|
||||||
0.03738027*pow(zl,5) +
|
|
||||||
-0.005384159*pow(zl,6) +
|
|
||||||
0.00042419*pow(zl,7);
|
|
||||||
|
|
||||||
E = llroundl(alpha*m*(m-ez)*(1/(E+beta)));
|
|
||||||
} else {
|
|
||||||
/* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */
|
|
||||||
E = (1/E)*alpha*m*m;
|
|
||||||
|
|
||||||
/* Use the LINEARCOUNTING algorithm for small cardinalities.
|
|
||||||
* For larger values but up to 72000 HyperLogLog raw approximation is
|
|
||||||
* used since linear counting error starts to increase. However HyperLogLog
|
|
||||||
* shows a strong bias in the range 2.5*16384 - 72000, so we try to
|
|
||||||
* compensate for it. */
|
|
||||||
if (E < m*2.5 && ez != 0) {
|
|
||||||
E = m*log(m/ez); /* LINEARCOUNTING() */
|
|
||||||
} else if (m == 16384 && E < 72000) {
|
|
||||||
/* We did polynomial regression of the bias for this range, this
|
|
||||||
* way we can compute the bias for a given cardinality and correct
|
|
||||||
* according to it. Only apply the correction for P=14 that's what
|
|
||||||
* we use and the value the correction was verified with. */
|
|
||||||
double bias = 5.9119*1.0e-18*(E*E*E*E)
|
|
||||||
-1.4253*1.0e-12*(E*E*E)+
|
|
||||||
1.2940*1.0e-7*(E*E)
|
|
||||||
-5.2921*1.0e-3*E+
|
|
||||||
83.3216;
|
|
||||||
E -= E*(bias/100);
|
|
||||||
}
|
|
||||||
/* We don't apply the correction for E > 1/30 of 2^32 since we use
|
|
||||||
* a 64 bit function and 6 bit counters. To apply the correction for
|
|
||||||
* 1/30 of 2^64 is not needed since it would require a huge set
|
|
||||||
* to approach such a value. */
|
|
||||||
}
|
|
||||||
return (uint64_t) E;
|
return (uint64_t) E;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1400,7 +1400,6 @@ void initServerConfig(void) {
|
|||||||
server.lazyfree_lazy_eviction = CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION;
|
server.lazyfree_lazy_eviction = CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION;
|
||||||
server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE;
|
server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE;
|
||||||
server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL;
|
server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL;
|
||||||
server.hll_use_loglogbeta = CONFIG_DEFAULT_HLL_USE_LOGLOGBETA;
|
|
||||||
|
|
||||||
server.lruclock = getLRUClock();
|
server.lruclock = getLRUClock();
|
||||||
resetServerSaveParams();
|
resetServerSaveParams();
|
||||||
|
@ -151,7 +151,6 @@ typedef long long mstime_t; /* millisecond time type. */
|
|||||||
#define CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION 0
|
#define CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION 0
|
||||||
#define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0
|
#define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0
|
||||||
#define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0
|
#define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0
|
||||||
#define CONFIG_DEFAULT_HLL_USE_LOGLOGBETA 0
|
|
||||||
|
|
||||||
#define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Loopkups per loop. */
|
#define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Loopkups per loop. */
|
||||||
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */
|
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */
|
||||||
@ -1150,7 +1149,6 @@ struct redisServer {
|
|||||||
int watchdog_period; /* Software watchdog period in ms. 0 = off */
|
int watchdog_period; /* Software watchdog period in ms. 0 = off */
|
||||||
/* System hardware info */
|
/* System hardware info */
|
||||||
size_t system_memory_size; /* Total memory in system as reported by OS */
|
size_t system_memory_size; /* Total memory in system as reported by OS */
|
||||||
int hll_use_loglogbeta; /* Use loglog-beta algorithm for HLL */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct pubsubPattern {
|
typedef struct pubsubPattern {
|
||||||
|
Loading…
Reference in New Issue
Block a user