From f898a9e97dbf711ee192aee60134a07841c735d0 Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 16 Sep 2021 19:07:08 +0800 Subject: [PATCH] Adds limit to SINTERCARD/ZINTERCARD. (#9425) Implements the [LIMIT limit] variant of SINTERCARD/ZINTERCARD. Now with the LIMIT, we can stop the searching when cardinality reaching the limit, and return the cardinality ASAP. Note that in SINTERCARD, the old synatx was: `SINTERCARD key [key ...]` In order to add a optional parameter, we must break the old synatx. So the new syntax of SINTERCARD will be consistent with ZINTERCARD. New syntax: `SINTERCARD numkeys key [key ...] [LIMIT limit]`. Note that this means that SINTERCARD has a different syntax than SINTER and SINTERSTORE (taking numkeys argument) As for ZINTERCARD, we can easily add a optional parameter to it. New syntax: `ZINTERCARD numkeys key [key ...] [LIMIT limit]` --- src/db.c | 5 ++++ src/server.c | 5 ++-- src/server.h | 1 + src/t_set.c | 49 ++++++++++++++++++++++++++++++++++++---- src/t_zset.c | 39 ++++++++++++++++++++++++++++++-- tests/unit/type/set.tcl | 42 +++++++++++++++++++++++++++++++--- tests/unit/type/zset.tcl | 15 ++++++++++++ 7 files changed, 145 insertions(+), 11 deletions(-) diff --git a/src/db.c b/src/db.c index 4caecd57d..43f51ffdf 100644 --- a/src/db.c +++ b/src/db.c @@ -1681,6 +1681,11 @@ int genericGetKeys(int storeKeyOfs, int keyCountOfs, int firstKeyOfs, int keySte return result->numkeys; } +int sintercardGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) { + UNUSED(cmd); + return genericGetKeys(0, 1, 2, 1, argv, argc, result); +} + int zunionInterDiffStoreGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) { UNUSED(cmd); return genericGetKeys(1, 2, 3, 1, argv, argc, result); diff --git a/src/server.c b/src/server.c index ca6534bb0..72000cc9c 100644 --- a/src/server.c +++ b/src/server.c @@ -535,11 +535,12 @@ struct redisCommand redisCommandTable[] = { KSPEC_BS_INDEX,.bs.index={1}, KSPEC_FK_RANGE,.fk.range={-1,1,0}}}}, - {"sintercard",sinterCardCommand,-2, + {"sintercard",sinterCardCommand,-3, "read-only @set", {{"read", KSPEC_BS_INDEX,.bs.index={1}, - KSPEC_FK_RANGE,.fk.range={-1,1,0}}}}, + KSPEC_FK_KEYNUM,.fk.range={0,1,1}}}, + sintercardGetKeys}, {"sinterstore",sinterstoreCommand,-3, "write use-memory @set", diff --git a/src/server.h b/src/server.h index f8c6a1aae..3e337929f 100644 --- a/src/server.h +++ b/src/server.h @@ -2545,6 +2545,7 @@ void freeObjAsync(robj *key, robj *obj, int dbid); int *getKeysPrepareResult(getKeysResult *result, int numkeys); int getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result); void getKeysFreeResult(getKeysResult *result); +int sintercardGetKeys(struct redisCommand *cmd,robj **argv, int argc, getKeysResult *result); int zunionInterDiffGetKeys(struct redisCommand *cmd,robj **argv, int argc, getKeysResult *result); int zunionInterDiffStoreGetKeys(struct redisCommand *cmd,robj **argv, int argc, getKeysResult *result); int evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result); diff --git a/src/t_set.c b/src/t_set.c index cd824f888..8239662aa 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -847,8 +847,17 @@ int qsortCompareSetsByRevCardinality(const void *s1, const void *s2) { return 0; } +/* SINTER / SINTERSTORE / SINTERCARD + * + * 'cardinality_only' work for SINTERCARD, only return the cardinality + * with minimum processing and memory overheads. + * + * 'limit' work for SINTERCARD, stop searching after reaching the limit. + * Passing a 0 means unlimited. + */ void sinterGenericCommand(client *c, robj **setkeys, - unsigned long setnum, robj *dstkey, int cardinality_only) { + unsigned long setnum, robj *dstkey, + int cardinality_only, unsigned long limit) { robj **sets = zmalloc(sizeof(robj*)*setnum); setTypeIterator *si; robj *dstset = NULL; @@ -946,6 +955,10 @@ void sinterGenericCommand(client *c, robj **setkeys, if (j == setnum) { if (cardinality_only) { cardinality++; + + /* We stop the searching after reaching the limit. */ + if (limit && cardinality >= limit) + break; } else if (!dstkey) { if (encoding == OBJ_ENCODING_HT) addReplyBulkCBuffer(c,elesds,sdslen(elesds)); @@ -993,16 +1006,44 @@ void sinterGenericCommand(client *c, robj **setkeys, /* SINTER key [key ...] */ void sinterCommand(client *c) { - sinterGenericCommand(c,c->argv+1,c->argc-1,NULL,0); + sinterGenericCommand(c, c->argv+1, c->argc-1, NULL, 0, 0); } +/* SINTERCARD numkeys key [key ...] [LIMIT limit] */ void sinterCardCommand(client *c) { - sinterGenericCommand(c,c->argv+1,c->argc-1,NULL,1); + long j; + long numkeys = 0; /* Number of keys. */ + long limit = 0; /* 0 means not limit. */ + + if (getRangeLongFromObjectOrReply(c, c->argv[1], 1, LONG_MAX, + &numkeys, "numkeys should be greater than 0") != C_OK) + return; + if (numkeys > (c->argc - 2)) { + addReplyError(c, "Number of keys can't be greater than number of args"); + return; + } + + for (j = 2 + numkeys; j < c->argc; j++) { + char *opt = c->argv[j]->ptr; + int moreargs = (c->argc - 1) - j; + + if (!strcasecmp(opt, "LIMIT") && moreargs) { + j++; + if (getPositiveLongFromObjectOrReply(c, c->argv[j], &limit, + "LIMIT can't be negative") != C_OK) + return; + } else { + addReplyErrorObject(c, shared.syntaxerr); + return; + } + } + + sinterGenericCommand(c, c->argv+2, numkeys, NULL, 1, limit); } /* SINTERSTORE destination key [key ...] */ void sinterstoreCommand(client *c) { - sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],0); + sinterGenericCommand(c, c->argv+2, c->argc-2, c->argv[1], 0, 0); } #define SET_OP_UNION 0 diff --git a/src/t_zset.c b/src/t_zset.c index a15967e49..44d6ff12e 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -2057,6 +2057,14 @@ void zuiClearIterator(zsetopsrc *op) { } } +void zuiDiscardDirtyValue(zsetopval *val) { + if (val->flags & OPVAL_DIRTY_SDS) { + sdsfree(val->ele); + val->ele = NULL; + val->flags &= ~OPVAL_DIRTY_SDS; + } +} + unsigned long zuiLength(zsetopsrc *op) { if (op->subject == NULL) return 0; @@ -2091,8 +2099,7 @@ int zuiNext(zsetopsrc *op, zsetopval *val) { if (op->subject == NULL) return 0; - if (val->flags & OPVAL_DIRTY_SDS) - sdsfree(val->ele); + zuiDiscardDirtyValue(val); memset(val,0,sizeof(zsetopval)); @@ -2491,7 +2498,9 @@ dictType setAccumulatorDictType = { * this value is 1, for ZUNIONSTORE/ZINTERSTORE/ZDIFFSTORE command, this value is 2. * * 'op' SET_OP_INTER, SET_OP_UNION or SET_OP_DIFF. + * * 'cardinality_only' is currently only applicable when 'op' is SET_OP_INTER. + * Work for SINTERCARD, only return the cardinality with minimum processing and memory overheads. */ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, int op, int cardinality_only) { @@ -2507,6 +2516,7 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in zskiplistNode *znode; int withscores = 0; unsigned long cardinality = 0; + long limit = 0; /* Stop searching after reaching the limit. 0 means unlimited. */ /* expect setnum input keys to be given */ if ((getLongFromObjectOrReply(c, c->argv[numkeysIndex], &setnum, NULL) != C_OK)) @@ -2589,6 +2599,17 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in { j++; remaining--; withscores = 1; + } else if (cardinality_only && remaining >= 2 && + !strcasecmp(c->argv[j]->ptr, "limit")) + { + j++; remaining--; + if (getPositiveLongFromObjectOrReply(c, c->argv[j], &limit, + "LIMIT can't be negative") != C_OK) + { + zfree(src); + return; + } + j++; remaining--; } else { zfree(src); addReplyErrorObject(c,shared.syntaxerr); @@ -2636,6 +2657,13 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in /* Only continue when present in every input. */ if (j == setnum && cardinality_only) { cardinality++; + + /* We stop the searching after reaching the limit. */ + if (limit && cardinality >= (unsigned long)limit) { + /* Cleanup before we break the zuiNext loop. */ + zuiDiscardDirtyValue(&zval); + break; + } } else if (j == setnum) { tmp = zuiNewSdsFromValue(&zval); znode = zslInsert(dstzset->zsl,score,tmp); @@ -2758,30 +2786,37 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in zfree(src); } +/* ZUNIONSTORE destination numkeys key [key ...] [WEIGHTS weight] [AGGREGATE SUM|MIN|MAX] */ void zunionstoreCommand(client *c) { zunionInterDiffGenericCommand(c, c->argv[1], 2, SET_OP_UNION, 0); } +/* ZINTERSTORE destination numkeys key [key ...] [WEIGHTS weight] [AGGREGATE SUM|MIN|MAX] */ void zinterstoreCommand(client *c) { zunionInterDiffGenericCommand(c, c->argv[1], 2, SET_OP_INTER, 0); } +/* ZDIFFSTORE destination numkeys key [key ...] */ void zdiffstoreCommand(client *c) { zunionInterDiffGenericCommand(c, c->argv[1], 2, SET_OP_DIFF, 0); } +/* ZUNION numkeys key [key ...] [WEIGHTS weight] [AGGREGATE SUM|MIN|MAX] [WITHSCORES] */ void zunionCommand(client *c) { zunionInterDiffGenericCommand(c, NULL, 1, SET_OP_UNION, 0); } +/* ZINTER numkeys key [key ...] [WEIGHTS weight] [AGGREGATE SUM|MIN|MAX] [WITHSCORES] */ void zinterCommand(client *c) { zunionInterDiffGenericCommand(c, NULL, 1, SET_OP_INTER, 0); } +/* ZINTERCARD numkeys key [key ...] [LIMIT limit] */ void zinterCardCommand(client *c) { zunionInterDiffGenericCommand(c, NULL, 1, SET_OP_INTER, 1); } +/* ZDIFF numkeys key [key ...] [WITHSCORES] */ void zdiffCommand(client *c) { zunionInterDiffGenericCommand(c, NULL, 1, SET_OP_DIFF, 0); } diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index 9c512726d..7e30fd3be 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -143,8 +143,38 @@ start_server { r srem myset 1 2 3 4 5 6 7 8 } {3} + test "SINTERCARD with illegal arguments" { + assert_error "ERR wrong number of arguments*" {r sintercard} + assert_error "ERR wrong number of arguments*" {r sintercard 1} + + assert_error "ERR numkeys*" {r sintercard 0 myset{t}} + assert_error "ERR numkeys*" {r sintercard a myset{t}} + + assert_error "ERR Number of keys*" {r sintercard 2 myset{t}} + assert_error "ERR Number of keys*" {r sintercard 3 myset{t} myset2{t}} + + assert_error "ERR syntax error*" {r sintercard 1 myset{t} myset2{t}} + assert_error "ERR syntax error*" {r sintercard 1 myset{t} bar_arg} + assert_error "ERR syntax error*" {r sintercard 1 myset{t} LIMIT} + + assert_error "ERR LIMIT*" {r sintercard 1 myset{t} LIMIT -1} + assert_error "ERR LIMIT*" {r sintercard 1 myset{t} LIMIT a} + } + + test "SINTERCARD against non-set should throw error" { + r del set{t} + r sadd set{t} a b c + r set key1{t} x + + assert_error "WRONGTYPE*" {r sintercard 1 key1{t}} + assert_error "WRONGTYPE*" {r sintercard 2 set{t} key1{t}} + assert_error "WRONGTYPE*" {r sintercard 2 key1{t} noset{t}} + } + test "SINTERCARD against non-existing key" { - assert_equal 0 [r sintercard non-existing-key] + assert_equal 0 [r sintercard 1 non-existing-key] + assert_equal 0 [r sintercard 1 non-existing-key limit 0] + assert_equal 0 [r sintercard 1 non-existing-key limit 10] } foreach {type} {hashtable intset} { @@ -187,7 +217,10 @@ start_server { } test "SINTERCARD with two sets - $type" { - assert_equal 6 [r sintercard set1{t} set2{t}] + assert_equal 6 [r sintercard 2 set1{t} set2{t}] + assert_equal 6 [r sintercard 2 set1{t} set2{t} limit 0] + assert_equal 3 [r sintercard 2 set1{t} set2{t} limit 3] + assert_equal 6 [r sintercard 2 set1{t} set2{t} limit 10] } test "SINTERSTORE with two sets - $type" { @@ -220,7 +253,10 @@ start_server { } test "SINTERCARD against three sets - $type" { - assert_equal 3 [r sintercard set1{t} set2{t} set3{t}] + assert_equal 3 [r sintercard 3 set1{t} set2{t} set3{t}] + assert_equal 3 [r sintercard 3 set1{t} set2{t} set3{t} limit 0] + assert_equal 2 [r sintercard 3 set1{t} set2{t} set3{t} limit 2] + assert_equal 3 [r sintercard 3 set1{t} set2{t} set3{t} limit 10] } test "SINTERSTORE with three sets - $type" { diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl index 84449a83a..2ccb19175 100644 --- a/tests/unit/type/zset.tcl +++ b/tests/unit/type/zset.tcl @@ -652,6 +652,7 @@ start_server {tags {"zset"}} { assert_equal {} [r zunion 1 zseta] assert_equal {} [r zinter 1 zseta] assert_equal 0 [r zintercard 1 zseta] + assert_equal 0 [r zintercard 1 zseta limit 0] assert_equal {} [r zdiff 1 zseta] } @@ -670,6 +671,7 @@ start_server {tags {"zset"}} { assert_equal {a 1 b 2} [r zunion 2 zseta{t} zsetb{t} withscores] assert_equal {} [r zinter 2 zseta{t} zsetb{t} withscores] assert_equal 0 [r zintercard 2 zseta{t} zsetb{t}] + assert_equal 0 [r zintercard 2 zseta{t} zsetb{t} limit 0] assert_equal {a 1 b 2} [r zdiff 2 zseta{t} zsetb{t} withscores] } @@ -698,6 +700,7 @@ start_server {tags {"zset"}} { assert_equal {1 2 2 2 4 4 3 6} [r zunion 2 zsetd{t} zsetf{t} withscores] assert_equal {1 2 3 6} [r zinter 2 zsetd{t} zsetf{t} withscores] assert_equal 2 [r zintercard 2 zsetd{t} zsetf{t}] + assert_equal 2 [r zintercard 2 zsetd{t} zsetf{t} limit 0] assert_equal {2 2} [r zdiff 2 zsetd{t} zsetf{t} withscores] } @@ -750,8 +753,20 @@ start_server {tags {"zset"}} { assert_equal {b 3 c 5} [r zinter 2 zseta{t} zsetb{t} withscores] } + test "ZINTERCARD with illegal arguments" { + assert_error "ERR syntax error*" {r zintercard 1 zseta{t} zseta{t}} + assert_error "ERR syntax error*" {r zintercard 1 zseta{t} bar_arg} + assert_error "ERR syntax error*" {r zintercard 1 zseta{t} LIMIT} + + assert_error "ERR LIMIT*" {r zintercard 1 myset{t} LIMIT -1} + assert_error "ERR LIMIT*" {r zintercard 1 myset{t} LIMIT a} + } + test "ZINTERCARD basics - $encoding" { assert_equal 2 [r zintercard 2 zseta{t} zsetb{t}] + assert_equal 2 [r zintercard 2 zseta{t} zsetb{t} limit 0] + assert_equal 1 [r zintercard 2 zseta{t} zsetb{t} limit 1] + assert_equal 2 [r zintercard 2 zseta{t} zsetb{t} limit 10] } test "ZINTER RESP3 - $encoding" {