2012-11-08 12:25:23 -05:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
|
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* * Neither the name of Redis nor the names of its contributors may be used
|
|
|
|
* to endorse or promote products derived from this software without
|
|
|
|
* specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2015-07-26 09:14:57 -04:00
|
|
|
#include "server.h"
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* ================================ MULTI/EXEC ============================== */
|
|
|
|
|
|
|
|
/* Client state initialization for MULTI/EXEC */
|
2015-07-26 09:20:46 -04:00
|
|
|
void initClientMultiState(client *c) {
|
2010-06-21 18:07:48 -04:00
|
|
|
c->mstate.commands = NULL;
|
|
|
|
c->mstate.count = 0;
|
2018-12-11 05:39:18 -05:00
|
|
|
c->mstate.cmd_flags = 0;
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Release all the resources associated with MULTI/EXEC state */
|
2015-07-26 09:20:46 -04:00
|
|
|
void freeClientMultiState(client *c) {
|
2010-06-21 18:07:48 -04:00
|
|
|
int j;
|
|
|
|
|
|
|
|
for (j = 0; j < c->mstate.count; j++) {
|
|
|
|
int i;
|
|
|
|
multiCmd *mc = c->mstate.commands+j;
|
|
|
|
|
|
|
|
for (i = 0; i < mc->argc; i++)
|
|
|
|
decrRefCount(mc->argv[i]);
|
|
|
|
zfree(mc->argv);
|
|
|
|
}
|
|
|
|
zfree(c->mstate.commands);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Add a new command into the MULTI commands queue */
|
2015-07-26 09:20:46 -04:00
|
|
|
void queueMultiCommand(client *c) {
|
2010-06-21 18:07:48 -04:00
|
|
|
multiCmd *mc;
|
|
|
|
int j;
|
|
|
|
|
|
|
|
c->mstate.commands = zrealloc(c->mstate.commands,
|
|
|
|
sizeof(multiCmd)*(c->mstate.count+1));
|
|
|
|
mc = c->mstate.commands+c->mstate.count;
|
2011-07-08 06:59:30 -04:00
|
|
|
mc->cmd = c->cmd;
|
2010-06-21 18:07:48 -04:00
|
|
|
mc->argc = c->argc;
|
|
|
|
mc->argv = zmalloc(sizeof(robj*)*c->argc);
|
|
|
|
memcpy(mc->argv,c->argv,sizeof(robj*)*c->argc);
|
|
|
|
for (j = 0; j < c->argc; j++)
|
|
|
|
incrRefCount(mc->argv[j]);
|
|
|
|
c->mstate.count++;
|
2018-12-11 05:39:18 -05:00
|
|
|
c->mstate.cmd_flags |= c->cmd->flags;
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void discardTransaction(client *c) {
|
2012-03-20 12:32:48 -04:00
|
|
|
freeClientMultiState(c);
|
|
|
|
initClientMultiState(c);
|
2015-07-27 03:41:48 -04:00
|
|
|
c->flags &= ~(CLIENT_MULTI|CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC);
|
2012-03-20 12:32:48 -04:00
|
|
|
unwatchAllKeys(c);
|
|
|
|
}
|
|
|
|
|
2012-11-15 14:11:05 -05:00
|
|
|
/* Flag the transacation as DIRTY_EXEC so that EXEC will fail.
|
|
|
|
* Should be called every time there is an error while queueing a command. */
|
2015-07-26 09:20:46 -04:00
|
|
|
void flagTransaction(client *c) {
|
2015-07-27 03:41:48 -04:00
|
|
|
if (c->flags & CLIENT_MULTI)
|
|
|
|
c->flags |= CLIENT_DIRTY_EXEC;
|
2012-11-15 14:11:05 -05:00
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void multiCommand(client *c) {
|
2015-07-27 03:41:48 -04:00
|
|
|
if (c->flags & CLIENT_MULTI) {
|
2010-09-02 13:52:24 -04:00
|
|
|
addReplyError(c,"MULTI calls can not be nested");
|
2010-06-21 18:07:48 -04:00
|
|
|
return;
|
|
|
|
}
|
2015-07-27 03:41:48 -04:00
|
|
|
c->flags |= CLIENT_MULTI;
|
2010-06-21 18:07:48 -04:00
|
|
|
addReply(c,shared.ok);
|
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void discardCommand(client *c) {
|
2015-07-27 03:41:48 -04:00
|
|
|
if (!(c->flags & CLIENT_MULTI)) {
|
2010-09-02 13:52:24 -04:00
|
|
|
addReplyError(c,"DISCARD without MULTI");
|
2010-06-21 18:07:48 -04:00
|
|
|
return;
|
|
|
|
}
|
2012-03-20 12:32:48 -04:00
|
|
|
discardTransaction(c);
|
2010-06-21 18:07:48 -04:00
|
|
|
addReply(c,shared.ok);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Send a MULTI command to all the slaves and AOF file. Check the execCommand
|
2013-01-16 12:00:20 -05:00
|
|
|
* implementation for more information. */
|
2015-07-26 09:20:46 -04:00
|
|
|
void execCommandPropagateMulti(client *c) {
|
2010-06-21 18:07:48 -04:00
|
|
|
robj *multistring = createStringObject("MULTI",5);
|
|
|
|
|
2013-03-26 05:27:45 -04:00
|
|
|
propagate(server.multiCommand,c->db->id,&multistring,1,
|
2015-07-27 03:41:48 -04:00
|
|
|
PROPAGATE_AOF|PROPAGATE_REPL);
|
2010-06-21 18:07:48 -04:00
|
|
|
decrRefCount(multistring);
|
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void execCommand(client *c) {
|
2010-06-21 18:07:48 -04:00
|
|
|
int j;
|
|
|
|
robj **orig_argv;
|
|
|
|
int orig_argc;
|
2011-07-08 06:59:30 -04:00
|
|
|
struct redisCommand *orig_cmd;
|
2013-03-26 05:58:10 -04:00
|
|
|
int must_propagate = 0; /* Need to propagate MULTI/EXEC to AOF / slaves? */
|
Fix replication of SLAVEOF inside transaction.
In Redis 4.0 replication, with the introduction of PSYNC2, masters and
slaves replicate commands to cascading slaves and to the replication
backlog itself in a different way compared to the past.
Masters actually replicate the effects of client commands.
Slaves just propagate what they receive from masters.
This mechanism can cause problems when the configuration of an instance
is changed from master to slave inside a transaction. For instance
we could send to a master instance the following sequence:
MULTI
SLAVEOF 127.0.0.1 0
EXEC
SLAVEOF NO ONE
Before the fixes in this commit, the MULTI command used to be propagated
into the replication backlog, however after the SLAVEOF command the
instance is a slave, so the EXEC implementation failed to also propagate
the EXEC command. When the slaves of the above instance reconnected,
they were incrementally synchronized just sending a "MULTI". This put
the master client (in the slaves) into MULTI state, breaking the
replication.
Notably even Redis Sentinel uses the above approach in order to guarantee
that configuration changes are always performed together with rewrites
of the configuration and with clients disconnection. Sentiel does:
MULTI
SLAVEOF ...
CONFIG REWRITE
CLIENT KILL TYPE normal
EXEC
So this was a really problematic issue. However even with the fix in
this commit, that will add the final EXEC to the replication stream in
case the instance was switched from master to slave during the
transaction, the result would be to increment the slave replication
offset, so a successive reconnection with the new master, will not
permit a successful partial resynchronization: no way the new master can
provide us with the backlog needed, we incremented our offset to a value
that the new master cannot have.
However the EXEC implementation waits to emit the MULTI, so that if the
commands inside the transaction actually do not need to be replicated,
no commands propagation happens at all. From multi.c:
if (!must_propagate && !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN))) {
execCommandPropagateMulti(c);
must_propagate = 1;
}
The above code is already modified by this commit you are reading.
Now also ADMIN commands do not trigger the emission of MULTI. It is actually
not clear why we do not just check for CMD_WRITE... Probably I wrote it this
way in order to make the code more reliable: better to over-emit MULTI
than not emitting it in time.
So this commit should indeed fix issue #3836 (verified), however it looks
like some reconsideration of this code path is needed in the long term.
BONUS POINT: The reverse bug.
Even in a read only slave "B", in a replication setup like:
A -> B -> C
There are commands without the READONLY nor the ADMIN flag, that are also
not flagged as WRITE commands. An example is just the PING command.
So if we send B the following sequence:
MULTI
PING
SLAVEOF NO ONE
EXEC
The result will be the reverse bug, where only EXEC is emitted, but not the
previous MULTI. However this apparently does not create problems in practice
but it is yet another acknowledge of the fact some work is needed here
in order to make this code path less surprising.
Note that there are many different approaches we could follow. For instance
MULTI/EXEC blocks containing administrative commands may be allowed ONLY
if all the commands are administrative ones, otherwise they could be
denined. When allowed, the commands could simply never be replicated at all.
2017-07-12 05:07:28 -04:00
|
|
|
int was_master = server.masterhost == NULL;
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2015-07-27 03:41:48 -04:00
|
|
|
if (!(c->flags & CLIENT_MULTI)) {
|
2010-09-02 13:52:24 -04:00
|
|
|
addReplyError(c,"EXEC without MULTI");
|
2010-06-21 18:07:48 -04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2012-11-15 14:11:05 -05:00
|
|
|
/* Check if we need to abort the EXEC because:
|
|
|
|
* 1) Some WATCHed key was touched.
|
|
|
|
* 2) There was a previous error while queueing commands.
|
|
|
|
* A failed EXEC in the first case returns a multi bulk nil object
|
|
|
|
* (technically it is not an error but a special behavior), while
|
|
|
|
* in the second an EXECABORT error is returned. */
|
2015-07-27 03:41:48 -04:00
|
|
|
if (c->flags & (CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC)) {
|
|
|
|
addReply(c, c->flags & CLIENT_DIRTY_EXEC ? shared.execaborterr :
|
2012-11-15 14:11:05 -05:00
|
|
|
shared.nullmultibulk);
|
2013-03-26 05:48:15 -04:00
|
|
|
discardTransaction(c);
|
2012-10-16 11:35:50 -04:00
|
|
|
goto handle_monitor;
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
|
2018-12-11 05:39:18 -05:00
|
|
|
/* If there are write commands inside the transaction, and this is a read
|
|
|
|
* only slave, we want to send an error. This happens when the transaction
|
|
|
|
* was initiated when the instance was a master or a writable replica and
|
|
|
|
* then the configuration changed (for example instance was turned into
|
|
|
|
* a replica). */
|
2018-12-11 06:47:36 -05:00
|
|
|
if (!server.loading && server.masterhost && server.repl_slave_ro &&
|
2018-12-11 05:39:18 -05:00
|
|
|
!(c->flags & CLIENT_MASTER) && c->mstate.cmd_flags & CMD_WRITE)
|
|
|
|
{
|
|
|
|
addReplyError(c,
|
|
|
|
"Transaction contains write commands but instance "
|
2018-12-11 06:53:54 -05:00
|
|
|
"is now a read-only replica. EXEC aborted.");
|
2018-12-11 05:39:18 -05:00
|
|
|
discardTransaction(c);
|
|
|
|
goto handle_monitor;
|
|
|
|
}
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Exec all the queued commands */
|
|
|
|
unwatchAllKeys(c); /* Unwatch ASAP otherwise we'll waste CPU cycles */
|
|
|
|
orig_argv = c->argv;
|
|
|
|
orig_argc = c->argc;
|
2011-07-08 06:59:30 -04:00
|
|
|
orig_cmd = c->cmd;
|
2010-09-02 06:38:34 -04:00
|
|
|
addReplyMultiBulkLen(c,c->mstate.count);
|
2010-06-21 18:07:48 -04:00
|
|
|
for (j = 0; j < c->mstate.count; j++) {
|
|
|
|
c->argc = c->mstate.commands[j].argc;
|
|
|
|
c->argv = c->mstate.commands[j].argv;
|
2011-07-08 06:59:30 -04:00
|
|
|
c->cmd = c->mstate.commands[j].cmd;
|
2013-03-26 05:58:10 -04:00
|
|
|
|
Fix replication of SLAVEOF inside transaction.
In Redis 4.0 replication, with the introduction of PSYNC2, masters and
slaves replicate commands to cascading slaves and to the replication
backlog itself in a different way compared to the past.
Masters actually replicate the effects of client commands.
Slaves just propagate what they receive from masters.
This mechanism can cause problems when the configuration of an instance
is changed from master to slave inside a transaction. For instance
we could send to a master instance the following sequence:
MULTI
SLAVEOF 127.0.0.1 0
EXEC
SLAVEOF NO ONE
Before the fixes in this commit, the MULTI command used to be propagated
into the replication backlog, however after the SLAVEOF command the
instance is a slave, so the EXEC implementation failed to also propagate
the EXEC command. When the slaves of the above instance reconnected,
they were incrementally synchronized just sending a "MULTI". This put
the master client (in the slaves) into MULTI state, breaking the
replication.
Notably even Redis Sentinel uses the above approach in order to guarantee
that configuration changes are always performed together with rewrites
of the configuration and with clients disconnection. Sentiel does:
MULTI
SLAVEOF ...
CONFIG REWRITE
CLIENT KILL TYPE normal
EXEC
So this was a really problematic issue. However even with the fix in
this commit, that will add the final EXEC to the replication stream in
case the instance was switched from master to slave during the
transaction, the result would be to increment the slave replication
offset, so a successive reconnection with the new master, will not
permit a successful partial resynchronization: no way the new master can
provide us with the backlog needed, we incremented our offset to a value
that the new master cannot have.
However the EXEC implementation waits to emit the MULTI, so that if the
commands inside the transaction actually do not need to be replicated,
no commands propagation happens at all. From multi.c:
if (!must_propagate && !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN))) {
execCommandPropagateMulti(c);
must_propagate = 1;
}
The above code is already modified by this commit you are reading.
Now also ADMIN commands do not trigger the emission of MULTI. It is actually
not clear why we do not just check for CMD_WRITE... Probably I wrote it this
way in order to make the code more reliable: better to over-emit MULTI
than not emitting it in time.
So this commit should indeed fix issue #3836 (verified), however it looks
like some reconsideration of this code path is needed in the long term.
BONUS POINT: The reverse bug.
Even in a read only slave "B", in a replication setup like:
A -> B -> C
There are commands without the READONLY nor the ADMIN flag, that are also
not flagged as WRITE commands. An example is just the PING command.
So if we send B the following sequence:
MULTI
PING
SLAVEOF NO ONE
EXEC
The result will be the reverse bug, where only EXEC is emitted, but not the
previous MULTI. However this apparently does not create problems in practice
but it is yet another acknowledge of the fact some work is needed here
in order to make this code path less surprising.
Note that there are many different approaches we could follow. For instance
MULTI/EXEC blocks containing administrative commands may be allowed ONLY
if all the commands are administrative ones, otherwise they could be
denined. When allowed, the commands could simply never be replicated at all.
2017-07-12 05:07:28 -04:00
|
|
|
/* Propagate a MULTI request once we encounter the first command which
|
|
|
|
* is not readonly nor an administrative one.
|
2013-03-26 05:58:10 -04:00
|
|
|
* This way we'll deliver the MULTI/..../EXEC block as a whole and
|
|
|
|
* both the AOF and the replication link will have the same consistency
|
|
|
|
* and atomicity guarantees. */
|
Fix replication of SLAVEOF inside transaction.
In Redis 4.0 replication, with the introduction of PSYNC2, masters and
slaves replicate commands to cascading slaves and to the replication
backlog itself in a different way compared to the past.
Masters actually replicate the effects of client commands.
Slaves just propagate what they receive from masters.
This mechanism can cause problems when the configuration of an instance
is changed from master to slave inside a transaction. For instance
we could send to a master instance the following sequence:
MULTI
SLAVEOF 127.0.0.1 0
EXEC
SLAVEOF NO ONE
Before the fixes in this commit, the MULTI command used to be propagated
into the replication backlog, however after the SLAVEOF command the
instance is a slave, so the EXEC implementation failed to also propagate
the EXEC command. When the slaves of the above instance reconnected,
they were incrementally synchronized just sending a "MULTI". This put
the master client (in the slaves) into MULTI state, breaking the
replication.
Notably even Redis Sentinel uses the above approach in order to guarantee
that configuration changes are always performed together with rewrites
of the configuration and with clients disconnection. Sentiel does:
MULTI
SLAVEOF ...
CONFIG REWRITE
CLIENT KILL TYPE normal
EXEC
So this was a really problematic issue. However even with the fix in
this commit, that will add the final EXEC to the replication stream in
case the instance was switched from master to slave during the
transaction, the result would be to increment the slave replication
offset, so a successive reconnection with the new master, will not
permit a successful partial resynchronization: no way the new master can
provide us with the backlog needed, we incremented our offset to a value
that the new master cannot have.
However the EXEC implementation waits to emit the MULTI, so that if the
commands inside the transaction actually do not need to be replicated,
no commands propagation happens at all. From multi.c:
if (!must_propagate && !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN))) {
execCommandPropagateMulti(c);
must_propagate = 1;
}
The above code is already modified by this commit you are reading.
Now also ADMIN commands do not trigger the emission of MULTI. It is actually
not clear why we do not just check for CMD_WRITE... Probably I wrote it this
way in order to make the code more reliable: better to over-emit MULTI
than not emitting it in time.
So this commit should indeed fix issue #3836 (verified), however it looks
like some reconsideration of this code path is needed in the long term.
BONUS POINT: The reverse bug.
Even in a read only slave "B", in a replication setup like:
A -> B -> C
There are commands without the READONLY nor the ADMIN flag, that are also
not flagged as WRITE commands. An example is just the PING command.
So if we send B the following sequence:
MULTI
PING
SLAVEOF NO ONE
EXEC
The result will be the reverse bug, where only EXEC is emitted, but not the
previous MULTI. However this apparently does not create problems in practice
but it is yet another acknowledge of the fact some work is needed here
in order to make this code path less surprising.
Note that there are many different approaches we could follow. For instance
MULTI/EXEC blocks containing administrative commands may be allowed ONLY
if all the commands are administrative ones, otherwise they could be
denined. When allowed, the commands could simply never be replicated at all.
2017-07-12 05:07:28 -04:00
|
|
|
if (!must_propagate && !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN))) {
|
2013-03-26 05:58:10 -04:00
|
|
|
execCommandPropagateMulti(c);
|
|
|
|
must_propagate = 1;
|
|
|
|
}
|
|
|
|
|
2018-08-02 02:59:28 -04:00
|
|
|
call(c,server.loading ? CMD_CALL_NONE : CMD_CALL_FULL);
|
2011-02-23 03:39:29 -05:00
|
|
|
|
|
|
|
/* Commands may alter argc/argv, restore mstate. */
|
|
|
|
c->mstate.commands[j].argc = c->argc;
|
|
|
|
c->mstate.commands[j].argv = c->argv;
|
2011-07-08 06:59:30 -04:00
|
|
|
c->mstate.commands[j].cmd = c->cmd;
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
c->argv = orig_argv;
|
|
|
|
c->argc = orig_argc;
|
2011-07-08 06:59:30 -04:00
|
|
|
c->cmd = orig_cmd;
|
2013-03-26 05:48:15 -04:00
|
|
|
discardTransaction(c);
|
Fix replication of SLAVEOF inside transaction.
In Redis 4.0 replication, with the introduction of PSYNC2, masters and
slaves replicate commands to cascading slaves and to the replication
backlog itself in a different way compared to the past.
Masters actually replicate the effects of client commands.
Slaves just propagate what they receive from masters.
This mechanism can cause problems when the configuration of an instance
is changed from master to slave inside a transaction. For instance
we could send to a master instance the following sequence:
MULTI
SLAVEOF 127.0.0.1 0
EXEC
SLAVEOF NO ONE
Before the fixes in this commit, the MULTI command used to be propagated
into the replication backlog, however after the SLAVEOF command the
instance is a slave, so the EXEC implementation failed to also propagate
the EXEC command. When the slaves of the above instance reconnected,
they were incrementally synchronized just sending a "MULTI". This put
the master client (in the slaves) into MULTI state, breaking the
replication.
Notably even Redis Sentinel uses the above approach in order to guarantee
that configuration changes are always performed together with rewrites
of the configuration and with clients disconnection. Sentiel does:
MULTI
SLAVEOF ...
CONFIG REWRITE
CLIENT KILL TYPE normal
EXEC
So this was a really problematic issue. However even with the fix in
this commit, that will add the final EXEC to the replication stream in
case the instance was switched from master to slave during the
transaction, the result would be to increment the slave replication
offset, so a successive reconnection with the new master, will not
permit a successful partial resynchronization: no way the new master can
provide us with the backlog needed, we incremented our offset to a value
that the new master cannot have.
However the EXEC implementation waits to emit the MULTI, so that if the
commands inside the transaction actually do not need to be replicated,
no commands propagation happens at all. From multi.c:
if (!must_propagate && !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN))) {
execCommandPropagateMulti(c);
must_propagate = 1;
}
The above code is already modified by this commit you are reading.
Now also ADMIN commands do not trigger the emission of MULTI. It is actually
not clear why we do not just check for CMD_WRITE... Probably I wrote it this
way in order to make the code more reliable: better to over-emit MULTI
than not emitting it in time.
So this commit should indeed fix issue #3836 (verified), however it looks
like some reconsideration of this code path is needed in the long term.
BONUS POINT: The reverse bug.
Even in a read only slave "B", in a replication setup like:
A -> B -> C
There are commands without the READONLY nor the ADMIN flag, that are also
not flagged as WRITE commands. An example is just the PING command.
So if we send B the following sequence:
MULTI
PING
SLAVEOF NO ONE
EXEC
The result will be the reverse bug, where only EXEC is emitted, but not the
previous MULTI. However this apparently does not create problems in practice
but it is yet another acknowledge of the fact some work is needed here
in order to make this code path less surprising.
Note that there are many different approaches we could follow. For instance
MULTI/EXEC blocks containing administrative commands may be allowed ONLY
if all the commands are administrative ones, otherwise they could be
denined. When allowed, the commands could simply never be replicated at all.
2017-07-12 05:07:28 -04:00
|
|
|
|
2013-03-26 05:58:10 -04:00
|
|
|
/* Make sure the EXEC command will be propagated as well if MULTI
|
|
|
|
* was already propagated. */
|
Fix replication of SLAVEOF inside transaction.
In Redis 4.0 replication, with the introduction of PSYNC2, masters and
slaves replicate commands to cascading slaves and to the replication
backlog itself in a different way compared to the past.
Masters actually replicate the effects of client commands.
Slaves just propagate what they receive from masters.
This mechanism can cause problems when the configuration of an instance
is changed from master to slave inside a transaction. For instance
we could send to a master instance the following sequence:
MULTI
SLAVEOF 127.0.0.1 0
EXEC
SLAVEOF NO ONE
Before the fixes in this commit, the MULTI command used to be propagated
into the replication backlog, however after the SLAVEOF command the
instance is a slave, so the EXEC implementation failed to also propagate
the EXEC command. When the slaves of the above instance reconnected,
they were incrementally synchronized just sending a "MULTI". This put
the master client (in the slaves) into MULTI state, breaking the
replication.
Notably even Redis Sentinel uses the above approach in order to guarantee
that configuration changes are always performed together with rewrites
of the configuration and with clients disconnection. Sentiel does:
MULTI
SLAVEOF ...
CONFIG REWRITE
CLIENT KILL TYPE normal
EXEC
So this was a really problematic issue. However even with the fix in
this commit, that will add the final EXEC to the replication stream in
case the instance was switched from master to slave during the
transaction, the result would be to increment the slave replication
offset, so a successive reconnection with the new master, will not
permit a successful partial resynchronization: no way the new master can
provide us with the backlog needed, we incremented our offset to a value
that the new master cannot have.
However the EXEC implementation waits to emit the MULTI, so that if the
commands inside the transaction actually do not need to be replicated,
no commands propagation happens at all. From multi.c:
if (!must_propagate && !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN))) {
execCommandPropagateMulti(c);
must_propagate = 1;
}
The above code is already modified by this commit you are reading.
Now also ADMIN commands do not trigger the emission of MULTI. It is actually
not clear why we do not just check for CMD_WRITE... Probably I wrote it this
way in order to make the code more reliable: better to over-emit MULTI
than not emitting it in time.
So this commit should indeed fix issue #3836 (verified), however it looks
like some reconsideration of this code path is needed in the long term.
BONUS POINT: The reverse bug.
Even in a read only slave "B", in a replication setup like:
A -> B -> C
There are commands without the READONLY nor the ADMIN flag, that are also
not flagged as WRITE commands. An example is just the PING command.
So if we send B the following sequence:
MULTI
PING
SLAVEOF NO ONE
EXEC
The result will be the reverse bug, where only EXEC is emitted, but not the
previous MULTI. However this apparently does not create problems in practice
but it is yet another acknowledge of the fact some work is needed here
in order to make this code path less surprising.
Note that there are many different approaches we could follow. For instance
MULTI/EXEC blocks containing administrative commands may be allowed ONLY
if all the commands are administrative ones, otherwise they could be
denined. When allowed, the commands could simply never be replicated at all.
2017-07-12 05:07:28 -04:00
|
|
|
if (must_propagate) {
|
|
|
|
int is_master = server.masterhost == NULL;
|
|
|
|
server.dirty++;
|
|
|
|
/* If inside the MULTI/EXEC block this instance was suddenly
|
|
|
|
* switched from master to slave (using the SLAVEOF command), the
|
|
|
|
* initial MULTI was propagated into the replication backlog, but the
|
|
|
|
* rest was not. We need to make sure to at least terminate the
|
|
|
|
* backlog with the final EXEC. */
|
|
|
|
if (server.repl_backlog && was_master && !is_master) {
|
|
|
|
char *execcmd = "*1\r\n$4\r\nEXEC\r\n";
|
|
|
|
feedReplicationBacklog(execcmd,strlen(execcmd));
|
|
|
|
}
|
|
|
|
}
|
2012-10-16 11:35:50 -04:00
|
|
|
|
|
|
|
handle_monitor:
|
|
|
|
/* Send EXEC to clients waiting data from MONITOR. We do it here
|
|
|
|
* since the natural order of commands execution is actually:
|
|
|
|
* MUTLI, EXEC, ... commands inside transaction ...
|
2015-07-27 03:41:48 -04:00
|
|
|
* Instead EXEC is flagged as CMD_SKIP_MONITOR in the command
|
2012-10-16 11:35:50 -04:00
|
|
|
* table, and we do it here with correct ordering. */
|
|
|
|
if (listLength(server.monitors) && !server.loading)
|
|
|
|
replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
|
|
|
|
*
|
|
|
|
* The implementation uses a per-DB hash table mapping keys to list of clients
|
|
|
|
* WATCHing those keys, so that given a key that is going to be modified
|
|
|
|
* we can mark all the associated clients as dirty.
|
|
|
|
*
|
|
|
|
* Also every client contains a list of WATCHed keys so that's possible to
|
|
|
|
* un-watch such keys when the client is freed or when UNWATCH is called. */
|
|
|
|
|
|
|
|
/* In the client->watched_keys list we need to use watchedKey structures
|
|
|
|
* as in order to identify a key in Redis we need both the key name and the
|
|
|
|
* DB */
|
|
|
|
typedef struct watchedKey {
|
|
|
|
robj *key;
|
|
|
|
redisDb *db;
|
|
|
|
} watchedKey;
|
|
|
|
|
|
|
|
/* Watch for the specified key */
|
2015-07-26 09:20:46 -04:00
|
|
|
void watchForKey(client *c, robj *key) {
|
2010-06-21 18:07:48 -04:00
|
|
|
list *clients = NULL;
|
|
|
|
listIter li;
|
|
|
|
listNode *ln;
|
|
|
|
watchedKey *wk;
|
|
|
|
|
|
|
|
/* Check if we are already watching for this key */
|
|
|
|
listRewind(c->watched_keys,&li);
|
|
|
|
while((ln = listNext(&li))) {
|
|
|
|
wk = listNodeValue(ln);
|
|
|
|
if (wk->db == c->db && equalStringObjects(key,wk->key))
|
|
|
|
return; /* Key already watched */
|
|
|
|
}
|
|
|
|
/* This key is not already watched in this DB. Let's add it */
|
|
|
|
clients = dictFetchValue(c->db->watched_keys,key);
|
2014-06-26 12:48:40 -04:00
|
|
|
if (!clients) {
|
2010-06-21 18:07:48 -04:00
|
|
|
clients = listCreate();
|
|
|
|
dictAdd(c->db->watched_keys,key,clients);
|
|
|
|
incrRefCount(key);
|
|
|
|
}
|
|
|
|
listAddNodeTail(clients,c);
|
2013-01-16 12:00:20 -05:00
|
|
|
/* Add the new key to the list of keys watched by this client */
|
2010-06-21 18:07:48 -04:00
|
|
|
wk = zmalloc(sizeof(*wk));
|
|
|
|
wk->key = key;
|
|
|
|
wk->db = c->db;
|
|
|
|
incrRefCount(key);
|
|
|
|
listAddNodeTail(c->watched_keys,wk);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Unwatch all the keys watched by this client. To clean the EXEC dirty
|
|
|
|
* flag is up to the caller. */
|
2015-07-26 09:20:46 -04:00
|
|
|
void unwatchAllKeys(client *c) {
|
2010-06-21 18:07:48 -04:00
|
|
|
listIter li;
|
|
|
|
listNode *ln;
|
|
|
|
|
|
|
|
if (listLength(c->watched_keys) == 0) return;
|
|
|
|
listRewind(c->watched_keys,&li);
|
|
|
|
while((ln = listNext(&li))) {
|
|
|
|
list *clients;
|
|
|
|
watchedKey *wk;
|
|
|
|
|
|
|
|
/* Lookup the watched key -> clients list and remove the client
|
|
|
|
* from the list */
|
|
|
|
wk = listNodeValue(ln);
|
|
|
|
clients = dictFetchValue(wk->db->watched_keys, wk->key);
|
2015-07-26 09:29:53 -04:00
|
|
|
serverAssertWithInfo(c,NULL,clients != NULL);
|
2010-06-21 18:07:48 -04:00
|
|
|
listDelNode(clients,listSearchKey(clients,c));
|
|
|
|
/* Kill the entry at all if this was the only client */
|
|
|
|
if (listLength(clients) == 0)
|
|
|
|
dictDelete(wk->db->watched_keys, wk->key);
|
|
|
|
/* Remove this watched key from the client->watched list */
|
|
|
|
listDelNode(c->watched_keys,ln);
|
|
|
|
decrRefCount(wk->key);
|
|
|
|
zfree(wk);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* "Touch" a key, so that if this key is being WATCHed by some client the
|
|
|
|
* next EXEC will fail. */
|
|
|
|
void touchWatchedKey(redisDb *db, robj *key) {
|
|
|
|
list *clients;
|
|
|
|
listIter li;
|
|
|
|
listNode *ln;
|
|
|
|
|
|
|
|
if (dictSize(db->watched_keys) == 0) return;
|
|
|
|
clients = dictFetchValue(db->watched_keys, key);
|
|
|
|
if (!clients) return;
|
|
|
|
|
2015-07-27 03:41:48 -04:00
|
|
|
/* Mark all the clients watching this key as CLIENT_DIRTY_CAS */
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Check if we are already watching for this key */
|
|
|
|
listRewind(clients,&li);
|
|
|
|
while((ln = listNext(&li))) {
|
2015-07-26 09:20:46 -04:00
|
|
|
client *c = listNodeValue(ln);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2015-07-27 03:41:48 -04:00
|
|
|
c->flags |= CLIENT_DIRTY_CAS;
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* On FLUSHDB or FLUSHALL all the watched keys that are present before the
|
|
|
|
* flush but will be deleted as effect of the flushing operation should
|
|
|
|
* be touched. "dbid" is the DB that's getting the flush. -1 if it is
|
|
|
|
* a FLUSHALL operation (all the DBs flushed). */
|
|
|
|
void touchWatchedKeysOnFlush(int dbid) {
|
|
|
|
listIter li1, li2;
|
|
|
|
listNode *ln;
|
|
|
|
|
|
|
|
/* For every client, check all the waited keys */
|
|
|
|
listRewind(server.clients,&li1);
|
|
|
|
while((ln = listNext(&li1))) {
|
2015-07-26 09:20:46 -04:00
|
|
|
client *c = listNodeValue(ln);
|
2010-06-21 18:07:48 -04:00
|
|
|
listRewind(c->watched_keys,&li2);
|
|
|
|
while((ln = listNext(&li2))) {
|
|
|
|
watchedKey *wk = listNodeValue(ln);
|
|
|
|
|
|
|
|
/* For every watched key matching the specified DB, if the
|
|
|
|
* key exists, mark the client as dirty, as the key will be
|
|
|
|
* removed. */
|
|
|
|
if (dbid == -1 || wk->db->id == dbid) {
|
|
|
|
if (dictFind(wk->db->dict, wk->key->ptr) != NULL)
|
2015-07-27 03:41:48 -04:00
|
|
|
c->flags |= CLIENT_DIRTY_CAS;
|
2010-06-21 18:07:48 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void watchCommand(client *c) {
|
2010-06-21 18:07:48 -04:00
|
|
|
int j;
|
|
|
|
|
2015-07-27 03:41:48 -04:00
|
|
|
if (c->flags & CLIENT_MULTI) {
|
2010-09-02 13:52:24 -04:00
|
|
|
addReplyError(c,"WATCH inside MULTI is not allowed");
|
2010-06-21 18:07:48 -04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
for (j = 1; j < c->argc; j++)
|
|
|
|
watchForKey(c,c->argv[j]);
|
|
|
|
addReply(c,shared.ok);
|
|
|
|
}
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
void unwatchCommand(client *c) {
|
2010-06-21 18:07:48 -04:00
|
|
|
unwatchAllKeys(c);
|
2015-07-27 03:41:48 -04:00
|
|
|
c->flags &= (~CLIENT_DIRTY_CAS);
|
2010-06-21 18:07:48 -04:00
|
|
|
addReply(c,shared.ok);
|
|
|
|
}
|