2012-11-08 12:25:23 -05:00
/* Asynchronous replication implementation.
*
* Copyright ( c ) 2009 - 2012 , Salvatore Sanfilippo < antirez at gmail dot com >
* All rights reserved .
*
* Redistribution and use in source and binary forms , with or without
* modification , are permitted provided that the following conditions are met :
*
* * Redistributions of source code must retain the above copyright notice ,
* this list of conditions and the following disclaimer .
* * Redistributions in binary form must reproduce the above copyright
* notice , this list of conditions and the following disclaimer in the
* documentation and / or other materials provided with the distribution .
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission .
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS " AS IS "
* AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL , SPECIAL , EXEMPLARY , OR
* CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT LIMITED TO , PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE , DATA , OR PROFITS ; OR BUSINESS
* INTERRUPTION ) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY , WHETHER IN
* CONTRACT , STRICT LIABILITY , OR TORT ( INCLUDING NEGLIGENCE OR OTHERWISE )
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE , EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE .
*/
2010-06-21 18:07:48 -04:00
# include "redis.h"
# include <sys/time.h>
# include <unistd.h>
# include <fcntl.h>
2012-09-17 06:45:57 -04:00
# include <sys/socket.h>
2010-06-21 18:07:48 -04:00
# include <sys/stat.h>
2010-11-04 12:29:53 -04:00
/* ---------------------------------- MASTER -------------------------------- */
2010-06-21 18:07:48 -04:00
/* Propagate a command (argv/argc) to every slave in the 'slaves' list.
 *
 * Slaves that are still waiting for a BGSAVE to start are skipped. Every
 * other slave receives the command as a multi bulk reply; for slaves that
 * are waiting for the initial SYNC to complete the reply is simply queued
 * in the output buffer.
 *
 * When 'dictid' differs from server.slaveseldb a SELECT command for
 * 'dictid' is sent to the slave before the command itself. Before
 * returning, server.slaveseldb is set to 'dictid'. */
void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
    listIter iter;
    listNode *node;

    listRewind(slaves,&iter);
    while ((node = listNext(&iter)) != NULL) {
        redisClient *slave = node->value;
        int i;

        /* Nothing to send yet to slaves whose BGSAVE has not started. */
        if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;

        /* Emit a SELECT first if the replication stream is currently
         * pointing to a different DB. */
        if (server.slaveseldb != dictid) {
            robj *selectcmd;

            if (dictid < 0 || dictid >= REDIS_SHARED_SELECT_CMDS) {
                /* No shared object for this DB id: build one on the fly. */
                selectcmd = createObject(REDIS_STRING,
                    sdscatprintf(sdsempty(),"select %d\r\n",dictid));
            } else {
                /* Use the shared object, taking a reference that is
                 * released right after the reply is queued. */
                selectcmd = shared.select[dictid];
                incrRefCount(selectcmd);
            }
            addReply(slave,selectcmd);
            decrRefCount(selectcmd);
        }

        /* The command itself, in multi bulk format. */
        addReplyMultiBulkLen(slave,argc);
        i = 0;
        while (i < argc) {
            addReplyBulk(slave,argv[i]);
            i++;
        }
    }
    server.slaveseldb = dictid;
}
2012-03-07 06:12:15 -05:00
/* Send a human readable representation of a command to every client in
 * the 'monitors' list.
 *
 * The line has the form:
 *
 *   +<seconds>.<microseconds> [<dictid> <origin>] <arg> <arg> ...\r\n
 *
 * where <origin> is "lua" for the Lua client, "unix:<path>" for clients
 * connected via Unix socket, and "<ip>:<port>" otherwise. Arguments are
 * quoted: integer encoded objects are printed as a quoted number, all the
 * others via sdscatrepr(). */
void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **argv, int argc) {
    struct timeval now;
    listIter iter;
    listNode *node;
    robj *line;
    sds repr;
    int i;

    /* Timestamp prefix. */
    repr = sdsnew("+");
    gettimeofday(&now,NULL);
    repr = sdscatprintf(repr,"%ld.%06ld ",(long)now.tv_sec,(long)now.tv_usec);

    /* DB id and origin of the command. */
    if (c->flags & REDIS_LUA_CLIENT) {
        repr = sdscatprintf(repr,"[%d lua] ",dictid);
    } else if (c->flags & REDIS_UNIX_SOCKET) {
        repr = sdscatprintf(repr,"[%d unix:%s] ",dictid,server.unixsocket);
    } else {
        char ip[32];
        int port;

        anetPeerToString(c->fd,ip,&port);
        repr = sdscatprintf(repr,"[%d %s:%d] ",dictid,ip,port);
    }

    /* Arguments, separated by a single space. */
    for (i = 0; i < argc; i++) {
        robj *arg = argv[i];

        if (i > 0) repr = sdscatlen(repr," ",1);
        if (arg->encoding == REDIS_ENCODING_INT)
            repr = sdscatprintf(repr,"\"%ld\"",(long)arg->ptr);
        else
            repr = sdscatrepr(repr,(char*)arg->ptr,sdslen(arg->ptr));
    }
    repr = sdscatlen(repr,"\r\n",2);

    /* Wrap the line into an object and queue it on every monitor. */
    line = createObject(REDIS_STRING,repr);
    listRewind(monitors,&iter);
    while ((node = listNext(&iter)) != NULL) {
        redisClient *monitor = node->value;
        addReply(monitor,line);
    }
    decrRefCount(line);
}
/* SYNC command implementation (master side).
 *
 * The request is ignored if the client is already flagged as a slave, and
 * refused with an error if this instance is itself a slave not connected
 * with its master, or if the client still has pending replies.
 *
 * Otherwise the client is attached to an RDB dump:
 *  - BGSAVE in progress and another slave is already waiting for its end:
 *    that slave's output buffer is copied and the client waits for the
 *    same dump (REDIS_REPL_WAIT_BGSAVE_END).
 *  - BGSAVE in progress but no such slave: the client waits for the next
 *    BGSAVE (REDIS_REPL_WAIT_BGSAVE_START).
 *  - No BGSAVE in progress: one is started now.
 *
 * Finally the client is flagged as slave and appended to server.slaves. */
void syncCommand(redisClient *c) {
    /* Already a slave: nothing to do. */
    if (c->flags & REDIS_SLAVE) return;

    /* We are a slave ourselves and the link with our master is not ok. */
    if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED) {
        addReplyError(c,"Can't SYNC while not connected with my master");
        return;
    }

    /* We need a fresh reply buffer to register the differences between
     * the BGSAVE and the current dataset, so that it can be copied to
     * other slaves if needed. */
    if (listLength(c->reply) != 0) {
        addReplyError(c,"SYNC is invalid with pending input");
        return;
    }

    redisLog(REDIS_NOTICE,"Slave ask for synchronization");

    if (server.rdb_child_pid == -1) {
        /* No background save in progress: start one for this slave. */
        redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
        if (rdbSaveBackground(server.rdb_filename) != REDIS_OK) {
            redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
            addReplyError(c,"Unable to perform background save");
            return;
        }
        c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
    } else {
        /* A background save is in progress. It is usable only if some
         * other slave is already registering the differences since the
         * fork: look for such a slave. */
        redisClient *donor = NULL;
        listIter iter;
        listNode *node;

        listRewind(server.slaves,&iter);
        while ((node = listNext(&iter)) != NULL) {
            redisClient *candidate = node->value;

            if (candidate->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
                donor = candidate;
                break;
            }
        }

        if (donor == NULL) {
            /* We have to wait for the next BGSAVE. */
            c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
            redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
        } else {
            /* Share the dump in progress: start from the same buffer. */
            copyClientOutputBuffer(c,donor);
            c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
            redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
        }
    }

    if (server.repl_disable_tcp_nodelay)
        anetDisableTcpNoDelay(NULL,c->fd); /* Non critical if it fails. */

    c->repldbfd = -1;
    c->flags |= REDIS_SLAVE;
    server.slaveseldb = -1; /* Force to re-emit the SELECT command. */
    listAddNodeTail(server.slaves,c);
}
2012-06-26 03:47:47 -04:00
/* REPLCONF <option> <value> <option> <value> ...
 *
 * Used by a slave to configure the replication process before starting it
 * with the SYNC command.
 *
 * The only option currently handled is "listening-port", whose value is
 * stored in the client's slave_listening_port field so that the master
 * can list slaves together with their listening ports in the INFO output.
 *
 * Replies with a syntax error if options and values are not paired, with
 * an error for an unrecognized option (processing stops there), and with
 * OK once every pair has been processed. */
void replconfCommand(redisClient *c) {
    int idx;

    /* The command name plus option-value pairs always gives an odd
     * number of arguments. */
    if (c->argc % 2 == 0) {
        addReply(c,shared.syntaxerr);
        return;
    }

    for (idx = 1; idx < c->argc; idx += 2) {
        robj *option = c->argv[idx];
        robj *value = c->argv[idx+1];
        long port;

        if (strcasecmp(option->ptr,"listening-port") != 0) {
            addReplyErrorFormat(c,"Unrecognized REPLCONF option: %s",
                (char*)option->ptr);
            return;
        }

        /* On conversion failure the error was already sent to the client. */
        if (getLongFromObjectOrReply(c,value,&port,NULL) != REDIS_OK)
            return;
        c->slave_listening_port = port;
    }
    addReply(c,shared.ok);
}
2010-06-21 18:07:48 -04:00
/* Writable event handler used to transfer the RDB file to a slave.
 *
 * 'privdata' is the slave client. At every call at most REDIS_IOBUF_LEN
 * bytes are read from the dump file (slave->repldbfd) at offset
 * slave->repldboff and written to the socket 'fd'. When the offset is
 * still zero the "$<size>\r\n" bulk header is written first.
 *
 * On read/write errors the slave is freed. When the whole file has been
 * sent the dump file is closed, the slave is marked online and the
 * writable handler is replaced with sendReplyToClient. */
void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
    redisClient *slave = privdata;
    char chunk[REDIS_IOBUF_LEN];
    ssize_t nread, nsent;
    REDIS_NOTUSED(el);
    REDIS_NOTUSED(mask);

    if (slave->repldboff == 0) {
        /* Write the bulk count before transferring the DB. In theory we
         * don't know how much room there is in the socket output buffer,
         * but in practice SO_SNDLOWAT (the minimum count for output
         * operations) will never be smaller than the few bytes we need. */
        sds header = sdscatprintf(sdsempty(),"$%lld\r\n",
            (unsigned long long) slave->repldbsize);
        int header_sent =
            (write(fd,header,sdslen(header)) == (signed)sdslen(header));

        sdsfree(header);
        if (!header_sent) {
            freeClient(slave);
            return;
        }
    }

    /* Read the next chunk of the dump file. */
    lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
    nread = read(slave->repldbfd,chunk,REDIS_IOBUF_LEN);
    if (nread <= 0) {
        redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
            (nread == 0) ? "premature EOF" : strerror(errno));
        freeClient(slave);
        return;
    }

    /* Send it; a short write is fine, we restart from the new offset. */
    nsent = write(fd,chunk,nread);
    if (nsent == -1) {
        redisLog(REDIS_VERBOSE,"Write error sending DB to slave: %s",
            strerror(errno));
        freeClient(slave);
        return;
    }
    slave->repldboff += nsent;

    /* More data to send at the next writable event. */
    if (slave->repldboff != slave->repldbsize) return;

    /* Transfer completed: switch the slave to the normal reply handler. */
    close(slave->repldbfd);
    slave->repldbfd = -1;
    aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
    slave->replstate = REDIS_REPL_ONLINE;
    if (aeCreateFileEvent(server.el,slave->fd,AE_WRITABLE,
        sendReplyToClient,slave) == AE_ERR)
    {
        freeClient(slave);
        return;
    }
    redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
}
2013-01-16 12:00:20 -05:00
/* This function is called at the end of every background saving.
2010-06-21 18:07:48 -04:00
* The argument bgsaveerr is REDIS_OK if the background saving succeeded
* otherwise REDIS_ERR is passed to the function .
*
* The goal of this function is to handle slaves waiting for a successful
* background saving in order to perform non - blocking synchronization . */
void updateSlavesWaitingBgsave ( int bgsaveerr ) {
listNode * ln ;
int startbgsave = 0 ;
listIter li ;
listRewind ( server . slaves , & li ) ;
while ( ( ln = listNext ( & li ) ) ) {
redisClient * slave = ln - > value ;
if ( slave - > replstate = = REDIS_REPL_WAIT_BGSAVE_START ) {
startbgsave = 1 ;
slave - > replstate = REDIS_REPL_WAIT_BGSAVE_END ;
} else if ( slave - > replstate = = REDIS_REPL_WAIT_BGSAVE_END ) {
struct redis_stat buf ;
if ( bgsaveerr ! = REDIS_OK ) {
freeClient ( slave ) ;
redisLog ( REDIS_WARNING , " SYNC failed. BGSAVE child returned an error " ) ;
continue ;
}
2011-12-21 06:22:13 -05:00
if ( ( slave - > repldbfd = open ( server . rdb_filename , O_RDONLY ) ) = = - 1 | |
2010-06-21 18:07:48 -04:00
redis_fstat ( slave - > repldbfd , & buf ) = = - 1 ) {
freeClient ( slave ) ;
redisLog ( REDIS_WARNING , " SYNC failed. Can't open/stat DB after BGSAVE: %s " , strerror ( errno ) ) ;
continue ;
}
slave - > repldboff = 0 ;
slave - > repldbsize = buf . st_size ;
slave - > replstate = REDIS_REPL_SEND_BULK ;
aeDeleteFileEvent ( server . el , slave - > fd , AE_WRITABLE ) ;
if ( aeCreateFileEvent ( server . el , slave - > fd , AE_WRITABLE , sendBulkToSlave , slave ) = = AE_ERR ) {
freeClient ( slave ) ;
continue ;
}
}
}
if ( startbgsave ) {
2011-12-21 06:22:13 -05:00
if ( rdbSaveBackground ( server . rdb_filename ) ! = REDIS_OK ) {
2010-06-21 18:07:48 -04:00
listIter li ;
listRewind ( server . slaves , & li ) ;
redisLog ( REDIS_WARNING , " SYNC failed. BGSAVE failed " ) ;
while ( ( ln = listNext ( & li ) ) ) {
redisClient * slave = ln - > value ;
if ( slave - > replstate = = REDIS_REPL_WAIT_BGSAVE_START )
freeClient ( slave ) ;
}
}
}
}
2010-11-04 12:29:53 -04:00
/* ----------------------------------- SLAVE -------------------------------- */
/* Abort the async download of the bulk dataset while SYNC-ing with master */
void replicationAbortSyncTransfer ( void ) {
2011-12-21 06:23:18 -05:00
redisAssert ( server . repl_state = = REDIS_REPL_TRANSFER ) ;
2010-11-04 12:29:53 -04:00
aeDeleteFileEvent ( server . el , server . repl_transfer_s , AE_READABLE ) ;
close ( server . repl_transfer_s ) ;
close ( server . repl_transfer_fd ) ;
unlink ( server . repl_transfer_tmpfile ) ;
zfree ( server . repl_transfer_tmpfile ) ;
2011-12-21 06:23:18 -05:00
server . repl_state = REDIS_REPL_CONNECT ;
2010-11-04 12:29:53 -04:00
}
/* Asynchronously read the SYNC payload we receive from a master */
2012-08-24 13:28:44 -04:00
# define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */
2010-11-04 12:29:53 -04:00
void readSyncBulkPayload ( aeEventLoop * el , int fd , void * privdata , int mask ) {
2010-11-04 13:09:35 -04:00
char buf [ 4096 ] ;
2010-11-04 12:35:03 -04:00
ssize_t nread , readlen ;
2012-08-24 13:28:44 -04:00
off_t left ;
2010-11-04 12:35:03 -04:00
REDIS_NOTUSED ( el ) ;
REDIS_NOTUSED ( privdata ) ;
REDIS_NOTUSED ( mask ) ;
2010-11-04 12:29:53 -04:00
2012-08-24 13:28:44 -04:00
/* If repl_transfer_size == -1 we still have to read the bulk length
2010-11-04 13:09:35 -04:00
* from the master reply . */
2012-08-24 13:28:44 -04:00
if ( server . repl_transfer_size = = - 1 ) {
2012-03-31 05:23:30 -04:00
if ( syncReadLine ( fd , buf , 1024 , server . repl_syncio_timeout * 1000 ) = = - 1 ) {
2010-11-04 13:09:35 -04:00
redisLog ( REDIS_WARNING ,
" I/O error reading bulk count from MASTER: %s " ,
strerror ( errno ) ) ;
2011-05-22 06:41:24 -04:00
goto error ;
2010-11-04 13:09:35 -04:00
}
2011-05-22 06:41:24 -04:00
2010-11-04 13:09:35 -04:00
if ( buf [ 0 ] = = ' - ' ) {
redisLog ( REDIS_WARNING ,
" MASTER aborted replication with an error: %s " ,
buf + 1 ) ;
2011-05-22 06:41:24 -04:00
goto error ;
2011-01-20 07:18:23 -05:00
} else if ( buf [ 0 ] = = ' \0 ' ) {
/* At this stage just a newline works as a PING in order to take
* the connection live . So we refresh our last interaction
* timestamp . */
2012-03-27 11:39:58 -04:00
server . repl_transfer_lastio = server . unixtime ;
2011-01-20 07:18:23 -05:00
return ;
2010-11-04 13:09:35 -04:00
} else if ( buf [ 0 ] ! = ' $ ' ) {
redisLog ( REDIS_WARNING , " Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right? " ) ;
2011-05-22 06:41:24 -04:00
goto error ;
2010-11-04 13:09:35 -04:00
}
2012-08-24 13:28:44 -04:00
server . repl_transfer_size = strtol ( buf + 1 , NULL , 10 ) ;
2010-11-04 13:09:35 -04:00
redisLog ( REDIS_NOTICE ,
" MASTER <-> SLAVE sync: receiving %ld bytes from master " ,
2012-08-24 13:28:44 -04:00
server . repl_transfer_size ) ;
2010-11-04 13:09:35 -04:00
return ;
}
/* Read bulk data */
2012-08-24 13:28:44 -04:00
left = server . repl_transfer_size - server . repl_transfer_read ;
readlen = ( left < ( signed ) sizeof ( buf ) ) ? left : ( signed ) sizeof ( buf ) ;
2010-11-04 12:29:53 -04:00
nread = read ( fd , buf , readlen ) ;
if ( nread < = 0 ) {
redisLog ( REDIS_WARNING , " I/O error trying to sync with MASTER: %s " ,
( nread = = - 1 ) ? strerror ( errno ) : " connection lost " ) ;
replicationAbortSyncTransfer ( ) ;
return ;
}
2012-03-27 11:39:58 -04:00
server . repl_transfer_lastio = server . unixtime ;
2010-11-04 12:29:53 -04:00
if ( write ( server . repl_transfer_fd , buf , nread ) ! = nread ) {
2012-04-25 15:21:56 -04:00
redisLog ( REDIS_WARNING , " Write error or short write writing to the DB dump file needed for MASTER <-> SLAVE synchronization: %s " , strerror ( errno ) ) ;
2011-05-22 06:41:24 -04:00
goto error ;
2010-11-04 12:29:53 -04:00
}
2012-08-24 13:28:44 -04:00
server . repl_transfer_read + = nread ;
/* Sync data on disk from time to time, otherwise at the end of the transfer
* we may suffer a big delay as the memory buffers are copied into the
* actual disk . */
if ( server . repl_transfer_read > =
server . repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC )
{
off_t sync_size = server . repl_transfer_read -
server . repl_transfer_last_fsync_off ;
rdb_fsync_range ( server . repl_transfer_fd ,
server . repl_transfer_last_fsync_off , sync_size ) ;
server . repl_transfer_last_fsync_off + = sync_size ;
}
2010-11-04 12:29:53 -04:00
/* Check if the transfer is now complete */
2012-08-24 13:28:44 -04:00
if ( server . repl_transfer_read = = server . repl_transfer_size ) {
2011-12-21 06:22:13 -05:00
if ( rename ( server . repl_transfer_tmpfile , server . rdb_filename ) = = - 1 ) {
2010-11-04 12:29:53 -04:00
redisLog ( REDIS_WARNING , " Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s " , strerror ( errno ) ) ;
replicationAbortSyncTransfer ( ) ;
return ;
}
2010-11-04 13:14:20 -04:00
redisLog ( REDIS_NOTICE , " MASTER <-> SLAVE sync: Loading DB in memory " ) ;
2013-02-08 04:26:19 -05:00
signalFlushedDb ( - 1 ) ;
2010-11-04 12:29:53 -04:00
emptyDb ( ) ;
2010-11-12 14:02:20 -05:00
/* Before loading the DB into memory we need to delete the readable
* handler , otherwise it will get called recursively since
* rdbLoad ( ) will call the event loop to process events from time to
* time for non blocking loading . */
aeDeleteFileEvent ( server . el , server . repl_transfer_s , AE_READABLE ) ;
2011-12-21 06:22:13 -05:00
if ( rdbLoad ( server . rdb_filename ) ! = REDIS_OK ) {
2010-11-04 12:29:53 -04:00
redisLog ( REDIS_WARNING , " Failed trying to load the MASTER synchronization DB from disk " ) ;
replicationAbortSyncTransfer ( ) ;
return ;
}
/* Final setup of the connected slave <- master link */
zfree ( server . repl_transfer_tmpfile ) ;
close ( server . repl_transfer_fd ) ;
server . master = createClient ( server . repl_transfer_s ) ;
server . master - > flags | = REDIS_MASTER ;
server . master - > authenticated = 1 ;
2011-12-21 06:23:18 -05:00
server . repl_state = REDIS_REPL_CONNECTED ;
2010-11-04 13:09:35 -04:00
redisLog ( REDIS_NOTICE , " MASTER <-> SLAVE sync: Finished with success " ) ;
2011-12-15 10:07:49 -05:00
/* Restart the AOF subsystem now that we finished the sync. This
* will trigger an AOF rewrite , and when done will start appending
* to the new file . */
2011-12-21 04:31:34 -05:00
if ( server . aof_state ! = REDIS_AOF_OFF ) {
2011-12-15 10:07:49 -05:00
int retry = 10 ;
stopAppendOnly ( ) ;
while ( retry - - & & startAppendOnly ( ) = = REDIS_ERR ) {
2013-01-16 12:00:20 -05:00
redisLog ( REDIS_WARNING , " Failed enabling the AOF after successful master synchronization! Trying it again in one second. " ) ;
2011-12-15 10:07:49 -05:00
sleep ( 1 ) ;
}
if ( ! retry ) {
redisLog ( REDIS_WARNING , " FATAL: this slave instance finished the synchronization with its master, but the AOF can't be turned on. Exiting now. " ) ;
exit ( 1 ) ;
}
}
2010-11-04 12:29:53 -04:00
}
2011-05-22 06:41:24 -04:00
return ;
error :
replicationAbortSyncTransfer ( ) ;
return ;
2010-11-04 12:29:53 -04:00
}
2012-06-26 03:47:47 -04:00
/* Send a synchronous command to the master. Used to send AUTH and
2012-06-27 05:26:37 -04:00
* REPLCONF commands before starting the replication with SYNC .
2012-06-26 03:47:47 -04:00
*
* On success NULL is returned .
* On error an sds string describing the error is returned .
*/
char * sendSynchronousCommand ( int fd , . . . ) {
va_list ap ;
sds cmd = sdsempty ( ) ;
char * arg , buf [ 256 ] ;
/* Create the command to send to the master, we use simple inline
* protocol for simplicity as currently we only send simple strings . */
va_start ( ap , fd ) ;
while ( 1 ) {
arg = va_arg ( ap , char * ) ;
if ( arg = = NULL ) break ;
if ( sdslen ( cmd ) ! = 0 ) cmd = sdscatlen ( cmd , " " , 1 ) ;
cmd = sdscat ( cmd , arg ) ;
}
cmd = sdscatlen ( cmd , " \r \n " , 2 ) ;
/* Transfer command to the server. */
if ( syncWrite ( fd , cmd , sdslen ( cmd ) , server . repl_syncio_timeout * 1000 ) = = - 1 ) {
sdsfree ( cmd ) ;
return sdscatprintf ( sdsempty ( ) , " Writing to master: %s " ,
strerror ( errno ) ) ;
}
sdsfree ( cmd ) ;
/* Read the reply from the server. */
if ( syncReadLine ( fd , buf , sizeof ( buf ) , server . repl_syncio_timeout * 1000 ) = = - 1 )
{
return sdscatprintf ( sdsempty ( ) , " Reading from master: %s " ,
strerror ( errno ) ) ;
}
/* Check for errors from the server. */
if ( buf [ 0 ] ! = ' + ' ) {
return sdscatprintf ( sdsempty ( ) , " Error from master: %s " , buf ) ;
}
return NULL ; /* No errors. */
}
2011-05-19 12:53:06 -04:00
/* Event handler driving the handshake with the master on socket 'fd'.
 *
 * It is installed by connectWithMaster() and goes through these steps:
 *
 * 1) REDIS_REPL_CONNECTING: the non blocking connect completed. A PING is
 *    sent and the state becomes REDIS_REPL_RECEIVE_PONG.
 * 2) REDIS_REPL_RECEIVE_PONG: the PING reply is read (both '+' and '-'
 *    replies are accepted), then AUTH (if server.masterauth is set),
 *    REPLCONF listening-port and SYNC are sent synchronously.
 * 3) A temp file for the dump is created and readSyncBulkPayload() is
 *    installed as readable handler; the state becomes
 *    REDIS_REPL_TRANSFER.
 *
 * On errors the socket is closed, the temp file (if already created) is
 * closed and removed, and the state goes back to REDIS_REPL_CONNECT. */
void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
    char tmpfile[256], *err;
    int dfd = -1, maxtries = 5;
    int sockerr = 0;
    socklen_t errlen = sizeof(sockerr);
    REDIS_NOTUSED(el);
    REDIS_NOTUSED(privdata);
    REDIS_NOTUSED(mask);

    /* If this event fired after the user turned the instance into a master
     * with SLAVEOF NO ONE we must just return ASAP. */
    if (server.repl_state == REDIS_REPL_NONE) {
        close(fd);
        return;
    }

    /* Check for errors in the socket. */
    if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sockerr, &errlen) == -1)
        sockerr = errno;
    if (sockerr) {
        aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
        redisLog(REDIS_WARNING,"Error condition on socket for SYNC: %s",
            strerror(sockerr));
        goto error;
    }

    /* If we were connecting, it's time to send a non blocking PING, we want to
     * make sure the master is able to reply before going into the actual
     * replication process where we have long timeouts in the order of
     * seconds (in the meantime the slave would block). */
    if (server.repl_state == REDIS_REPL_CONNECTING) {
        redisLog(REDIS_NOTICE,"Non blocking connect for SYNC fired the event.");
        /* Delete the writable event so that the readable event remains
         * registered and we can wait for the PONG reply. */
        aeDeleteFileEvent(server.el,fd,AE_WRITABLE);
        server.repl_state = REDIS_REPL_RECEIVE_PONG;
        /* Send the PING, don't check for errors at all, we have the timeout
         * that will take care about this. */
        syncWrite(fd,"PING\r\n",6,100);
        return;
    }

    /* Receive the PONG command. */
    if (server.repl_state == REDIS_REPL_RECEIVE_PONG) {
        char buf[1024];

        /* Delete the readable event, we no longer need it now that there is
         * the PING reply to read. */
        aeDeleteFileEvent(server.el,fd,AE_READABLE);

        /* Read the reply with explicit timeout. */
        buf[0] = '\0';
        if (syncReadLine(fd,buf,sizeof(buf),
            server.repl_syncio_timeout*1000) == -1)
        {
            redisLog(REDIS_WARNING,
                "I/O error reading PING reply from master: %s",
                strerror(errno));
            goto error;
        }

        /* We don't care about the reply, it can be +PONG or an error since
         * the server requires AUTH. As long as it replies correctly, it's
         * fine from our point of view. */
        if (buf[0] != '-' && buf[0] != '+') {
            redisLog(REDIS_WARNING,"Unexpected reply to PING from master.");
            goto error;
        } else {
            redisLog(REDIS_NOTICE,
                "Master replied to PING, replication can continue...");
        }
    }

    /* AUTH with the master if required. */
    if(server.masterauth) {
        err = sendSynchronousCommand(fd,"AUTH",server.masterauth,NULL);
        if (err) {
            redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",err);
            sdsfree(err);
            goto error;
        }
    }

    /* Set the slave port, so that Master's INFO command can list the
     * slave listening port correctly. */
    {
        sds port = sdsfromlonglong(server.port);
        err = sendSynchronousCommand(fd,"REPLCONF","listening-port",port,
                                         NULL);
        sdsfree(port);
        /* Ignore the error if any, not all the Redis versions support
         * REPLCONF listening-port. */
        if (err) {
            redisLog(REDIS_NOTICE,"(non critical): Master does not understand REPLCONF listening-port: %s", err);
            sdsfree(err);
        }
    }

    /* Issue the SYNC command */
    if (syncWrite(fd,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) {
        redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
            strerror(errno));
        goto error;
    }

    /* Prepare a suitable temp file for bulk transfer */
    while(maxtries--) {
        snprintf(tmpfile,sizeof(tmpfile),
            "temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid());
        dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
        if (dfd != -1) break;
        sleep(1);
    }
    if (dfd == -1) {
        redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
        goto error;
    }

    /* Setup the non blocking download of the bulk file. */
    if (aeCreateFileEvent(server.el,fd, AE_READABLE,readSyncBulkPayload,NULL)
            == AE_ERR)
    {
        redisLog(REDIS_WARNING,
            "Can't create readable event for SYNC: %s (fd=%d)",
            strerror(errno),fd);
        goto error;
    }

    server.repl_state = REDIS_REPL_TRANSFER;
    server.repl_transfer_size = -1;
    server.repl_transfer_read = 0;
    server.repl_transfer_last_fsync_off = 0;
    server.repl_transfer_fd = dfd;
    server.repl_transfer_lastio = server.unixtime;
    server.repl_transfer_tmpfile = zstrdup(tmpfile);
    return;

error:
    /* dfd is valid only if the temp file was created by this call: in
     * that case nobody else knows about it, so release the descriptor and
     * remove the file instead of leaking both. */
    if (dfd != -1) {
        close(dfd);
        unlink(tmpfile);
    }
    close(fd);
    server.repl_transfer_s = -1;
    server.repl_state = REDIS_REPL_CONNECT;
    return;
}
int connectWithMaster ( void ) {
int fd ;
fd = anetTcpNonBlockConnect ( NULL , server . masterhost , server . masterport ) ;
if ( fd = = - 1 ) {
redisLog ( REDIS_WARNING , " Unable to connect to MASTER: %s " ,
strerror ( errno ) ) ;
return REDIS_ERR ;
}
2011-06-09 09:35:07 -04:00
if ( aeCreateFileEvent ( server . el , fd , AE_READABLE | AE_WRITABLE , syncWithMaster , NULL ) = =
2011-05-22 06:41:24 -04:00
AE_ERR )
2011-05-19 12:53:06 -04:00
{
close ( fd ) ;
redisLog ( REDIS_WARNING , " Can't create readable event for SYNC " ) ;
return REDIS_ERR ;
}
2012-03-27 11:39:58 -04:00
server . repl_transfer_lastio = server . unixtime ;
2011-05-19 12:53:06 -04:00
server . repl_transfer_s = fd ;
2011-12-21 06:23:18 -05:00
server . repl_state = REDIS_REPL_CONNECTING ;
2010-06-21 18:07:48 -04:00
return REDIS_OK ;
}
2011-11-30 09:35:16 -05:00
/* This function can be called when a non blocking connection is currently
* in progress to undo it . */
void undoConnectWithMaster ( void ) {
int fd = server . repl_transfer_s ;
2012-08-31 09:32:57 -04:00
redisAssert ( server . repl_state = = REDIS_REPL_CONNECTING | |
server . repl_state = = REDIS_REPL_RECEIVE_PONG ) ;
2011-11-30 09:35:16 -05:00
aeDeleteFileEvent ( server . el , fd , AE_READABLE | AE_WRITABLE ) ;
close ( fd ) ;
server . repl_transfer_s = - 1 ;
2011-12-21 06:23:18 -05:00
server . repl_state = REDIS_REPL_CONNECT ;
2011-11-30 09:35:16 -05:00
}
2013-01-14 05:39:54 -05:00
/* This function aborts a non blocking replication attempt if there is one
* in progress , by canceling the non - blocking connect attempt or
* the initial bulk transfer .
*
* If there was a replication handshake in progress 1 is returned and
* the replication state ( server . repl_state ) set to REDIS_REPL_CONNECT .
*
* Otherwise zero is returned and no operation is perforemd at all . */
int cancelReplicationHandshake ( void ) {
if ( server . repl_state = = REDIS_REPL_TRANSFER ) {
replicationAbortSyncTransfer ( ) ;
} else if ( server . repl_state = = REDIS_REPL_CONNECTING | |
server . repl_state = = REDIS_REPL_RECEIVE_PONG )
{
undoConnectWithMaster ( ) ;
} else {
return 0 ;
}
return 1 ;
}
2010-06-21 18:07:48 -04:00
/* SLAVEOF <host> <port> | SLAVEOF NO ONE
 *
 * With "NO ONE" the instance is turned into a master: if a master host
 * was configured it is released, the master client (if any) is freed,
 * any handshake in progress is canceled and the replication state is set
 * to REDIS_REPL_NONE.
 *
 * Otherwise the instance is configured as slave of host:port. Nothing is
 * done, apart from sending a specific +OK reply, when host and port are
 * the ones already configured. In the other cases the old master client
 * is freed, our own slaves are disconnected, any handshake in progress is
 * canceled and the state is set to REDIS_REPL_CONNECT. */
void slaveofCommand(redisClient *c) {
    int no_one = !strcasecmp(c->argv[1]->ptr,"no") &&
                 !strcasecmp(c->argv[2]->ptr,"one");
    long port;

    if (no_one) {
        if (server.masterhost != NULL) {
            sdsfree(server.masterhost);
            server.masterhost = NULL;
            if (server.master) freeClient(server.master);
            cancelReplicationHandshake();
            server.repl_state = REDIS_REPL_NONE;
            redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
        }
        addReply(c,shared.ok);
        return;
    }

    /* On conversion failure the error was already sent to the client. */
    if (getLongFromObjectOrReply(c,c->argv[2],&port,NULL) != REDIS_OK)
        return;

    /* Check if we are already attached to the specified master. */
    if (server.masterhost &&
        !strcasecmp(server.masterhost,c->argv[1]->ptr) &&
        server.masterport == port)
    {
        redisLog(REDIS_NOTICE,"SLAVE OF would result into synchronization with the master we are already connected with. No operation performed.");
        addReplySds(c,sdsnew("+OK Already connected to specified master\r\n"));
        return;
    }

    /* There was no previous master or the user specified a different one,
     * we can continue. */
    sdsfree(server.masterhost);
    server.masterhost = sdsdup(c->argv[1]->ptr);
    server.masterport = port;
    if (server.master) freeClient(server.master);
    disconnectSlaves(); /* Force our slaves to resync with us as well. */
    cancelReplicationHandshake();
    server.repl_state = REDIS_REPL_CONNECT;
    redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
        server.masterhost, server.masterport);
    addReply(c,shared.ok);
}
2010-11-04 12:29:53 -04:00
/* --------------------------- REPLICATION CRON ---------------------------- */
void replicationCron ( void ) {
2011-11-30 09:35:16 -05:00
/* Non blocking connection timeout? */
2012-08-31 09:32:57 -04:00
if ( server . masterhost & &
( server . repl_state = = REDIS_REPL_CONNECTING | |
server . repl_state = = REDIS_REPL_RECEIVE_PONG ) & &
2011-11-30 09:35:16 -05:00
( time ( NULL ) - server . repl_transfer_lastio ) > server . repl_timeout )
{
redisLog ( REDIS_WARNING , " Timeout connecting to the MASTER... " ) ;
undoConnectWithMaster ( ) ;
}
2010-11-04 12:29:53 -04:00
/* Bulk transfer I/O timeout? */
2011-12-21 06:23:18 -05:00
if ( server . masterhost & & server . repl_state = = REDIS_REPL_TRANSFER & &
2011-10-31 06:13:28 -04:00
( time ( NULL ) - server . repl_transfer_lastio ) > server . repl_timeout )
2010-11-04 12:29:53 -04:00
{
2012-10-04 05:49:17 -04:00
redisLog ( REDIS_WARNING , " Timeout receiving bulk data from MASTER... If the problem persists try to set the 'repl-timeout' parameter in redis.conf to a larger value. " ) ;
2010-11-04 12:29:53 -04:00
replicationAbortSyncTransfer ( ) ;
}
2011-01-20 07:18:23 -05:00
/* Timed out master when we are an already connected slave? */
2011-12-21 06:23:18 -05:00
if ( server . masterhost & & server . repl_state = = REDIS_REPL_CONNECTED & &
2011-10-31 06:13:28 -04:00
( time ( NULL ) - server . master - > lastinteraction ) > server . repl_timeout )
2011-01-20 07:18:23 -05:00
{
redisLog ( REDIS_WARNING , " MASTER time out: no data nor PING received... " ) ;
freeClient ( server . master ) ;
}
2010-11-04 12:29:53 -04:00
/* Check if we should connect to a MASTER */
2011-12-21 06:23:18 -05:00
if ( server . repl_state = = REDIS_REPL_CONNECT ) {
2010-11-04 12:29:53 -04:00
redisLog ( REDIS_NOTICE , " Connecting to MASTER... " ) ;
2011-05-19 12:53:06 -04:00
if ( connectWithMaster ( ) = = REDIS_OK ) {
redisLog ( REDIS_NOTICE , " MASTER <-> SLAVE sync started " ) ;
2010-11-04 12:29:53 -04:00
}
}
2011-01-20 07:18:23 -05:00
/* If we have attached slaves, PING them from time to time.
* So slaves can implement an explicit timeout to masters , and will
* be able to detect a link disconnection even if the TCP connection
* will not actually go down . */
2012-12-14 11:10:40 -05:00
if ( ! ( server . cronloops % ( server . repl_ping_slave_period * server . hz ) ) ) {
2011-01-20 07:18:23 -05:00
listIter li ;
listNode * ln ;
listRewind ( server . slaves , & li ) ;
while ( ( ln = listNext ( & li ) ) ) {
redisClient * slave = ln - > value ;
/* Don't ping slaves that are in the middle of a bulk transfer
* with the master for first synchronization . */
if ( slave - > replstate = = REDIS_REPL_SEND_BULK ) continue ;
if ( slave - > replstate = = REDIS_REPL_ONLINE ) {
/* If the slave is online send a normal ping */
2012-02-29 10:33:54 -05:00
addReplySds ( slave , sdsnew ( " *1 \r \n $4 \r \n PING \r \n " ) ) ;
2011-01-20 07:18:23 -05:00
} else {
/* Otherwise we are in the pre-synchronization stage.
* Just a newline will do the work of refreshing the
* connection last interaction time , and at the same time
* we ' ll be sure that being a single char there are no
* short - write problems . */
2011-02-21 11:50:54 -05:00
if ( write ( slave - > fd , " \n " , 1 ) = = - 1 ) {
/* Don't worry, it's just a ping. */
}
2011-01-20 07:18:23 -05:00
}
}
}
2010-11-04 12:29:53 -04:00
}