2010-06-21 18:07:48 -04:00
# include "redis.h"
2011-09-13 12:27:08 -04:00
# include "bio.h"
2011-09-22 09:15:26 -04:00
# include "rio.h"
2010-06-21 18:07:48 -04:00
# include <signal.h>
# include <fcntl.h>
# include <sys/stat.h>
2010-07-01 15:13:38 -04:00
# include <sys/types.h>
# include <sys/time.h>
# include <sys/resource.h>
# include <sys/wait.h>
2010-06-21 18:07:48 -04:00
2011-06-10 06:39:23 -04:00
void aofUpdateCurrentSize ( void ) ;
2011-09-16 05:08:39 -04:00
void aof_background_fsync ( int fd ) {
2011-09-16 06:11:48 -04:00
bioCreateBackgroundJob ( REDIS_BIO_AOF_FSYNC , ( void * ) ( long ) fd , NULL , NULL ) ;
2011-09-16 05:08:39 -04:00
}
2010-06-21 18:07:48 -04:00
/* Called when the user switches from "appendonly yes" to "appendonly no"
* at runtime using the CONFIG command . */
void stopAppendOnly ( void ) {
2011-12-21 04:31:34 -05:00
redisAssert ( server . aof_state ! = REDIS_AOF_OFF ) ;
2011-09-16 06:35:12 -04:00
flushAppendOnlyFile ( 1 ) ;
2011-12-21 06:17:02 -05:00
aof_fsync ( server . aof_fd ) ;
close ( server . aof_fd ) ;
2010-06-21 18:07:48 -04:00
2011-12-21 06:17:02 -05:00
server . aof_fd = - 1 ;
server . aof_selected_db = - 1 ;
2011-12-21 04:31:34 -05:00
server . aof_state = REDIS_AOF_OFF ;
2010-06-21 18:07:48 -04:00
/* rewrite operation in progress? kill it, wait child exit */
2011-12-21 06:17:02 -05:00
if ( server . aof_child_pid ! = - 1 ) {
2010-06-21 18:07:48 -04:00
int statloc ;
2011-12-21 10:37:22 -05:00
redisLog ( REDIS_NOTICE , " Killing running AOF rewrite child: %ld " ,
( long ) server . aof_child_pid ) ;
2011-12-21 06:17:02 -05:00
if ( kill ( server . aof_child_pid , SIGKILL ) ! = - 1 )
2010-06-21 18:07:48 -04:00
wait3 ( & statloc , 0 , NULL ) ;
/* reset the buffer accumulating changes while the child saves */
2011-12-21 06:17:02 -05:00
sdsfree ( server . aof_rewrite_buf ) ;
server . aof_rewrite_buf = sdsempty ( ) ;
aofRemoveTempFile ( server . aof_child_pid ) ;
server . aof_child_pid = - 1 ;
2010-06-21 18:07:48 -04:00
}
}
/* Called when the user switches from "appendonly no" to "appendonly yes"
* at runtime using the CONFIG command . */
int startAppendOnly ( void ) {
2011-12-21 06:17:02 -05:00
server . aof_last_fsync = time ( NULL ) ;
server . aof_fd = open ( server . aof_filename , O_WRONLY | O_APPEND | O_CREAT , 0644 ) ;
2011-12-21 04:31:34 -05:00
redisAssert ( server . aof_state = = REDIS_AOF_OFF ) ;
2011-12-21 06:17:02 -05:00
if ( server . aof_fd = = - 1 ) {
2011-12-15 10:07:49 -05:00
redisLog ( REDIS_WARNING , " Redis needs to enable the AOF but can't open the append only file: %s " , strerror ( errno ) ) ;
2010-06-21 18:07:48 -04:00
return REDIS_ERR ;
}
if ( rewriteAppendOnlyFileBackground ( ) = = REDIS_ERR ) {
2011-12-21 06:17:02 -05:00
close ( server . aof_fd ) ;
2011-12-15 10:07:49 -05:00
redisLog ( REDIS_WARNING , " Redis needs to enable the AOF but can't trigger a background AOF rewrite operation. Check the above logs for more info about the error. " ) ;
2010-06-21 18:07:48 -04:00
return REDIS_ERR ;
}
2011-12-15 10:07:49 -05:00
/* We correctly switched on AOF, now wait for the rerwite to be complete
* in order to append data on disk . */
2011-12-21 04:31:34 -05:00
server . aof_state = REDIS_AOF_WAIT_REWRITE ;
2010-06-21 18:07:48 -04:00
return REDIS_OK ;
}
/* Write the append only file buffer on disk.
*
* Since we are required to write the AOF before replying to the client ,
* and the only way the client socket can get a write is entering when the
* the event loop , we accumulate all the AOF writes in a memory
* buffer and write it on disk using this function just before entering
2011-09-16 06:35:12 -04:00
* the event loop again .
*
* About the ' force ' argument :
*
* When the fsync policy is set to ' everysec ' we may delay the flush if there
* is still an fsync ( ) going on in the background thread , since for instance
* on Linux write ( 2 ) will be blocked by the background fsync anyway .
* When this happens we remember that there is some aof buffer to be
* flushed ASAP , and will try to do that in the serverCron ( ) function .
*
* However if force is set to 1 we ' ll write regardless of the background
* fsync . */
void flushAppendOnlyFile ( int force ) {
2010-06-21 18:07:48 -04:00
ssize_t nwritten ;
2011-09-16 06:35:12 -04:00
int sync_in_progress = 0 ;
2010-06-21 18:07:48 -04:00
2011-12-21 06:17:02 -05:00
if ( sdslen ( server . aof_buf ) = = 0 ) return ;
2010-06-21 18:07:48 -04:00
2011-12-21 05:58:42 -05:00
if ( server . aof_fsync = = AOF_FSYNC_EVERYSEC )
2011-09-16 06:35:12 -04:00
sync_in_progress = bioPendingJobsOfType ( REDIS_BIO_AOF_FSYNC ) ! = 0 ;
2011-12-21 05:58:42 -05:00
if ( server . aof_fsync = = AOF_FSYNC_EVERYSEC & & ! force ) {
2011-09-16 06:35:12 -04:00
/* With this append fsync policy we do background fsyncing.
* If the fsync is still in progress we can try to delay
* the write for a couple of seconds . */
if ( sync_in_progress ) {
if ( server . aof_flush_postponed_start = = 0 ) {
/* No previous write postponinig, remember that we are
* postponing the flush and return . */
server . aof_flush_postponed_start = server . unixtime ;
return ;
} else if ( server . unixtime - server . aof_flush_postponed_start < 2 ) {
2011-09-19 11:49:50 -04:00
/* We were already waiting for fsync to finish, but for less
2011-09-16 06:35:12 -04:00
* than two seconds this is still ok . Postpone again . */
return ;
}
/* Otherwise fall trough, and go write since we can't wait
* over two seconds . */
2011-09-19 10:52:13 -04:00
redisLog ( REDIS_NOTICE , " Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis. " ) ;
2011-09-16 06:35:12 -04:00
}
}
/* If you are following this code path, then we are going to write so
* set reset the postponed flush sentinel to zero . */
server . aof_flush_postponed_start = 0 ;
2010-06-21 18:07:48 -04:00
/* We want to perform a single write. This should be guaranteed atomic
* at least if the filesystem we are writing is a real physical one .
* While this will save us against the server being killed I don ' t think
* there is much to do about the whole server stopping for power problems
* or alike */
2011-12-21 06:17:02 -05:00
nwritten = write ( server . aof_fd , server . aof_buf , sdslen ( server . aof_buf ) ) ;
if ( nwritten ! = ( signed ) sdslen ( server . aof_buf ) ) {
2010-06-21 18:07:48 -04:00
/* Ooops, we are in troubles. The best thing to do for now is
* aborting instead of giving the illusion that everything is
* working as expected . */
2011-08-18 06:27:34 -04:00
if ( nwritten = = - 1 ) {
2010-06-21 18:07:48 -04:00
redisLog ( REDIS_WARNING , " Exiting on error writing to the append-only file: %s " , strerror ( errno ) ) ;
2011-08-18 06:27:34 -04:00
} else {
2011-12-21 11:12:23 -05:00
redisLog ( REDIS_WARNING , " Exiting on short write while writing to "
" the append-only file: %s (nwritten=%ld, "
" expected=%ld) " ,
strerror ( errno ) ,
( long ) nwritten ,
( long ) sdslen ( server . aof_buf ) ) ;
2011-08-18 06:27:34 -04:00
}
exit ( 1 ) ;
2010-06-21 18:07:48 -04:00
}
2011-12-21 05:58:42 -05:00
server . aof_current_size + = nwritten ;
2010-06-21 18:07:48 -04:00
2011-08-18 06:44:30 -04:00
/* Re-use AOF buffer when it is small enough. The maximum comes from the
* arena size of 4 k minus some overhead ( but is otherwise arbitrary ) . */
2011-12-21 06:17:02 -05:00
if ( ( sdslen ( server . aof_buf ) + sdsavail ( server . aof_buf ) ) < 4000 ) {
sdsclear ( server . aof_buf ) ;
2011-08-18 06:44:30 -04:00
} else {
2011-12-21 06:17:02 -05:00
sdsfree ( server . aof_buf ) ;
server . aof_buf = sdsempty ( ) ;
2011-08-18 06:44:30 -04:00
}
2011-08-18 06:25:59 -04:00
/* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are
* children doing I / O in the background . */
2011-12-21 05:58:42 -05:00
if ( server . aof_no_fsync_on_rewrite & &
2011-12-21 06:22:13 -05:00
( server . aof_child_pid ! = - 1 | | server . rdb_child_pid ! = - 1 ) )
2010-06-21 18:07:48 -04:00
return ;
2011-08-18 06:25:59 -04:00
/* Perform the fsync if needed. */
2011-12-21 05:58:42 -05:00
if ( server . aof_fsync = = AOF_FSYNC_ALWAYS ) {
2010-06-21 18:07:48 -04:00
/* aof_fsync is defined as fdatasync() for Linux in order to avoid
* flushing metadata . */
2011-12-21 06:17:02 -05:00
aof_fsync ( server . aof_fd ) ; /* Let's try to get this data on the disk */
server . aof_last_fsync = server . unixtime ;
2011-12-21 05:58:42 -05:00
} else if ( ( server . aof_fsync = = AOF_FSYNC_EVERYSEC & &
2011-12-21 06:17:02 -05:00
server . unixtime > server . aof_last_fsync ) ) {
if ( ! sync_in_progress ) aof_background_fsync ( server . aof_fd ) ;
server . aof_last_fsync = server . unixtime ;
2010-06-21 18:07:48 -04:00
}
}
2011-08-18 07:03:04 -04:00
/* Append to 'dst' the multi bulk representation of the command made of
 * the 'argc' objects in 'argv': a "*<argc>\r\n" header followed, for every
 * argument, by "$<len>\r\n<payload>\r\n". Every argument is accessed
 * through getDecodedObject(). Returns the (possibly reallocated) sds
 * string. */
sds catAppendOnlyGenericCommand(sds dst, int argc, robj **argv) {
    char hdr[32];
    int hdrlen, i;

    /* Arguments count header. */
    hdr[0] = '*';
    hdrlen = 1+ll2string(hdr+1,sizeof(hdr)-1,argc);
    hdr[hdrlen++] = '\r';
    hdr[hdrlen++] = '\n';
    dst = sdscatlen(dst,hdr,hdrlen);

    for (i = 0; i < argc; i++) {
        robj *arg = getDecodedObject(argv[i]);

        /* Bulk length header. */
        hdr[0] = '$';
        hdrlen = 1+ll2string(hdr+1,sizeof(hdr)-1,sdslen(arg->ptr));
        hdr[hdrlen++] = '\r';
        hdr[hdrlen++] = '\n';
        dst = sdscatlen(dst,hdr,hdrlen);

        /* Payload and terminator. */
        dst = sdscatlen(dst,arg->ptr,sdslen(arg->ptr));
        dst = sdscatlen(dst,"\r\n",2);
        decrRefCount(arg);
    }
    return dst;
}
2011-11-10 11:52:02 -05:00
/* Append to 'buf' a PEXPIREAT command for 'key', built from the time
 * argument 'seconds' of the command 'cmd'.
 *
 * EXPIRE, PEXPIRE, EXPIREAT, SETEX and PSETEX are all translated into
 * PEXPIREAT so that the append only file retains precision and always
 * holds an absolute time in milliseconds:
 *
 * - EXPIRE, SETEX and EXPIREAT carry seconds, so the value is multiplied
 *   by 1000.
 * - EXPIRE, PEXPIRE, SETEX and PSETEX carry a relative time, so the
 *   current mstime() is added.
 *
 * Returns the (possibly reallocated) sds string. */
sds catAppendOnlyExpireAtCommand(sds buf, struct redisCommand *cmd, robj *key, robj *seconds) {
    robj *argv[3];
    long long when;
    int unit_is_seconds, time_is_relative;

    unit_is_seconds = (cmd->proc == expireCommand ||
                       cmd->proc == setexCommand ||
                       cmd->proc == expireatCommand);
    time_is_relative = (cmd->proc == expireCommand ||
                        cmd->proc == pexpireCommand ||
                        cmd->proc == setexCommand ||
                        cmd->proc == psetexCommand);

    /* Make sure we can use strtoll() on the argument. */
    seconds = getDecodedObject(seconds);
    when = strtoll(seconds->ptr,NULL,10);
    decrRefCount(seconds);

    if (unit_is_seconds) when *= 1000;
    if (time_is_relative) when += mstime();

    argv[0] = createStringObject("PEXPIREAT",9);
    argv[1] = key;
    argv[2] = createStringObjectFromLongLong(when);
    buf = catAppendOnlyGenericCommand(buf,3,argv);
    decrRefCount(argv[2]);
    decrRefCount(argv[0]);
    return buf;
}
/* Serialize the command 'cmd' (arguments in 'argv'/'argc', executed against
 * the DB with index 'dictid') and feed it to the AOF.
 *
 * A SELECT is emitted first if 'dictid' differs from the DB selected by
 * the last command fed. The expire family of commands is translated into
 * PEXPIREAT, and SETEX/PSETEX into SET followed by PEXPIREAT.
 *
 * The result is appended to server.aof_buf when the AOF state is
 * REDIS_AOF_ON, and to server.aof_rewrite_buf when a rewrite child is
 * active. */
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
    sds cmdbuf = sdsempty();

    /* The DB this command was targeting is not the same as the last command
     * we appended. Issuing a SELECT command is needed. */
    if (server.aof_selected_db != dictid) {
        char dbnum[64];

        snprintf(dbnum,sizeof(dbnum),"%d",dictid);
        cmdbuf = sdscatprintf(cmdbuf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
            (unsigned long)strlen(dbnum),dbnum);
        server.aof_selected_db = dictid;
    }

    if (cmd->proc == setexCommand || cmd->proc == psetexCommand) {
        /* Translate SETEX/PSETEX to SET and PEXPIREAT */
        robj *setargv[3];

        setargv[0] = createStringObject("SET",3);
        setargv[1] = argv[1];
        setargv[2] = argv[3];
        cmdbuf = catAppendOnlyGenericCommand(cmdbuf,3,setargv);
        decrRefCount(setargv[0]);
        cmdbuf = catAppendOnlyExpireAtCommand(cmdbuf,cmd,argv[1],argv[2]);
    } else if (cmd->proc == expireCommand || cmd->proc == pexpireCommand ||
               cmd->proc == expireatCommand) {
        /* Translate EXPIRE/PEXPIRE/EXPIREAT into PEXPIREAT */
        cmdbuf = catAppendOnlyExpireAtCommand(cmdbuf,cmd,argv[1],argv[2]);
    } else {
        /* All the other commands don't need translation or need the
         * same translation already operated in the command vector
         * for the replication itself. */
        cmdbuf = catAppendOnlyGenericCommand(cmdbuf,argc,argv);
    }

    /* Append to the AOF buffer. This will be flushed on disk just before
     * re-entering the event loop, so before the client will get a
     * positive reply about the operation performed. */
    if (server.aof_state == REDIS_AOF_ON)
        server.aof_buf = sdscatlen(server.aof_buf,cmdbuf,sdslen(cmdbuf));

    /* If a background append only file rewriting is in progress we want to
     * accumulate the differences between the child DB and the current one
     * in a buffer, so that when the child process will do its work we
     * can append the differences to the new append only file. */
    if (server.aof_child_pid != -1)
        server.aof_rewrite_buf = sdscatlen(server.aof_rewrite_buf,cmdbuf,sdslen(cmdbuf));

    sdsfree(cmdbuf);
}
/* In Redis commands are always executed in the context of a client, so in
* order to load the append only file we need to create a fake client . */
struct redisClient * createFakeClient ( void ) {
struct redisClient * c = zmalloc ( sizeof ( * c ) ) ;
selectDb ( c , 0 ) ;
c - > fd = - 1 ;
c - > querybuf = sdsempty ( ) ;
c - > argc = 0 ;
c - > argv = NULL ;
2010-09-02 08:17:53 -04:00
c - > bufpos = 0 ;
2010-06-21 18:07:48 -04:00
c - > flags = 0 ;
/* We set the fake client as a slave waiting for the synchronization
* so that Redis will not try to send replies to this client . */
c - > replstate = REDIS_REPL_WAIT_BGSAVE_START ;
c - > reply = listCreate ( ) ;
2011-12-25 10:32:54 -05:00
c - > reply_bytes = 0 ;
2012-01-23 10:12:37 -05:00
c - > obuf_soft_limit_reached_time = 0 ;
2010-07-05 14:06:54 -04:00
c - > watched_keys = listCreate ( ) ;
2010-06-21 18:07:48 -04:00
listSetFreeMethod ( c - > reply , decrRefCount ) ;
listSetDupMethod ( c - > reply , dupClientReplyValue ) ;
initClientMultiState ( c ) ;
return c ;
}
void freeFakeClient ( struct redisClient * c ) {
sdsfree ( c - > querybuf ) ;
listRelease ( c - > reply ) ;
2010-07-05 14:06:54 -04:00
listRelease ( c - > watched_keys ) ;
2010-06-21 18:07:48 -04:00
freeClientMultiState ( c ) ;
zfree ( c ) ;
}
/* Replay the append log file. On error REDIS_OK is returned. On non fatal
* error ( the append only file is zero - length ) REDIS_ERR is returned . On
* fatal error an error message is logged and the program exists . */
int loadAppendOnlyFile ( char * filename ) {
struct redisClient * fakeClient ;
FILE * fp = fopen ( filename , " r " ) ;
struct redis_stat sb ;
2011-12-21 04:31:34 -05:00
int old_aof_state = server . aof_state ;
2010-11-08 05:52:03 -05:00
long loops = 0 ;
2010-06-21 18:07:48 -04:00
2011-03-04 10:13:54 -05:00
if ( fp & & redis_fstat ( fileno ( fp ) , & sb ) ! = - 1 & & sb . st_size = = 0 ) {
2011-12-21 05:58:42 -05:00
server . aof_current_size = 0 ;
2011-03-04 10:13:54 -05:00
fclose ( fp ) ;
2010-06-21 18:07:48 -04:00
return REDIS_ERR ;
2011-03-04 10:13:54 -05:00
}
2010-06-21 18:07:48 -04:00
if ( fp = = NULL ) {
redisLog ( REDIS_WARNING , " Fatal error: can't open the append log file for reading: %s " , strerror ( errno ) ) ;
exit ( 1 ) ;
}
/* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
* to the same file we ' re about to read . */
2011-12-21 04:31:34 -05:00
server . aof_state = REDIS_AOF_OFF ;
2010-06-21 18:07:48 -04:00
fakeClient = createFakeClient ( ) ;
2010-11-08 05:52:03 -05:00
startLoading ( fp ) ;
2010-06-21 18:07:48 -04:00
while ( 1 ) {
int argc , j ;
unsigned long len ;
robj * * argv ;
char buf [ 128 ] ;
sds argsds ;
struct redisCommand * cmd ;
2010-11-08 05:52:03 -05:00
/* Serve the clients from time to time */
if ( ! ( loops + + % 1000 ) ) {
loadingProgress ( ftello ( fp ) ) ;
aeProcessEvents ( server . el , AE_FILE_EVENTS | AE_DONT_WAIT ) ;
}
2010-06-21 18:07:48 -04:00
if ( fgets ( buf , sizeof ( buf ) , fp ) = = NULL ) {
if ( feof ( fp ) )
break ;
else
goto readerr ;
}
if ( buf [ 0 ] ! = ' * ' ) goto fmterr ;
argc = atoi ( buf + 1 ) ;
2011-08-02 09:05:04 -04:00
if ( argc < 1 ) goto fmterr ;
2010-06-21 18:07:48 -04:00
argv = zmalloc ( sizeof ( robj * ) * argc ) ;
for ( j = 0 ; j < argc ; j + + ) {
if ( fgets ( buf , sizeof ( buf ) , fp ) = = NULL ) goto readerr ;
if ( buf [ 0 ] ! = ' $ ' ) goto fmterr ;
len = strtol ( buf + 1 , NULL , 10 ) ;
argsds = sdsnewlen ( NULL , len ) ;
if ( len & & fread ( argsds , len , 1 , fp ) = = 0 ) goto fmterr ;
argv [ j ] = createObject ( REDIS_STRING , argsds ) ;
if ( fread ( buf , 2 , 1 , fp ) = = 0 ) goto fmterr ; /* discard CRLF */
}
/* Command lookup */
cmd = lookupCommand ( argv [ 0 ] - > ptr ) ;
if ( ! cmd ) {
redisLog ( REDIS_WARNING , " Unknown command '%s' reading the append only file " , argv [ 0 ] - > ptr ) ;
exit ( 1 ) ;
}
/* Run the command in the context of a fake client */
fakeClient - > argc = argc ;
fakeClient - > argv = argv ;
cmd - > proc ( fakeClient ) ;
2010-08-30 10:51:39 -04:00
/* The fake client should not have a reply */
redisAssert ( fakeClient - > bufpos = = 0 & & listLength ( fakeClient - > reply ) = = 0 ) ;
2011-06-29 10:10:28 -04:00
/* The fake client should never get blocked */
redisAssert ( ( fakeClient - > flags & REDIS_BLOCKED ) = = 0 ) ;
2010-08-30 10:51:39 -04:00
2011-04-22 03:44:06 -04:00
/* Clean up. Command code may have changed argv/argc so we use the
* argv / argc of the client instead of the local variables . */
for ( j = 0 ; j < fakeClient - > argc ; j + + )
decrRefCount ( fakeClient - > argv [ j ] ) ;
zfree ( fakeClient - > argv ) ;
2010-06-21 18:07:48 -04:00
}
/* This point can only be reached when EOF is reached without errors.
* If the client is in the middle of a MULTI / EXEC , log error and quit . */
if ( fakeClient - > flags & REDIS_MULTI ) goto readerr ;
fclose ( fp ) ;
freeFakeClient ( fakeClient ) ;
2011-12-21 04:31:34 -05:00
server . aof_state = old_aof_state ;
2010-11-08 05:52:03 -05:00
stopLoading ( ) ;
2011-06-10 06:39:23 -04:00
aofUpdateCurrentSize ( ) ;
2011-12-21 05:58:42 -05:00
server . aof_rewrite_base_size = server . aof_current_size ;
2010-06-21 18:07:48 -04:00
return REDIS_OK ;
readerr :
if ( feof ( fp ) ) {
redisLog ( REDIS_WARNING , " Unexpected end of file reading the append only file " ) ;
} else {
redisLog ( REDIS_WARNING , " Unrecoverable error reading the append only file: %s " , strerror ( errno ) ) ;
}
exit ( 1 ) ;
fmterr :
2010-09-14 09:18:18 -04:00
redisLog ( REDIS_WARNING , " Bad file format reading the append only file: make a backup of your AOF file, then use ./redis-check-aof --fix <filename> " ) ;
2010-06-21 18:07:48 -04:00
exit ( 1 ) ;
}
2011-05-14 06:36:22 -04:00
/* Write the string object 'obj' to 'r' as a bulk: integer encoded objects
 * are emitted with rioWriteBulkLongLong(), raw ones with
 * rioWriteBulkString(). Any other encoding triggers a panic. The return
 * value is the one of the function the write is delegated to.
 *
 * This is not placed in rio.c since that adds the redis.h dependency. */
int rioWriteBulkObject(rio *r, robj *obj) {
    /* Avoid using getDecodedObject to help copy-on-write (we are often
     * in a child process when this function is called). */
    switch(obj->encoding) {
    case REDIS_ENCODING_INT:
        return rioWriteBulkLongLong(r,(long)obj->ptr);
    case REDIS_ENCODING_RAW:
        return rioWriteBulkString(r,obj->ptr,sdslen(obj->ptr));
    default:
        redisPanic("Unknown string encoding");
    }
}
2011-12-06 12:22:52 -05:00
/* Emit the RPUSH commands needed to rebuild the list object 'o' stored at
 * 'key'. Elements are grouped so that a single command carries at most
 * REDIS_AOF_REWRITE_ITEMS_PER_CMD of them.
 * The function returns 0 on error, 1 on success. */
int rewriteListObject(rio *r, robj *key, robj *o) {
    long long batched = 0, remaining = listTypeLength(o);

    if (o->encoding == REDIS_ENCODING_ZIPLIST) {
        unsigned char *zl = o->ptr;
        unsigned char *pos = ziplistIndex(zl,0);
        unsigned char *sval;
        unsigned int slen;
        long long ival;

        while (ziplistGet(pos,&sval,&slen,&ival)) {
            if (batched == 0) {
                /* Start a new command sized for what is left to emit. */
                int nargs = REDIS_AOF_REWRITE_ITEMS_PER_CMD;

                if (remaining <= REDIS_AOF_REWRITE_ITEMS_PER_CMD)
                    nargs = remaining;
                if (rioWriteBulkCount(r,'*',2+nargs) == 0) return 0;
                if (rioWriteBulkString(r,"RPUSH",5) == 0) return 0;
                if (rioWriteBulkObject(r,key) == 0) return 0;
            }
            if (sval != NULL) {
                if (rioWriteBulkString(r,(char*)sval,slen) == 0) return 0;
            } else {
                if (rioWriteBulkLongLong(r,ival) == 0) return 0;
            }
            pos = ziplistNext(zl,pos);
            batched++;
            if (batched == REDIS_AOF_REWRITE_ITEMS_PER_CMD) batched = 0;
            remaining--;
        }
    } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
        list *nodes = o->ptr;
        listNode *node;
        listIter iter;

        listRewind(nodes,&iter);
        while ((node = listNext(&iter)) != NULL) {
            robj *ele = listNodeValue(node);

            if (batched == 0) {
                /* Start a new command sized for what is left to emit. */
                int nargs = REDIS_AOF_REWRITE_ITEMS_PER_CMD;

                if (remaining <= REDIS_AOF_REWRITE_ITEMS_PER_CMD)
                    nargs = remaining;
                if (rioWriteBulkCount(r,'*',2+nargs) == 0) return 0;
                if (rioWriteBulkString(r,"RPUSH",5) == 0) return 0;
                if (rioWriteBulkObject(r,key) == 0) return 0;
            }
            if (rioWriteBulkObject(r,ele) == 0) return 0;
            batched++;
            if (batched == REDIS_AOF_REWRITE_ITEMS_PER_CMD) batched = 0;
            remaining--;
        }
    } else {
        redisPanic("Unknown list encoding");
    }
    return 1;
}
2011-12-12 09:57:51 -05:00
/* Emit the SADD commands needed to rebuild the set object 'o' stored at
 * 'key'. Members are grouped so that a single command carries at most
 * REDIS_AOF_REWRITE_ITEMS_PER_CMD of them.
 * The function returns 0 on error, 1 on success. */
int rewriteSetObject(rio *r, robj *key, robj *o) {
    long long count = 0, items = setTypeSize(o);

    if (o->encoding == REDIS_ENCODING_INTSET) {
        int ii = 0;
        int64_t llval;

        while(intsetGet(o->ptr,ii++,&llval)) {
            if (count == 0) {
                int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
                    REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;

                if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
                if (rioWriteBulkString(r,"SADD",4) == 0) return 0;
                if (rioWriteBulkObject(r,key) == 0) return 0;
            }
            if (rioWriteBulkLongLong(r,llval) == 0) return 0;
            if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
            items--;
        }
    } else if (o->encoding == REDIS_ENCODING_HT) {
        dictIterator *di = dictGetIterator(o->ptr);
        dictEntry *de;
        int failed = 0;

        /* Write errors break out of the loop instead of returning, so
         * that the iterator is released on every path. */
        while((de = dictNext(di)) != NULL) {
            robj *eleobj = dictGetKey(de);

            if (count == 0) {
                int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
                    REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;

                if (rioWriteBulkCount(r,'*',2+cmd_items) == 0 ||
                    rioWriteBulkString(r,"SADD",4) == 0 ||
                    rioWriteBulkObject(r,key) == 0)
                {
                    failed = 1;
                    break;
                }
            }
            if (rioWriteBulkObject(r,eleobj) == 0) {
                failed = 1;
                break;
            }
            if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
            items--;
        }
        dictReleaseIterator(di);
        if (failed) return 0;
    } else {
        redisPanic("Unknown set encoding");
    }
    return 1;
}
2011-12-12 11:27:39 -05:00
/* Emit the ZADD commands needed to rebuild the sorted set object 'o'
 * stored at 'key'. Score/member pairs are grouped so that a single command
 * carries at most REDIS_AOF_REWRITE_ITEMS_PER_CMD pairs.
 * The function returns 0 on error, 1 on success. */
int rewriteSortedSetObject(rio *r, robj *key, robj *o) {
    long long count = 0, items = zsetLength(o);

    if (o->encoding == REDIS_ENCODING_ZIPLIST) {
        unsigned char *zl = o->ptr;
        unsigned char *eptr, *sptr;
        unsigned char *vstr;
        unsigned int vlen;
        long long vll;
        double score;

        /* The ziplist stores every element followed by its score. */
        eptr = ziplistIndex(zl,0);
        redisAssert(eptr != NULL);
        sptr = ziplistNext(zl,eptr);
        redisAssert(sptr != NULL);

        while (eptr != NULL) {
            redisAssert(ziplistGet(eptr,&vstr,&vlen,&vll));
            score = zzlGetScore(sptr);

            if (count == 0) {
                int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
                    REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;

                if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0;
                if (rioWriteBulkString(r,"ZADD",4) == 0) return 0;
                if (rioWriteBulkObject(r,key) == 0) return 0;
            }
            if (rioWriteBulkDouble(r,score) == 0) return 0;
            if (vstr != NULL) {
                if (rioWriteBulkString(r,(char*)vstr,vlen) == 0) return 0;
            } else {
                if (rioWriteBulkLongLong(r,vll) == 0) return 0;
            }
            zzlNext(zl,&eptr,&sptr);
            if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
            items--;
        }
    } else if (o->encoding == REDIS_ENCODING_SKIPLIST) {
        zset *zs = o->ptr;
        dictIterator *di = dictGetIterator(zs->dict);
        dictEntry *de;
        int failed = 0;

        /* Write errors break out of the loop instead of returning, so
         * that the iterator is released on every path. */
        while((de = dictNext(di)) != NULL) {
            robj *eleobj = dictGetKey(de);
            double *score = dictGetVal(de);

            if (count == 0) {
                int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
                    REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;

                if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0 ||
                    rioWriteBulkString(r,"ZADD",4) == 0 ||
                    rioWriteBulkObject(r,key) == 0)
                {
                    failed = 1;
                    break;
                }
            }
            if (rioWriteBulkDouble(r,*score) == 0 ||
                rioWriteBulkObject(r,eleobj) == 0)
            {
                failed = 1;
                break;
            }
            if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
            items--;
        }
        dictReleaseIterator(di);
        if (failed) return 0;
    } else {
        redisPanic("Unknown sorted zset encoding");
    }
    return 1;
}
2011-12-12 11:39:23 -05:00
/* Emit the HMSET commands needed to rebuild the hash object 'o' stored at
 * 'key'. Field/value pairs are grouped so that a single command carries at
 * most REDIS_AOF_REWRITE_ITEMS_PER_CMD pairs. Any encoding other than
 * REDIS_ENCODING_ZIPMAP is handled as a dict.
 * The function returns 0 on error, 1 on success. */
int rewriteHashObject(rio *r, robj *key, robj *o) {
    long long count = 0, items = hashTypeLength(o);

    if (o->encoding == REDIS_ENCODING_ZIPMAP) {
        unsigned char *p = zipmapRewind(o->ptr);
        unsigned char *field, *val;
        unsigned int flen, vlen;

        while((p = zipmapNext(p,&field,&flen,&val,&vlen)) != NULL) {
            if (count == 0) {
                int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
                    REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;

                if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0;
                if (rioWriteBulkString(r,"HMSET",5) == 0) return 0;
                if (rioWriteBulkObject(r,key) == 0) return 0;
            }
            if (rioWriteBulkString(r,(char*)field,flen) == 0) return 0;
            if (rioWriteBulkString(r,(char*)val,vlen) == 0) return 0;
            if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
            items--;
        }
    } else {
        dictIterator *di = dictGetIterator(o->ptr);
        dictEntry *de;
        int failed = 0;

        /* Write errors break out of the loop instead of returning, so
         * that the iterator is released on every path. */
        while((de = dictNext(di)) != NULL) {
            robj *field = dictGetKey(de);
            robj *val = dictGetVal(de);

            if (count == 0) {
                int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
                    REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;

                if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0 ||
                    rioWriteBulkString(r,"HMSET",5) == 0 ||
                    rioWriteBulkObject(r,key) == 0)
                {
                    failed = 1;
                    break;
                }
            }
            if (rioWriteBulkObject(r,field) == 0 ||
                rioWriteBulkObject(r,val) == 0)
            {
                failed = 1;
                break;
            }
            if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
            items--;
        }
        dictReleaseIterator(di);
        if (failed) return 0;
    }
    return 1;
}
2010-06-21 18:07:48 -04:00
/* Write a sequence of commands able to fully rebuild the dataset into
2011-12-06 12:22:52 -05:00
* " filename " . Used both by REWRITEAOF and BGREWRITEAOF .
*
* In order to minimize the number of commands needed in the rewritten
* log Redis uses variadic commands when possible , such as RPUSH , SADD
2011-12-21 05:58:42 -05:00
* and ZADD . However at max REDIS_AOF_REWRITE_ITEMS_PER_CMD items per time
2011-12-06 12:22:52 -05:00
* are inserted using a single command . */
2010-06-21 18:07:48 -04:00
int rewriteAppendOnlyFile ( char * filename ) {
dictIterator * di = NULL ;
dictEntry * de ;
2011-05-14 06:36:22 -04:00
rio aof ;
2010-06-21 18:07:48 -04:00
FILE * fp ;
char tmpfile [ 256 ] ;
int j ;
2011-11-11 19:04:27 -05:00
long long now = mstime ( ) ;
2010-06-21 18:07:48 -04:00
/* Note that we have to use a different temp name here compared to the
* one used by rewriteAppendOnlyFileBackground ( ) function . */
snprintf ( tmpfile , 256 , " temp-rewriteaof-%d.aof " , ( int ) getpid ( ) ) ;
fp = fopen ( tmpfile , " w " ) ;
if ( ! fp ) {
2011-12-21 11:12:23 -05:00
redisLog ( REDIS_WARNING , " Opening the temp file for AOF rewrite in rewriteAppendOnlyFile(): %s " , strerror ( errno ) ) ;
2010-06-21 18:07:48 -04:00
return REDIS_ERR ;
}
2011-05-14 06:36:22 -04:00
2011-09-22 10:00:40 -04:00
rioInitWithFile ( & aof , fp ) ;
2010-06-21 18:07:48 -04:00
for ( j = 0 ; j < server . dbnum ; j + + ) {
char selectcmd [ ] = " *2 \r \n $6 \r \n SELECT \r \n " ;
redisDb * db = server . db + j ;
dict * d = db - > dict ;
if ( dictSize ( d ) = = 0 ) continue ;
2011-06-17 09:40:55 -04:00
di = dictGetSafeIterator ( d ) ;
2010-06-21 18:07:48 -04:00
if ( ! di ) {
fclose ( fp ) ;
return REDIS_ERR ;
}
/* SELECT the new DB */
2011-05-14 06:36:22 -04:00
if ( rioWrite ( & aof , selectcmd , sizeof ( selectcmd ) - 1 ) = = 0 ) goto werr ;
if ( rioWriteBulkLongLong ( & aof , j ) = = 0 ) goto werr ;
2010-06-21 18:07:48 -04:00
/* Iterate this DB writing every entry */
while ( ( de = dictNext ( di ) ) ! = NULL ) {
2011-05-10 04:07:04 -04:00
sds keystr ;
2010-06-21 18:07:48 -04:00
robj key , * o ;
2011-11-11 19:04:27 -05:00
long long expiretime ;
2010-06-21 18:07:48 -04:00
2011-11-08 11:07:55 -05:00
keystr = dictGetKey ( de ) ;
o = dictGetVal ( de ) ;
2010-06-21 18:07:48 -04:00
initStaticStringObject ( key , keystr ) ;
2010-12-28 12:06:40 -05:00
2010-06-21 18:07:48 -04:00
expiretime = getExpire ( db , & key ) ;
/* Save the key and associated value */
if ( o - > type = = REDIS_STRING ) {
/* Emit a SET command */
char cmd [ ] = " *3 \r \n $3 \r \n SET \r \n " ;
2011-05-14 06:36:22 -04:00
if ( rioWrite ( & aof , cmd , sizeof ( cmd ) - 1 ) = = 0 ) goto werr ;
2010-06-21 18:07:48 -04:00
/* Key and value */
2011-05-14 06:36:22 -04:00
if ( rioWriteBulkObject ( & aof , & key ) = = 0 ) goto werr ;
if ( rioWriteBulkObject ( & aof , o ) = = 0 ) goto werr ;
2010-06-21 18:07:48 -04:00
} else if ( o - > type = = REDIS_LIST ) {
2011-12-06 12:22:52 -05:00
if ( rewriteListObject ( & aof , & key , o ) = = 0 ) goto werr ;
2010-06-21 18:07:48 -04:00
} else if ( o - > type = = REDIS_SET ) {
2011-12-12 09:57:51 -05:00
if ( rewriteSetObject ( & aof , & key , o ) = = 0 ) goto werr ;
2010-06-21 18:07:48 -04:00
} else if ( o - > type = = REDIS_ZSET ) {
2011-12-12 11:27:39 -05:00
if ( rewriteSortedSetObject ( & aof , & key , o ) = = 0 ) goto werr ;
2010-06-21 18:07:48 -04:00
} else if ( o - > type = = REDIS_HASH ) {
2011-12-12 11:39:23 -05:00
if ( rewriteHashObject ( & aof , & key , o ) = = 0 ) goto werr ;
2010-06-21 18:07:48 -04:00
} else {
redisPanic ( " Unknown object type " ) ;
}
/* Save the expire time */
if ( expiretime ! = - 1 ) {
2011-11-10 11:52:02 -05:00
char cmd [ ] = " *3 \r \n $9 \r \n PEXPIREAT \r \n " ;
2010-06-21 18:07:48 -04:00
/* If this key is already expired skip it */
if ( expiretime < now ) continue ;
2011-05-14 06:36:22 -04:00
if ( rioWrite ( & aof , cmd , sizeof ( cmd ) - 1 ) = = 0 ) goto werr ;
if ( rioWriteBulkObject ( & aof , & key ) = = 0 ) goto werr ;
2011-11-09 11:20:14 -05:00
if ( rioWriteBulkLongLong ( & aof , expiretime ) = = 0 ) goto werr ;
2010-06-21 18:07:48 -04:00
}
}
dictReleaseIterator ( di ) ;
}
/* Make sure data will not remain on the OS's output buffers */
fflush ( fp ) ;
aof_fsync ( fileno ( fp ) ) ;
fclose ( fp ) ;
/* Use RENAME to make sure the DB file is changed atomically only
* if the generate DB file is ok . */
if ( rename ( tmpfile , filename ) = = - 1 ) {
redisLog ( REDIS_WARNING , " Error moving temp append only file on the final destination: %s " , strerror ( errno ) ) ;
unlink ( tmpfile ) ;
return REDIS_ERR ;
}
redisLog ( REDIS_NOTICE , " SYNC append only file rewrite performed " ) ;
return REDIS_OK ;
werr :
fclose ( fp ) ;
unlink ( tmpfile ) ;
redisLog ( REDIS_WARNING , " Write error writing append only file on disk: %s " , strerror ( errno ) ) ;
if ( di ) dictReleaseIterator ( di ) ;
return REDIS_ERR ;
}
/* This is how rewriting of the append only file in background works:
*
* 1 ) The user calls BGREWRITEAOF
* 2 ) Redis calls this function , that forks ( ) :
* 2 a ) the child rewrite the append only file in a temp file .
2011-12-21 06:17:02 -05:00
* 2 b ) the parent accumulates differences in server . aof_rewrite_buf .
2010-06-21 18:07:48 -04:00
* 3 ) When the child finished ' 2 a ' exists .
* 4 ) The parent will trap the exit code , if it ' s OK , will append the
2011-12-21 06:17:02 -05:00
* data accumulated into server . aof_rewrite_buf into the temp file , and
2010-06-21 18:07:48 -04:00
* finally will rename ( 2 ) the temp file in the actual file name .
* The the new file is reopened as the new append only file . Profit !
*/
int rewriteAppendOnlyFileBackground ( void ) {
pid_t childpid ;
2011-05-29 09:17:29 -04:00
long long start ;
2010-06-21 18:07:48 -04:00
2011-12-21 06:17:02 -05:00
if ( server . aof_child_pid ! = - 1 ) return REDIS_ERR ;
2011-05-29 09:17:29 -04:00
start = ustime ( ) ;
2010-06-21 18:07:48 -04:00
if ( ( childpid = fork ( ) ) = = 0 ) {
char tmpfile [ 256 ] ;
2011-05-29 09:17:29 -04:00
/* Child */
2010-08-03 07:33:12 -04:00
if ( server . ipfd > 0 ) close ( server . ipfd ) ;
if ( server . sofd > 0 ) close ( server . sofd ) ;
2010-06-21 18:07:48 -04:00
snprintf ( tmpfile , 256 , " temp-rewriteaof-bg-%d.aof " , ( int ) getpid ( ) ) ;
if ( rewriteAppendOnlyFile ( tmpfile ) = = REDIS_OK ) {
_exit ( 0 ) ;
} else {
_exit ( 1 ) ;
}
} else {
/* Parent */
2011-05-29 09:17:29 -04:00
server . stat_fork_time = ustime ( ) - start ;
2010-06-21 18:07:48 -04:00
if ( childpid = = - 1 ) {
redisLog ( REDIS_WARNING ,
" Can't rewrite append only file in background: fork: %s " ,
strerror ( errno ) ) ;
return REDIS_ERR ;
}
redisLog ( REDIS_NOTICE ,
" Background append only file rewriting started by pid %d " , childpid ) ;
2011-12-21 05:58:42 -05:00
server . aof_rewrite_scheduled = 0 ;
2011-12-21 06:17:02 -05:00
server . aof_child_pid = childpid ;
2010-06-21 18:07:48 -04:00
updateDictResizePolicy ( ) ;
/* We set appendseldb to -1 in order to force the next call to the
* feedAppendOnlyFile ( ) to issue a SELECT command , so the differences
2011-12-21 06:17:02 -05:00
* accumulated by the parent into server . aof_rewrite_buf will start
2010-06-21 18:07:48 -04:00
* with a SELECT statement and it will be safe to merge . */
2011-12-21 06:17:02 -05:00
server . aof_selected_db = - 1 ;
2010-06-21 18:07:48 -04:00
return REDIS_OK ;
}
return REDIS_OK ; /* unreached */
}
/* BGREWRITEAOF command: ask for a background rewrite of the append only file.
 *
 * The reply sent to the client 'c' depends on the server state:
 * - an AOF rewrite child already exists: error reply;
 * - server.rdb_child_pid is set: the rewrite is only flagged in
 *   server.aof_rewrite_scheduled and a "scheduled" status is returned;
 * - otherwise the rewrite is started right away, replying with a status on
 *   success or with the shared generic error on failure. */
void bgrewriteaofCommand(redisClient *c) {
    if (server.aof_child_pid != -1) {
        addReplyError(c, " Background append only file rewriting already in progress ");
        return;
    }

    if (server.rdb_child_pid != -1) {
        /* Can't start now: remember that a rewrite was requested. */
        server.aof_rewrite_scheduled = 1;
        addReplyStatus(c, " Background append only file rewriting scheduled ");
        return;
    }

    if (rewriteAppendOnlyFileBackground() != REDIS_OK) {
        addReply(c, shared.err);
        return;
    }
    addReplyStatus(c, " Background append only file rewriting started ");
}
/* Unlink the temporary rewrite file whose name is derived from the process
 * id 'childpid'. The result of unlink(2) is ignored, so calling this when
 * the file does not exist is harmless. */
void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path, sizeof(path), " temp-rewriteaof-bg-%d.aof ", (int) childpid);
    unlink(path);
}
2011-12-21 05:58:42 -05:00
/* Update the server.aof_current_size filed explicitly using stat(2)
2011-06-10 06:39:23 -04:00
* to check the size of the file . This is useful after a rewrite or after
* a restart , normally the size is updated just adding the write length
* to the current lenght , that is much faster . */
void aofUpdateCurrentSize ( void ) {
struct redis_stat sb ;
2011-12-21 06:17:02 -05:00
if ( redis_fstat ( server . aof_fd , & sb ) = = - 1 ) {
2011-12-21 11:12:23 -05:00
redisLog ( REDIS_WARNING , " Unable to obtain the AOF file length. stat: %s " ,
2011-06-10 06:39:23 -04:00
strerror ( errno ) ) ;
} else {
2011-12-21 05:58:42 -05:00
server . aof_current_size = sb . st_size ;
2011-06-10 06:39:23 -04:00
}
}
2010-06-21 18:07:48 -04:00
/* A background append only file rewriting (BGREWRITEAOF) terminated its work.
* Handle this . */
2011-01-07 12:15:14 -05:00
void backgroundRewriteDoneHandler ( int exitcode , int bysignal ) {
2010-06-21 18:07:48 -04:00
if ( ! bysignal & & exitcode = = 0 ) {
2011-08-18 09:49:06 -04:00
int newfd , oldfd ;
int nwritten ;
2010-06-21 18:07:48 -04:00
char tmpfile [ 256 ] ;
2011-08-18 09:49:06 -04:00
long long now = ustime ( ) ;
2010-06-21 18:07:48 -04:00
redisLog ( REDIS_NOTICE ,
2011-08-18 09:49:06 -04:00
" Background AOF rewrite terminated with success " ) ;
2011-09-13 12:27:08 -04:00
/* Flush the differences accumulated by the parent to the
* rewritten AOF . */
snprintf ( tmpfile , 256 , " temp-rewriteaof-bg-%d.aof " ,
2011-12-21 06:17:02 -05:00
( int ) server . aof_child_pid ) ;
2011-08-18 09:49:06 -04:00
newfd = open ( tmpfile , O_WRONLY | O_APPEND ) ;
if ( newfd = = - 1 ) {
redisLog ( REDIS_WARNING ,
" Unable to open the temporary AOF produced by the child: %s " , strerror ( errno ) ) ;
2010-06-21 18:07:48 -04:00
goto cleanup ;
}
2011-08-18 09:49:06 -04:00
2011-12-21 06:17:02 -05:00
nwritten = write ( newfd , server . aof_rewrite_buf , sdslen ( server . aof_rewrite_buf ) ) ;
if ( nwritten ! = ( signed ) sdslen ( server . aof_rewrite_buf ) ) {
2011-08-18 09:49:06 -04:00
if ( nwritten = = - 1 ) {
redisLog ( REDIS_WARNING ,
" Error trying to flush the parent diff to the rewritten AOF: %s " , strerror ( errno ) ) ;
} else {
redisLog ( REDIS_WARNING ,
" Short write trying to flush the parent diff to the rewritten AOF: %s " , strerror ( errno ) ) ;
}
close ( newfd ) ;
2010-06-21 18:07:48 -04:00
goto cleanup ;
}
2011-08-18 09:49:06 -04:00
redisLog ( REDIS_NOTICE ,
" Parent diff successfully flushed to the rewritten AOF (%lu bytes) " , nwritten ) ;
/* The only remaining thing to do is to rename the temporary file to
* the configured file and switch the file descriptor used to do AOF
2011-09-13 12:27:08 -04:00
* writes . We don ' t want close ( 2 ) or rename ( 2 ) calls to block the
* server on old file deletion .
*
* There are two possible scenarios :
2011-08-18 09:49:06 -04:00
*
* 1 ) AOF is DISABLED and this was a one time rewrite . The temporary
* file will be renamed to the configured file . When this file already
* exists , it will be unlinked , which may block the server .
*
* 2 ) AOF is ENABLED and the rewritten AOF will immediately start
* receiving writes . After the temporary file is renamed to the
* configured file , the original AOF file descriptor will be closed .
* Since this will be the last reference to that file , closing it
* causes the underlying file to be unlinked , which may block the
* server .
*
* To mitigate the blocking effect of the unlink operation ( either
* caused by rename ( 2 ) in scenario 1 , or by close ( 2 ) in scenario 2 ) , we
2011-09-13 12:27:08 -04:00
* use a background thread to take care of this . First , we
2011-08-18 09:49:06 -04:00
* make scenario 1 identical to scenario 2 by opening the target file
* when it exists . The unlink operation after the rename ( 2 ) will then
* be executed upon calling close ( 2 ) for its descriptor . Everything to
* guarantee atomicity for this switch has already happened by then , so
* we don ' t care what the outcome or duration of that close operation
* is , as long as the file descriptor is released again . */
2011-12-21 06:17:02 -05:00
if ( server . aof_fd = = - 1 ) {
2011-08-18 09:49:06 -04:00
/* AOF disabled */
2011-09-13 12:27:08 -04:00
/* Don't care if this fails: oldfd will be -1 and we handle that.
* One notable case of - 1 return is if the old file does
* not exist . */
2011-12-21 05:58:42 -05:00
oldfd = open ( server . aof_filename , O_RDONLY | O_NONBLOCK ) ;
2011-08-18 09:49:06 -04:00
} else {
/* AOF enabled */
2011-09-13 12:27:08 -04:00
oldfd = - 1 ; /* We'll set this to the current AOF filedes later. */
2011-08-18 09:49:06 -04:00
}
/* Rename the temporary file. This will not unlink the target file if
* it exists , because we reference it with " oldfd " . */
2011-12-21 05:58:42 -05:00
if ( rename ( tmpfile , server . aof_filename ) = = - 1 ) {
2011-08-18 09:49:06 -04:00
redisLog ( REDIS_WARNING ,
2011-12-21 11:12:23 -05:00
" Error trying to rename the temporary AOF file: %s " , strerror ( errno ) ) ;
2011-08-18 09:49:06 -04:00
close ( newfd ) ;
2011-09-13 12:27:08 -04:00
if ( oldfd ! = - 1 ) close ( oldfd ) ;
2010-06-21 18:07:48 -04:00
goto cleanup ;
}
2011-08-18 09:49:06 -04:00
2011-12-21 06:17:02 -05:00
if ( server . aof_fd = = - 1 ) {
2011-09-13 12:27:08 -04:00
/* AOF disabled, we don't need to set the AOF file descriptor
* to this new file , so we can close it . */
2011-08-18 09:49:06 -04:00
close ( newfd ) ;
} else {
2011-09-13 12:27:08 -04:00
/* AOF enabled, replace the old fd with the new one. */
2011-12-21 06:17:02 -05:00
oldfd = server . aof_fd ;
server . aof_fd = newfd ;
2011-12-21 05:58:42 -05:00
if ( server . aof_fsync = = AOF_FSYNC_ALWAYS )
2011-09-16 05:08:39 -04:00
aof_fsync ( newfd ) ;
2011-12-21 05:58:42 -05:00
else if ( server . aof_fsync = = AOF_FSYNC_EVERYSEC )
2011-09-16 05:08:39 -04:00
aof_background_fsync ( newfd ) ;
2011-12-21 06:17:02 -05:00
server . aof_selected_db = - 1 ; /* Make sure SELECT is re-issued */
2011-06-10 06:39:23 -04:00
aofUpdateCurrentSize ( ) ;
2011-12-21 05:58:42 -05:00
server . aof_rewrite_base_size = server . aof_current_size ;
2011-08-17 12:15:06 -04:00
/* Clear regular AOF buffer since its contents was just written to
* the new AOF from the background rewrite buffer . */
2011-12-21 06:17:02 -05:00
sdsfree ( server . aof_buf ) ;
server . aof_buf = sdsempty ( ) ;
2010-06-21 18:07:48 -04:00
}
2011-08-18 09:49:06 -04:00
2011-12-21 11:12:23 -05:00
redisLog ( REDIS_NOTICE , " Background AOF rewrite finished successfully " ) ;
2011-12-21 04:31:34 -05:00
/* Change state from WAIT_REWRITE to ON if needed */
if ( server . aof_state = = REDIS_AOF_WAIT_REWRITE )
server . aof_state = REDIS_AOF_ON ;
2011-08-18 09:49:06 -04:00
/* Asynchronously close the overwritten AOF. */
2011-09-15 09:46:29 -04:00
if ( oldfd ! = - 1 ) bioCreateBackgroundJob ( REDIS_BIO_CLOSE_FILE , ( void * ) ( long ) oldfd , NULL , NULL ) ;
2011-08-18 09:49:06 -04:00
redisLog ( REDIS_VERBOSE ,
" Background AOF rewrite signal handler took %lldus " , ustime ( ) - now ) ;
2010-06-21 18:07:48 -04:00
} else if ( ! bysignal & & exitcode ! = 0 ) {
2011-08-18 09:49:06 -04:00
redisLog ( REDIS_WARNING ,
" Background AOF rewrite terminated with error " ) ;
2010-06-21 18:07:48 -04:00
} else {
redisLog ( REDIS_WARNING ,
2011-08-18 09:49:06 -04:00
" Background AOF rewrite terminated by signal %d " , bysignal ) ;
2010-06-21 18:07:48 -04:00
}
2011-08-18 09:49:06 -04:00
2010-06-21 18:07:48 -04:00
cleanup :
2011-12-21 06:17:02 -05:00
sdsfree ( server . aof_rewrite_buf ) ;
server . aof_rewrite_buf = sdsempty ( ) ;
aofRemoveTempFile ( server . aof_child_pid ) ;
server . aof_child_pid = - 1 ;
2011-12-21 04:31:34 -05:00
/* Schedule a new rewrite if we are waiting for it to switch the AOF ON. */
if ( server . aof_state = = REDIS_AOF_WAIT_REWRITE )
2011-12-21 05:58:42 -05:00
server . aof_rewrite_scheduled = 1 ;
2010-06-21 18:07:48 -04:00
}