Merge branch 'unstable' of github.com:antirez/redis-private into unstable

This commit is contained in:
antirez 2011-01-07 16:43:51 +01:00
commit cb9b35c8ca
10 changed files with 114 additions and 129 deletions

View File

@ -319,11 +319,6 @@ cache-flush-delay 0
############################### ADVANCED CONFIG ############################### ############################### ADVANCED CONFIG ###############################
# Glue small output buffers together in order to send small replies in a
# single TCP packet. Uses a bit more CPU but most of the times it is a win
# in terms of number of queries per second. Use 'yes' if unsure.
glueoutputbuf yes
# Hashes are encoded in a special way (much more memory efficient) when they # Hashes are encoded in a special way (much more memory efficient) when they
# have at max a given numer of elements, and the biggest element does not # have at max a given numer of elements, and the biggest element does not
# exceed a given threshold. You can configure this limits with the following # exceed a given threshold. You can configure this limits with the following

View File

@ -64,11 +64,11 @@ int anetNonBlock(char *err, int fd)
* Note that fcntl(2) for F_GETFL and F_SETFL can't be * Note that fcntl(2) for F_GETFL and F_SETFL can't be
* interrupted by a signal. */ * interrupted by a signal. */
if ((flags = fcntl(fd, F_GETFL)) == -1) { if ((flags = fcntl(fd, F_GETFL)) == -1) {
anetSetError(err, "fcntl(F_GETFL): %s\n", strerror(errno)); anetSetError(err, "fcntl(F_GETFL): %s", strerror(errno));
return ANET_ERR; return ANET_ERR;
} }
if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) { if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
anetSetError(err, "fcntl(F_SETFL,O_NONBLOCK): %s\n", strerror(errno)); anetSetError(err, "fcntl(F_SETFL,O_NONBLOCK): %s", strerror(errno));
return ANET_ERR; return ANET_ERR;
} }
return ANET_OK; return ANET_OK;
@ -79,7 +79,7 @@ int anetTcpNoDelay(char *err, int fd)
int yes = 1; int yes = 1;
if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes)) == -1) if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes)) == -1)
{ {
anetSetError(err, "setsockopt TCP_NODELAY: %s\n", strerror(errno)); anetSetError(err, "setsockopt TCP_NODELAY: %s", strerror(errno));
return ANET_ERR; return ANET_ERR;
} }
return ANET_OK; return ANET_OK;
@ -89,7 +89,7 @@ int anetSetSendBuffer(char *err, int fd, int buffsize)
{ {
if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buffsize, sizeof(buffsize)) == -1) if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buffsize, sizeof(buffsize)) == -1)
{ {
anetSetError(err, "setsockopt SO_SNDBUF: %s\n", strerror(errno)); anetSetError(err, "setsockopt SO_SNDBUF: %s", strerror(errno));
return ANET_ERR; return ANET_ERR;
} }
return ANET_OK; return ANET_OK;
@ -99,7 +99,7 @@ int anetTcpKeepAlive(char *err, int fd)
{ {
int yes = 1; int yes = 1;
if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &yes, sizeof(yes)) == -1) { if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &yes, sizeof(yes)) == -1) {
anetSetError(err, "setsockopt SO_KEEPALIVE: %s\n", strerror(errno)); anetSetError(err, "setsockopt SO_KEEPALIVE: %s", strerror(errno));
return ANET_ERR; return ANET_ERR;
} }
return ANET_OK; return ANET_OK;
@ -115,7 +115,7 @@ int anetResolve(char *err, char *host, char *ipbuf)
he = gethostbyname(host); he = gethostbyname(host);
if (he == NULL) { if (he == NULL) {
anetSetError(err, "can't resolve: %s\n", host); anetSetError(err, "can't resolve: %s", host);
return ANET_ERR; return ANET_ERR;
} }
memcpy(&sa.sin_addr, he->h_addr, sizeof(struct in_addr)); memcpy(&sa.sin_addr, he->h_addr, sizeof(struct in_addr));
@ -127,14 +127,14 @@ int anetResolve(char *err, char *host, char *ipbuf)
static int anetCreateSocket(char *err, int domain) { static int anetCreateSocket(char *err, int domain) {
int s, on = 1; int s, on = 1;
if ((s = socket(domain, SOCK_STREAM, 0)) == -1) { if ((s = socket(domain, SOCK_STREAM, 0)) == -1) {
anetSetError(err, "creating socket: %s\n", strerror(errno)); anetSetError(err, "creating socket: %s", strerror(errno));
return ANET_ERR; return ANET_ERR;
} }
/* Make sure connection-intensive things like the redis benckmark /* Make sure connection-intensive things like the redis benckmark
* will be able to close/open sockets a zillion of times */ * will be able to close/open sockets a zillion of times */
if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) { if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) {
anetSetError(err, "setsockopt SO_REUSEADDR: %s\n", strerror(errno)); anetSetError(err, "setsockopt SO_REUSEADDR: %s", strerror(errno));
return ANET_ERR; return ANET_ERR;
} }
return s; return s;
@ -157,7 +157,7 @@ static int anetTcpGenericConnect(char *err, char *addr, int port, int flags)
he = gethostbyname(addr); he = gethostbyname(addr);
if (he == NULL) { if (he == NULL) {
anetSetError(err, "can't resolve: %s\n", addr); anetSetError(err, "can't resolve: %s", addr);
close(s); close(s);
return ANET_ERR; return ANET_ERR;
} }
@ -172,7 +172,7 @@ static int anetTcpGenericConnect(char *err, char *addr, int port, int flags)
flags & ANET_CONNECT_NONBLOCK) flags & ANET_CONNECT_NONBLOCK)
return s; return s;
anetSetError(err, "connect: %s\n", strerror(errno)); anetSetError(err, "connect: %s", strerror(errno));
close(s); close(s);
return ANET_ERR; return ANET_ERR;
} }
@ -208,7 +208,7 @@ int anetUnixGenericConnect(char *err, char *path, int flags)
flags & ANET_CONNECT_NONBLOCK) flags & ANET_CONNECT_NONBLOCK)
return s; return s;
anetSetError(err, "connect: %s\n", strerror(errno)); anetSetError(err, "connect: %s", strerror(errno));
close(s); close(s);
return ANET_ERR; return ANET_ERR;
} }
@ -257,12 +257,12 @@ int anetWrite(int fd, char *buf, int count)
static int anetListen(char *err, int s, struct sockaddr *sa, socklen_t len) { static int anetListen(char *err, int s, struct sockaddr *sa, socklen_t len) {
if (bind(s,sa,len) == -1) { if (bind(s,sa,len) == -1) {
anetSetError(err, "bind: %s\n", strerror(errno)); anetSetError(err, "bind: %s", strerror(errno));
close(s); close(s);
return ANET_ERR; return ANET_ERR;
} }
if (listen(s, 511) == -1) { /* the magic 511 constant is from nginx */ if (listen(s, 511) == -1) { /* the magic 511 constant is from nginx */
anetSetError(err, "listen: %s\n", strerror(errno)); anetSetError(err, "listen: %s", strerror(errno));
close(s); close(s);
return ANET_ERR; return ANET_ERR;
} }
@ -282,7 +282,7 @@ int anetTcpServer(char *err, int port, char *bindaddr)
sa.sin_port = htons(port); sa.sin_port = htons(port);
sa.sin_addr.s_addr = htonl(INADDR_ANY); sa.sin_addr.s_addr = htonl(INADDR_ANY);
if (bindaddr && inet_aton(bindaddr, &sa.sin_addr) == 0) { if (bindaddr && inet_aton(bindaddr, &sa.sin_addr) == 0) {
anetSetError(err, "Invalid bind address\n"); anetSetError(err, "invalid bind address");
close(s); close(s);
return ANET_ERR; return ANET_ERR;
} }
@ -315,7 +315,7 @@ static int anetGenericAccept(char *err, int s, struct sockaddr *sa, socklen_t *l
if (errno == EINTR) if (errno == EINTR)
continue; continue;
else { else {
anetSetError(err, "accept: %s\n", strerror(errno)); anetSetError(err, "accept: %s", strerror(errno));
return ANET_ERR; return ANET_ERR;
} }
} }

View File

@ -194,10 +194,8 @@ void loadServerConfig(char *filename) {
if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) { if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr; err = "argument must be 'yes' or 'no'"; goto loaderr;
} }
} else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) { } else if (!strcasecmp(argv[0],"glueoutputbuf")) {
if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) { redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]);
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) { } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) {
if ((server.rdbcompression = yesnotoi(argv[1])) == -1) { if ((server.rdbcompression = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr; err = "argument must be 'yes' or 'no'"; goto loaderr;

View File

@ -348,3 +348,50 @@ void dsFlushDb(int dbid) {
} }
} }
} }
int dsRdbSave(char *filename) {
char tmpfile[256];
int j, i;
time_t now = time(NULL);
snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
fp = fopen(tmpfile,"w");
if (!fp) {
redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
return REDIS_ERR;
}
if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
/* Scan all diskstore dirs looking for keys */
for (j = 0; j < 256; j++) {
for (i = 0; i < 256; i++) {
snprintf(buf,1024,"%s/%02x/%02x",server.ds_path,j,i);
/* Write the SELECT DB opcode */
if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
if (rdbSaveLen(fp,j) == -1) goto werr;
}
}
/* Make sure data will not remain on the OS's output buffers */
fflush(fp);
fsync(fileno(fp));
fclose(fp);
/* Use RENAME to make sure the DB file is changed atomically only
* if the generate DB file is ok. */
if (rename(tmpfile,filename) == -1) {
redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
unlink(tmpfile);
return REDIS_ERR;
}
redisLog(REDIS_NOTICE,"DB saved on disk");
server.dirty = 0;
server.lastsave = time(NULL);
return REDIS_OK;
werr:
fclose(fp);
unlink(tmpfile);
redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
}

View File

@ -512,7 +512,7 @@ int processActiveIOJobs(int max) {
#if 0 #if 0
/* If there are new jobs we need to signal the thread to /* If there are new jobs we need to signal the thread to
* process the next one. */ * process the next one. FIXME: drop this if useless. */
redisLog(REDIS_DEBUG,"waitEmptyIOJobsQueue: new %d, processing %d", redisLog(REDIS_DEBUG,"waitEmptyIOJobsQueue: new %d, processing %d",
listLength(server.io_newjobs), listLength(server.io_newjobs),
listLength(server.io_processing)); listLength(server.io_processing));
@ -576,7 +576,21 @@ void queueIOJob(iojob *j) {
spawnIOThread(); spawnIOThread();
} }
void dsCreateIOJob(int type, redisDb *db, robj *key, robj *val) { /* Consume all the IO scheduled operations, and all the thread IO jobs
* so that eventually the state of diskstore is a point-in-time snapshot.
*
* This is useful when we need to BGSAVE with diskstore enabled. */
void cacheForcePointInTime(void) {
redisLog(REDIS_NOTICE,"Diskstore: synching on disk to reach point-in-time state.");
while (listLength(server.cache_io_queue) != 0) {
cacheScheduleIOPushJobs(REDIS_IO_ASAP);
processActiveIOJobs(1);
}
waitEmptyIOJobsQueue();
processAllPendingIOJobs();
}
void cacheCreateIOJob(int type, redisDb *db, robj *key, robj *val) {
iojob *j; iojob *j;
j = zmalloc(sizeof(*j)); j = zmalloc(sizeof(*j));
@ -762,7 +776,7 @@ int cacheScheduleIOPushJobs(int flags) {
op->type == REDIS_IO_LOAD ? "load" : "save", op->key->ptr); op->type == REDIS_IO_LOAD ? "load" : "save", op->key->ptr);
if (op->type == REDIS_IO_LOAD) { if (op->type == REDIS_IO_LOAD) {
dsCreateIOJob(REDIS_IOJOB_LOAD,op->db,op->key,NULL); cacheCreateIOJob(REDIS_IOJOB_LOAD,op->db,op->key,NULL);
} else { } else {
/* Lookup the key, in order to put the current value in the IO /* Lookup the key, in order to put the current value in the IO
* Job. Otherwise if the key does not exists we schedule a disk * Job. Otherwise if the key does not exists we schedule a disk
@ -775,7 +789,7 @@ int cacheScheduleIOPushJobs(int flags) {
* the key on disk. */ * the key on disk. */
val = NULL; val = NULL;
} }
dsCreateIOJob(REDIS_IOJOB_SAVE,op->db,op->key,val); cacheCreateIOJob(REDIS_IOJOB_SAVE,op->db,op->key,val);
} }
/* Mark the operation as in progress. */ /* Mark the operation as in progress. */
cacheScheduleIODelFlag(op->db,op->key,op->type); cacheScheduleIODelFlag(op->db,op->key,op->type);

View File

@ -515,15 +515,6 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
REDIS_NOTUSED(el); REDIS_NOTUSED(el);
REDIS_NOTUSED(mask); REDIS_NOTUSED(mask);
/* Use writev() if we have enough buffers to send */
if (!server.glueoutputbuf &&
listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
!(c->flags & REDIS_MASTER))
{
sendReplyToClientWritev(el, fd, privdata, mask);
return;
}
while(c->bufpos > 0 || listLength(c->reply)) { while(c->bufpos > 0 || listLength(c->reply)) {
if (c->bufpos > 0) { if (c->bufpos > 0) {
if (c->flags & REDIS_MASTER) { if (c->flags & REDIS_MASTER) {
@ -594,84 +585,6 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
} }
} }
void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
{
redisClient *c = privdata;
int nwritten = 0, totwritten = 0, objlen, willwrite;
robj *o;
struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
int offset, ion = 0;
REDIS_NOTUSED(el);
REDIS_NOTUSED(mask);
listNode *node;
while (listLength(c->reply)) {
offset = c->sentlen;
ion = 0;
willwrite = 0;
/* fill-in the iov[] array */
for(node = listFirst(c->reply); node; node = listNextNode(node)) {
o = listNodeValue(node);
objlen = sdslen(o->ptr);
if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
break;
if(ion == REDIS_WRITEV_IOVEC_COUNT)
break; /* no more iovecs */
iov[ion].iov_base = ((char*)o->ptr) + offset;
iov[ion].iov_len = objlen - offset;
willwrite += objlen - offset;
offset = 0; /* just for the first item */
ion++;
}
if(willwrite == 0)
break;
/* write all collected blocks at once */
if((nwritten = writev(fd, iov, ion)) < 0) {
if (errno != EAGAIN) {
redisLog(REDIS_VERBOSE,
"Error writing to client: %s", strerror(errno));
freeClient(c);
return;
}
break;
}
totwritten += nwritten;
offset = c->sentlen;
/* remove written robjs from c->reply */
while (nwritten && listLength(c->reply)) {
o = listNodeValue(listFirst(c->reply));
objlen = sdslen(o->ptr);
if(nwritten >= objlen - offset) {
listDelNode(c->reply, listFirst(c->reply));
nwritten -= objlen - offset;
c->sentlen = 0;
} else {
/* partial write */
c->sentlen += nwritten;
break;
}
offset = 0;
}
}
if (totwritten > 0)
c->lastinteraction = time(NULL);
if (listLength(c->reply) == 0) {
c->sentlen = 0;
aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
}
}
/* resetClient prepare the client to process the next command */ /* resetClient prepare the client to process the next command */
void resetClient(redisClient *c) { void resetClient(redisClient *c) {
freeClientArgv(c); freeClientArgv(c);

View File

@ -429,8 +429,10 @@ int rdbSave(char *filename) {
int j; int j;
time_t now = time(NULL); time_t now = time(NULL);
/* FIXME: implement .rdb save for disk store properly */ if (server.ds_enabled) {
redisAssert(server.ds_enabled == 0); cacheForcePointInTime();
return dsRdbSave(filename);
}
snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid()); snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
fp = fopen(tmpfile,"w"); fp = fopen(tmpfile,"w");
@ -495,17 +497,22 @@ int rdbSaveBackground(char *filename) {
pid_t childpid; pid_t childpid;
if (server.bgsavechildpid != -1) return REDIS_ERR; if (server.bgsavechildpid != -1) return REDIS_ERR;
redisAssert(server.ds_enabled == 0);
server.dirty_before_bgsave = server.dirty; server.dirty_before_bgsave = server.dirty;
if ((childpid = fork()) == 0) { if ((childpid = fork()) == 0) {
int retval;
/* Child */ /* Child */
if (server.ipfd > 0) close(server.ipfd); if (server.ipfd > 0) close(server.ipfd);
if (server.sofd > 0) close(server.sofd); if (server.sofd > 0) close(server.sofd);
if (rdbSave(filename) == REDIS_OK) { if (server.ds_enabled) {
_exit(0); cacheForcePointInTime();
dsRdbSave(filename);
} else { } else {
_exit(1); rdbSave(filename);
} }
_exit((retval == REDIS_OK) ? 0 : 1);
} else { } else {
/* Parent */ /* Parent */
if (childpid == -1) { if (childpid == -1) {

View File

@ -749,7 +749,6 @@ void initServerConfig() {
server.syslog_enabled = 0; server.syslog_enabled = 0;
server.syslog_ident = zstrdup("redis"); server.syslog_ident = zstrdup("redis");
server.syslog_facility = LOG_LOCAL0; server.syslog_facility = LOG_LOCAL0;
server.glueoutputbuf = 1;
server.daemonize = 0; server.daemonize = 0;
server.appendonly = 0; server.appendonly = 0;
server.appendfsync = APPENDFSYNC_EVERYSEC; server.appendfsync = APPENDFSYNC_EVERYSEC;

View File

@ -50,11 +50,6 @@
#define REDIS_REPLY_CHUNK_BYTES (5*1500) /* 5 TCP packets with default MTU */ #define REDIS_REPLY_CHUNK_BYTES (5*1500) /* 5 TCP packets with default MTU */
#define REDIS_MAX_LOGMSG_LEN 1024 /* Default maximum length of syslog messages */ #define REDIS_MAX_LOGMSG_LEN 1024 /* Default maximum length of syslog messages */
/* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */
#define REDIS_WRITEV_THRESHOLD 3
/* Max number of iovecs used for each writev call */
#define REDIS_WRITEV_IOVEC_COUNT 256
/* Hash table parameters */ /* Hash table parameters */
#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */ #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
@ -388,7 +383,6 @@ struct redisServer {
long long stat_keyspace_misses; /* number of failed lookups of keys */ long long stat_keyspace_misses; /* number of failed lookups of keys */
/* Configuration */ /* Configuration */
int verbosity; int verbosity;
int glueoutputbuf;
int maxidletime; int maxidletime;
int dbnum; int dbnum;
int daemonize; int daemonize;
@ -633,7 +627,6 @@ void closeTimedoutClients(void);
void freeClient(redisClient *c); void freeClient(redisClient *c);
void resetClient(redisClient *c); void resetClient(redisClient *c);
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask); void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
void addReply(redisClient *c, robj *obj); void addReply(redisClient *c, robj *obj);
void *addDeferredMultiBulkLength(redisClient *c); void *addDeferredMultiBulkLength(redisClient *c);
void setDeferredMultiBulkLength(redisClient *c, void *node, long length); void setDeferredMultiBulkLength(redisClient *c, void *node, long length);
@ -820,6 +813,7 @@ void cacheCron(void);
int cacheKeyMayExist(redisDb *db, robj *key); int cacheKeyMayExist(redisDb *db, robj *key);
void cacheSetKeyMayExist(redisDb *db, robj *key); void cacheSetKeyMayExist(redisDb *db, robj *key);
void cacheSetKeyDoesNotExist(redisDb *db, robj *key); void cacheSetKeyDoesNotExist(redisDb *db, robj *key);
void cacheForcePointInTime(void);
/* Set data type */ /* Set data type */
robj *setTypeCreate(robj *value); robj *setTypeCreate(robj *value);

18
utils/whatisdoing.sh Executable file
View File

@ -0,0 +1,18 @@
# This script is from http://poormansprofiler.org/
#!/bin/bash
nsamples=1
sleeptime=0
pid=$(pidof redis-server)
for x in $(seq 1 $nsamples)
do
gdb -ex "set pagination 0" -ex "thread apply all bt" -batch -p $pid
sleep $sleeptime
done | \
awk '
BEGIN { s = ""; }
/Thread/ { print s; s = ""; }
/^\#/ { if (s != "" ) { s = s "," $4} else { s = $4 } }
END { print s }' | \
sort | uniq -c | sort -r -n -k 1,1