Cluster: atomic update of nodes.conf file.

The way the file was generated was unsafe and led to nodes.conf file
corruption (a zero-length file) if the server was stopped or crashed
while the file was being created.

The previous file update method was as simple as an open with O_TRUNC
followed by the write call. While the write call was a single one with
the full payload, ensuring no half-written files under POSIX semantics,
stopping the server just after the open call resulted in a zero-length
file (all the nodes information lost!).
This commit is contained in:
antirez 2014-01-15 10:31:12 +01:00
parent 28273394cb
commit 6c63df3031

View File

@ -38,6 +38,7 @@
#include <fcntl.h> #include <fcntl.h>
#include <unistd.h> #include <unistd.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/stat.h>
clusterNode *createClusterNode(char *nodename, int flags); clusterNode *createClusterNode(char *nodename, int flags);
int clusterAddNode(clusterNode *node); int clusterAddNode(clusterNode *node);
@ -226,20 +227,45 @@ fmterr:
/* Cluster node configuration is exactly the same as CLUSTER NODES output. /* Cluster node configuration is exactly the same as CLUSTER NODES output.
* *
* This function writes the node config and returns 0, on error -1 * This function writes the node config and returns 0, on error -1
* is returned. */ * is returned.
*
* Note: we need to write the file in an atomic way from the point of view
* of the POSIX filesystem semantics, so that if the server is stopped
* or crashes during the write, we'll end with either the old file or the
* new one. Since we have the full payload to write available we can use
* a single write to write the whole file. If the pre-existing file was
* bigger we pad our payload with newlines that are anyway ignored and truncate
* the file afterward. */
int clusterSaveConfig(int do_fsync) { int clusterSaveConfig(int do_fsync) {
sds ci = clusterGenNodesDescription(REDIS_NODE_HANDSHAKE); sds ci = clusterGenNodesDescription(REDIS_NODE_HANDSHAKE);
size_t content_size = sdslen(ci);
struct stat sb;
int fd; int fd;
if ((fd = open(server.cluster_configfile,O_WRONLY|O_CREAT|O_TRUNC,0644)) if ((fd = open(server.cluster_configfile,O_WRONLY|O_CREAT,0644))
== -1) goto err; == -1) goto err;
/* Pad the new payload if the existing file length is greater. */
if (fstat(fd,&sb) != -1) {
if (sb.st_size > content_size) {
ci = sdsgrowzero(ci,sb.st_size);
memset(ci+content_size,'\n',sb.st_size-content_size);
}
}
if (write(fd,ci,sdslen(ci)) != (ssize_t)sdslen(ci)) goto err; if (write(fd,ci,sdslen(ci)) != (ssize_t)sdslen(ci)) goto err;
if (do_fsync) fsync(fd); if (do_fsync) fsync(fd);
/* Truncate the file if needed to remove the final \n padding that
* is just garbage. */
if (content_size != sdslen(ci) && ftruncate(fd,content_size) == -1) {
/* ftruncate() failing is not a critical error. */
}
close(fd); close(fd);
sdsfree(ci); sdsfree(ci);
return 0; return 0;
err: err:
if (fd != -1) close(fd);
sdsfree(ci); sdsfree(ci);
return -1; return -1;
} }