redict/src/anet.c
Yossi Gottlieb bb7cd97439
Add hostname support in Sentinel. (#8282)
This is both a bugfix and an enhancement.

Internally, Sentinel relies entirely on IP addresses to identify
instances. When configured with a new master, it also requires users to
specify and IP and not hostname.

However, replicas may use the replica-announce-ip configuration to
announce a hostname. When that happens, Sentinel fails to match the
announced hostname with the expected IP and considers that a different
instance, triggering reconfiguration, etc.

Another use case is where TLS is used and clients are expected to match
the hostname to connect to with the certificate's SAN attribute. To
properly implement this configuration, it is necessary for Sentinel to
redirect clients to a hostname rather than an IP address.

The new 'resolve-hostnames' configuration parameter determines if
Sentinel is willing to accept hostnames. It is set by default to no,
which maintains backwards compatibility and avoids unexpected DNS
resolution delays on systems with DNS configuration issues.

Internally, Sentinel continues to identify instances by their resolved
IP address and will also report the IP by default. The new
'announce-hostnames' parameter determines if Sentinel should prefer to
announce a hostname, when available, rather than an IP address. This
applies to addresses returned to clients, as well as their
representation in the configuration file, REPLICAOF configuration
commands, etc.

This commit also introduces SENTINEL CONFIG GET and SENTINEL CONFIG SET
which can be used to introspect or configure global Sentinel
configuration that was previously was only possible by directly
accessing the configuration file and possibly restarting the instance.

Co-authored-by: myl1024 <myl92916@qq.com>
Co-authored-by: sundb <sundbcn@gmail.com>
2021-01-28 12:09:11 +02:00

669 lines
20 KiB
C

/* anet.c -- Basic TCP socket stuff made a bit less boring
*
* Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fmacros.h"
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <netdb.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include "anet.h"
static void anetSetError(char *err, const char *fmt, ...)
{
va_list ap;
if (!err) return;
va_start(ap, fmt);
vsnprintf(err, ANET_ERR_LEN, fmt, ap);
va_end(ap);
}
int anetSetBlock(char *err, int fd, int non_block) {
int flags;
/* Set the socket blocking (if non_block is zero) or non-blocking.
* Note that fcntl(2) for F_GETFL and F_SETFL can't be
* interrupted by a signal. */
if ((flags = fcntl(fd, F_GETFL)) == -1) {
anetSetError(err, "fcntl(F_GETFL): %s", strerror(errno));
return ANET_ERR;
}
/* Check if this flag has been set or unset, if so,
* then there is no need to call fcntl to set/unset it again. */
if (!!(flags & O_NONBLOCK) == !!non_block)
return ANET_OK;
if (non_block)
flags |= O_NONBLOCK;
else
flags &= ~O_NONBLOCK;
if (fcntl(fd, F_SETFL, flags) == -1) {
anetSetError(err, "fcntl(F_SETFL,O_NONBLOCK): %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
}
int anetNonBlock(char *err, int fd) {
return anetSetBlock(err,fd,1);
}
int anetBlock(char *err, int fd) {
return anetSetBlock(err,fd,0);
}
/* Enable the FD_CLOEXEC on the given fd to avoid fd leaks.
* This function should be invoked for fd's on specific places
* where fork + execve system calls are called. */
int anetCloexec(int fd) {
int r;
int flags;
do {
r = fcntl(fd, F_GETFD);
} while (r == -1 && errno == EINTR);
if (r == -1 || (r & FD_CLOEXEC))
return r;
flags = r | FD_CLOEXEC;
do {
r = fcntl(fd, F_SETFD, flags);
} while (r == -1 && errno == EINTR);
return r;
}
/* Set TCP keep alive option to detect dead peers. The interval option
* is only used for Linux as we are using Linux-specific APIs to set
* the probe send time, interval, and count. */
int anetKeepAlive(char *err, int fd, int interval)
{
int val = 1;
if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) == -1)
{
anetSetError(err, "setsockopt SO_KEEPALIVE: %s", strerror(errno));
return ANET_ERR;
}
#ifdef __linux__
/* Default settings are more or less garbage, with the keepalive time
* set to 7200 by default on Linux. Modify settings to make the feature
* actually useful. */
/* Send first probe after interval. */
val = interval;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) {
anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno));
return ANET_ERR;
}
/* Send next probes after the specified interval. Note that we set the
* delay as interval / 3, as we send three probes before detecting
* an error (see the next setsockopt call). */
val = interval/3;
if (val == 0) val = 1;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0) {
anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno));
return ANET_ERR;
}
/* Consider the socket in error state after three we send three ACK
* probes without getting a reply. */
val = 3;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0) {
anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno));
return ANET_ERR;
}
#else
((void) interval); /* Avoid unused var warning for non Linux systems. */
#endif
return ANET_OK;
}
static int anetSetTcpNoDelay(char *err, int fd, int val)
{
if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)) == -1)
{
anetSetError(err, "setsockopt TCP_NODELAY: %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
}
int anetEnableTcpNoDelay(char *err, int fd)
{
return anetSetTcpNoDelay(err, fd, 1);
}
int anetDisableTcpNoDelay(char *err, int fd)
{
return anetSetTcpNoDelay(err, fd, 0);
}
int anetSetSendBuffer(char *err, int fd, int buffsize)
{
if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buffsize, sizeof(buffsize)) == -1)
{
anetSetError(err, "setsockopt SO_SNDBUF: %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
}
int anetTcpKeepAlive(char *err, int fd)
{
int yes = 1;
if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &yes, sizeof(yes)) == -1) {
anetSetError(err, "setsockopt SO_KEEPALIVE: %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
}
/* Set the socket send timeout (SO_SNDTIMEO socket option) to the specified
* number of milliseconds, or disable it if the 'ms' argument is zero. */
int anetSendTimeout(char *err, int fd, long long ms) {
struct timeval tv;
tv.tv_sec = ms/1000;
tv.tv_usec = (ms%1000)*1000;
if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == -1) {
anetSetError(err, "setsockopt SO_SNDTIMEO: %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
}
/* Set the socket receive timeout (SO_RCVTIMEO socket option) to the specified
* number of milliseconds, or disable it if the 'ms' argument is zero. */
int anetRecvTimeout(char *err, int fd, long long ms) {
struct timeval tv;
tv.tv_sec = ms/1000;
tv.tv_usec = (ms%1000)*1000;
if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) {
anetSetError(err, "setsockopt SO_RCVTIMEO: %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
}
/* Resolve the hostname "host" and set the string representation of the
* IP address into the buffer pointed by "ipbuf".
*
* If flags is set to ANET_IP_ONLY the function only resolves hostnames
* that are actually already IPv4 or IPv6 addresses. This turns the function
* into a validating / normalizing function. */
int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len,
int flags)
{
struct addrinfo hints, *info;
int rv;
memset(&hints,0,sizeof(hints));
if (flags & ANET_IP_ONLY) hints.ai_flags = AI_NUMERICHOST;
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM; /* specify socktype to avoid dups */
if ((rv = getaddrinfo(host, NULL, &hints, &info)) != 0) {
anetSetError(err, "%s", gai_strerror(rv));
return ANET_ERR;
}
if (info->ai_family == AF_INET) {
struct sockaddr_in *sa = (struct sockaddr_in *)info->ai_addr;
inet_ntop(AF_INET, &(sa->sin_addr), ipbuf, ipbuf_len);
} else {
struct sockaddr_in6 *sa = (struct sockaddr_in6 *)info->ai_addr;
inet_ntop(AF_INET6, &(sa->sin6_addr), ipbuf, ipbuf_len);
}
freeaddrinfo(info);
return ANET_OK;
}
static int anetSetReuseAddr(char *err, int fd) {
int yes = 1;
/* Make sure connection-intensive things like the redis benchmark
* will be able to close/open sockets a zillion of times */
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) == -1) {
anetSetError(err, "setsockopt SO_REUSEADDR: %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
}
static int anetCreateSocket(char *err, int domain) {
int s;
if ((s = socket(domain, SOCK_STREAM, 0)) == -1) {
anetSetError(err, "creating socket: %s", strerror(errno));
return ANET_ERR;
}
/* Make sure connection-intensive things like the redis benchmark
* will be able to close/open sockets a zillion of times */
if (anetSetReuseAddr(err,s) == ANET_ERR) {
close(s);
return ANET_ERR;
}
return s;
}
#define ANET_CONNECT_NONE 0
#define ANET_CONNECT_NONBLOCK 1
#define ANET_CONNECT_BE_BINDING 2 /* Best effort binding. */
static int anetTcpGenericConnect(char *err, const char *addr, int port,
const char *source_addr, int flags)
{
int s = ANET_ERR, rv;
char portstr[6]; /* strlen("65535") + 1; */
struct addrinfo hints, *servinfo, *bservinfo, *p, *b;
snprintf(portstr,sizeof(portstr),"%d",port);
memset(&hints,0,sizeof(hints));
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
if ((rv = getaddrinfo(addr,portstr,&hints,&servinfo)) != 0) {
anetSetError(err, "%s", gai_strerror(rv));
return ANET_ERR;
}
for (p = servinfo; p != NULL; p = p->ai_next) {
/* Try to create the socket and to connect it.
* If we fail in the socket() call, or on connect(), we retry with
* the next entry in servinfo. */
if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1)
continue;
if (anetSetReuseAddr(err,s) == ANET_ERR) goto error;
if (flags & ANET_CONNECT_NONBLOCK && anetNonBlock(err,s) != ANET_OK)
goto error;
if (source_addr) {
int bound = 0;
/* Using getaddrinfo saves us from self-determining IPv4 vs IPv6 */
if ((rv = getaddrinfo(source_addr, NULL, &hints, &bservinfo)) != 0)
{
anetSetError(err, "%s", gai_strerror(rv));
goto error;
}
for (b = bservinfo; b != NULL; b = b->ai_next) {
if (bind(s,b->ai_addr,b->ai_addrlen) != -1) {
bound = 1;
break;
}
}
freeaddrinfo(bservinfo);
if (!bound) {
anetSetError(err, "bind: %s", strerror(errno));
goto error;
}
}
if (connect(s,p->ai_addr,p->ai_addrlen) == -1) {
/* If the socket is non-blocking, it is ok for connect() to
* return an EINPROGRESS error here. */
if (errno == EINPROGRESS && flags & ANET_CONNECT_NONBLOCK)
goto end;
close(s);
s = ANET_ERR;
continue;
}
/* If we ended an iteration of the for loop without errors, we
* have a connected socket. Let's return to the caller. */
goto end;
}
if (p == NULL)
anetSetError(err, "creating socket: %s", strerror(errno));
error:
if (s != ANET_ERR) {
close(s);
s = ANET_ERR;
}
end:
freeaddrinfo(servinfo);
/* Handle best effort binding: if a binding address was used, but it is
* not possible to create a socket, try again without a binding address. */
if (s == ANET_ERR && source_addr && (flags & ANET_CONNECT_BE_BINDING)) {
return anetTcpGenericConnect(err,addr,port,NULL,flags);
} else {
return s;
}
}
int anetTcpConnect(char *err, const char *addr, int port)
{
return anetTcpGenericConnect(err,addr,port,NULL,ANET_CONNECT_NONE);
}
int anetTcpNonBlockConnect(char *err, const char *addr, int port)
{
return anetTcpGenericConnect(err,addr,port,NULL,ANET_CONNECT_NONBLOCK);
}
int anetTcpNonBlockBindConnect(char *err, const char *addr, int port,
const char *source_addr)
{
return anetTcpGenericConnect(err,addr,port,source_addr,
ANET_CONNECT_NONBLOCK);
}
int anetTcpNonBlockBestEffortBindConnect(char *err, const char *addr, int port,
const char *source_addr)
{
return anetTcpGenericConnect(err,addr,port,source_addr,
ANET_CONNECT_NONBLOCK|ANET_CONNECT_BE_BINDING);
}
int anetUnixGenericConnect(char *err, const char *path, int flags)
{
int s;
struct sockaddr_un sa;
if ((s = anetCreateSocket(err,AF_LOCAL)) == ANET_ERR)
return ANET_ERR;
sa.sun_family = AF_LOCAL;
strncpy(sa.sun_path,path,sizeof(sa.sun_path)-1);
if (flags & ANET_CONNECT_NONBLOCK) {
if (anetNonBlock(err,s) != ANET_OK) {
close(s);
return ANET_ERR;
}
}
if (connect(s,(struct sockaddr*)&sa,sizeof(sa)) == -1) {
if (errno == EINPROGRESS &&
flags & ANET_CONNECT_NONBLOCK)
return s;
anetSetError(err, "connect: %s", strerror(errno));
close(s);
return ANET_ERR;
}
return s;
}
int anetUnixConnect(char *err, const char *path)
{
return anetUnixGenericConnect(err,path,ANET_CONNECT_NONE);
}
int anetUnixNonBlockConnect(char *err, const char *path)
{
return anetUnixGenericConnect(err,path,ANET_CONNECT_NONBLOCK);
}
/* Like read(2) but make sure 'count' is read before to return
* (unless error or EOF condition is encountered) */
int anetRead(int fd, char *buf, int count)
{
ssize_t nread, totlen = 0;
while(totlen != count) {
nread = read(fd,buf,count-totlen);
if (nread == 0) return totlen;
if (nread == -1) return -1;
totlen += nread;
buf += nread;
}
return totlen;
}
/* Like write(2) but make sure 'count' is written before to return
* (unless error is encountered) */
int anetWrite(int fd, char *buf, int count)
{
ssize_t nwritten, totlen = 0;
while(totlen != count) {
nwritten = write(fd,buf,count-totlen);
if (nwritten == 0) return totlen;
if (nwritten == -1) return -1;
totlen += nwritten;
buf += nwritten;
}
return totlen;
}
static int anetListen(char *err, int s, struct sockaddr *sa, socklen_t len, int backlog) {
if (bind(s,sa,len) == -1) {
anetSetError(err, "bind: %s", strerror(errno));
close(s);
return ANET_ERR;
}
if (listen(s, backlog) == -1) {
anetSetError(err, "listen: %s", strerror(errno));
close(s);
return ANET_ERR;
}
return ANET_OK;
}
static int anetV6Only(char *err, int s) {
int yes = 1;
if (setsockopt(s,IPPROTO_IPV6,IPV6_V6ONLY,&yes,sizeof(yes)) == -1) {
anetSetError(err, "setsockopt: %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
}
static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backlog)
{
int s = -1, rv;
char _port[6]; /* strlen("65535") */
struct addrinfo hints, *servinfo, *p;
snprintf(_port,6,"%d",port);
memset(&hints,0,sizeof(hints));
hints.ai_family = af;
hints.ai_socktype = SOCK_STREAM;
hints.ai_flags = AI_PASSIVE; /* No effect if bindaddr != NULL */
if (bindaddr && !strcmp("*", bindaddr))
bindaddr = NULL;
if (af == AF_INET6 && bindaddr && !strcmp("::*", bindaddr))
bindaddr = NULL;
if ((rv = getaddrinfo(bindaddr,_port,&hints,&servinfo)) != 0) {
anetSetError(err, "%s", gai_strerror(rv));
return ANET_ERR;
}
for (p = servinfo; p != NULL; p = p->ai_next) {
if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1)
continue;
if (af == AF_INET6 && anetV6Only(err,s) == ANET_ERR) goto error;
if (anetSetReuseAddr(err,s) == ANET_ERR) goto error;
if (anetListen(err,s,p->ai_addr,p->ai_addrlen,backlog) == ANET_ERR) s = ANET_ERR;
goto end;
}
if (p == NULL) {
anetSetError(err, "unable to bind socket, errno: %d", errno);
goto error;
}
error:
if (s != -1) close(s);
s = ANET_ERR;
end:
freeaddrinfo(servinfo);
return s;
}
int anetTcpServer(char *err, int port, char *bindaddr, int backlog)
{
return _anetTcpServer(err, port, bindaddr, AF_INET, backlog);
}
int anetTcp6Server(char *err, int port, char *bindaddr, int backlog)
{
return _anetTcpServer(err, port, bindaddr, AF_INET6, backlog);
}
int anetUnixServer(char *err, char *path, mode_t perm, int backlog)
{
int s;
struct sockaddr_un sa;
if ((s = anetCreateSocket(err,AF_LOCAL)) == ANET_ERR)
return ANET_ERR;
memset(&sa,0,sizeof(sa));
sa.sun_family = AF_LOCAL;
strncpy(sa.sun_path,path,sizeof(sa.sun_path)-1);
if (anetListen(err,s,(struct sockaddr*)&sa,sizeof(sa),backlog) == ANET_ERR)
return ANET_ERR;
if (perm)
chmod(sa.sun_path, perm);
return s;
}
static int anetGenericAccept(char *err, int s, struct sockaddr *sa, socklen_t *len) {
int fd;
while(1) {
fd = accept(s,sa,len);
if (fd == -1) {
if (errno == EINTR)
continue;
else {
anetSetError(err, "accept: %s", strerror(errno));
return ANET_ERR;
}
}
break;
}
return fd;
}
int anetTcpAccept(char *err, int s, char *ip, size_t ip_len, int *port) {
int fd;
struct sockaddr_storage sa;
socklen_t salen = sizeof(sa);
if ((fd = anetGenericAccept(err,s,(struct sockaddr*)&sa,&salen)) == -1)
return ANET_ERR;
if (sa.ss_family == AF_INET) {
struct sockaddr_in *s = (struct sockaddr_in *)&sa;
if (ip) inet_ntop(AF_INET,(void*)&(s->sin_addr),ip,ip_len);
if (port) *port = ntohs(s->sin_port);
} else {
struct sockaddr_in6 *s = (struct sockaddr_in6 *)&sa;
if (ip) inet_ntop(AF_INET6,(void*)&(s->sin6_addr),ip,ip_len);
if (port) *port = ntohs(s->sin6_port);
}
return fd;
}
int anetUnixAccept(char *err, int s) {
int fd;
struct sockaddr_un sa;
socklen_t salen = sizeof(sa);
if ((fd = anetGenericAccept(err,s,(struct sockaddr*)&sa,&salen)) == -1)
return ANET_ERR;
return fd;
}
int anetFdToString(int fd, char *ip, size_t ip_len, int *port, int fd_to_str_type) {
struct sockaddr_storage sa;
socklen_t salen = sizeof(sa);
if (fd_to_str_type == FD_TO_PEER_NAME) {
if (getpeername(fd, (struct sockaddr *)&sa, &salen) == -1) goto error;
} else {
if (getsockname(fd, (struct sockaddr *)&sa, &salen) == -1) goto error;
}
if (ip_len == 0) goto error;
if (sa.ss_family == AF_INET) {
struct sockaddr_in *s = (struct sockaddr_in *)&sa;
if (ip) inet_ntop(AF_INET,(void*)&(s->sin_addr),ip,ip_len);
if (port) *port = ntohs(s->sin_port);
} else if (sa.ss_family == AF_INET6) {
struct sockaddr_in6 *s = (struct sockaddr_in6 *)&sa;
if (ip) inet_ntop(AF_INET6,(void*)&(s->sin6_addr),ip,ip_len);
if (port) *port = ntohs(s->sin6_port);
} else if (sa.ss_family == AF_UNIX) {
if (ip) snprintf(ip, ip_len, "/unixsocket");
if (port) *port = 0;
} else {
goto error;
}
return 0;
error:
if (ip) {
if (ip_len >= 2) {
ip[0] = '?';
ip[1] = '\0';
} else if (ip_len == 1) {
ip[0] = '\0';
}
}
if (port) *port = 0;
return -1;
}
/* Format an IP,port pair into something easy to parse. If IP is IPv6
* (matches for ":"), the ip is surrounded by []. IP and port are just
* separated by colons. This the standard to display addresses within Redis. */
int anetFormatAddr(char *buf, size_t buf_len, char *ip, int port) {
return snprintf(buf,buf_len, strchr(ip,':') ?
"[%s]:%d" : "%s:%d", ip, port);
}
/* Like anetFormatAddr() but extract ip and port from the socket's peer/sockname. */
int anetFormatFdAddr(int fd, char *buf, size_t buf_len, int fd_to_str_type) {
char ip[INET6_ADDRSTRLEN];
int port;
anetFdToString(fd,ip,sizeof(ip),&port,fd_to_str_type);
return anetFormatAddr(buf, buf_len, ip, port);
}