redict/tests/support/server.tcl
Chen Tianjie 22a29935ff
Support TLS service when "tls-cluster" is not enabled and persist both plain and TLS port in nodes.conf (#12233)
Originally, when "tls-cluster" is enabled, `port` is set to TLS port. In order to support non-TLS clients, `pport` is used to propagate TCP port across cluster nodes. However when "tls-cluster" is disabled, `port` is set to TCP port, and `pport` is not used, which means the cluster cannot provide TLS service unless "tls-cluster" is on.
```
typedef struct {
    // ...
    uint16_t port;  /* Latest known clients port (TLS or plain). */
    uint16_t pport; /* Latest known clients plaintext port. Only used if the main clients port is for TLS. */
    // ...
} clusterNode;
```
```
typedef struct {
    // ...
    uint16_t port;   /* TCP base port number. */
    uint16_t pport;  /* Sender TCP plaintext port, if base port is TLS */
    // ...
} clusterMsg;
```
This PR renames `port` and `pport` in `clusterNode` to `tcp_port` and `tls_port`, to record both ports no matter "tls-cluster" is enabled or disabled.

This allows to provide TLS service to clients when "tls-cluster" is disabled: when displaying cluster topology, or giving `MOVED` error, server can provide TLS or TCP port according to client's connection type, no matter what type of connection cluster bus is using.

For backwards compatibility, `port` and `pport` in `clusterMsg` are preserved, when "tls-cluster" is enabled, `port` is set to TLS port and `pport` is set to TCP port, when "tls-cluster" is disabled, `port` is set to TCP port and `pport` is set to TLS port (instead of 0).

Also, in the nodes.conf file, a new aux field displaying an extra port is added to complete the persisted info. We may have `tls_port=xxxxx` or `tcp_port=xxxxx` in the aux field, to complete the cluster topology, while the other port is stored in the normal `<ip>:<port>` field. The format is shown below.
```
<node-id> <ip>:<tcp_port>@<cport>,<hostname>,shard-id=...,tls-port=6379 myself,master - 0 0 0 connected 0-1000
```
Or we can switch the position of two ports, both can be correctly resolved.
```
<node-id> <ip>:<tls_port>@<cport>,<hostname>,shard-id=...,tcp-port=6379 myself,master - 0 0 0 connected 0-1000
```
2023-06-26 07:43:38 -07:00

790 lines
25 KiB
Tcl

set ::global_overrides {}
set ::tags {}
set ::valgrind_errors {}
proc start_server_error {config_file error} {
set err {}
append err "Can't start the Redis server\n"
append err "CONFIGURATION:"
append err [exec cat $config_file]
append err "\nERROR:"
append err [string trim $error]
send_data_packet $::test_server_fd err $err
}
proc check_valgrind_errors stderr {
set res [find_valgrind_errors $stderr true]
if {$res != ""} {
send_data_packet $::test_server_fd err "Valgrind error: $res\n"
}
}
proc check_sanitizer_errors stderr {
set res [sanitizer_errors_from_file $stderr]
if {$res != ""} {
send_data_packet $::test_server_fd err "Sanitizer error: $res\n"
}
}
proc clean_persistence config {
# we may wanna keep the logs for later, but let's clean the persistence
# files right away, since they can accumulate and take up a lot of space
set config [dict get $config "config"]
set dir [dict get $config "dir"]
set rdb [format "%s/%s" $dir "dump.rdb"]
if {[dict exists $config "appenddirname"]} {
set aofdir [dict get $config "appenddirname"]
} else {
set aofdir "appendonlydir"
}
set aof_dirpath [format "%s/%s" $dir $aofdir]
clean_aof_persistence $aof_dirpath
catch {exec rm -rf $rdb}
}
proc kill_server config {
# nothing to kill when running against external server
if {$::external} return
# Close client connection if exists
if {[dict exists $config "client"]} {
[dict get $config "client"] close
}
# nevermind if its already dead
if {![is_alive $config]} {
# Check valgrind errors if needed
if {$::valgrind} {
check_valgrind_errors [dict get $config stderr]
}
check_sanitizer_errors [dict get $config stderr]
return
}
set pid [dict get $config pid]
# check for leaks
if {![dict exists $config "skipleaks"]} {
catch {
if {[string match {*Darwin*} [exec uname -a]]} {
tags {"leaks"} {
test "Check for memory leaks (pid $pid)" {
set output {0 leaks}
catch {exec leaks $pid} output option
# In a few tests we kill the server process, so leaks will not find it.
# It'll exits with exit code >1 on error, so we ignore these.
if {[dict exists $option -errorcode]} {
set details [dict get $option -errorcode]
if {[lindex $details 0] eq "CHILDSTATUS"} {
set status [lindex $details 2]
if {$status > 1} {
set output "0 leaks"
}
}
}
set output
} {*0 leaks*}
}
}
}
}
# kill server and wait for the process to be totally exited
send_data_packet $::test_server_fd server-killing $pid
catch {exec kill $pid}
# Node might have been stopped in the test
catch {exec kill -SIGCONT $pid}
if {$::valgrind} {
set max_wait 120000
} else {
set max_wait 10000
}
while {[is_alive $config]} {
incr wait 10
if {$wait == $max_wait} {
puts "Forcing process $pid to crash..."
catch {exec kill -SEGV $pid}
} elseif {$wait >= $max_wait * 2} {
puts "Forcing process $pid to exit..."
catch {exec kill -KILL $pid}
} elseif {$wait % 1000 == 0} {
puts "Waiting for process $pid to exit..."
}
after 10
}
# Check valgrind errors if needed
if {$::valgrind} {
check_valgrind_errors [dict get $config stderr]
}
check_sanitizer_errors [dict get $config stderr]
# Remove this pid from the set of active pids in the test server.
send_data_packet $::test_server_fd server-killed $pid
}
proc is_alive config {
set pid [dict get $config pid]
if {[catch {exec kill -0 $pid} err]} {
return 0
} else {
return 1
}
}
proc ping_server {host port} {
set retval 0
if {[catch {
if {$::tls} {
set fd [::tls::socket $host $port]
} else {
set fd [socket $host $port]
}
fconfigure $fd -translation binary
puts $fd "PING\r\n"
flush $fd
set reply [gets $fd]
if {[string range $reply 0 0] eq {+} ||
[string range $reply 0 0] eq {-}} {
set retval 1
}
close $fd
} e]} {
if {$::verbose} {
puts -nonewline "."
}
} else {
if {$::verbose} {
puts -nonewline "ok"
}
}
return $retval
}
# Return 1 if the server at the specified addr is reachable by PING, otherwise
# returns 0. Performs a try every 50 milliseconds for the specified number
# of retries.
proc server_is_up {host port retrynum} {
after 10 ;# Use a small delay to make likely a first-try success.
set retval 0
while {[incr retrynum -1]} {
if {[catch {ping_server $host $port} ping]} {
set ping 0
}
if {$ping} {return 1}
after 50
}
return 0
}
# Check if current ::tags match requested tags. If ::allowtags are used,
# there must be some intersection. If ::denytags are used, no intersection
# is allowed. Returns 1 if tags are acceptable or 0 otherwise, in which
# case err_return names a return variable for the message to be logged.
proc tags_acceptable {tags err_return} {
upvar $err_return err
# If tags are whitelisted, make sure there's match
if {[llength $::allowtags] > 0} {
set matched 0
foreach tag $::allowtags {
if {[lsearch $tags $tag] >= 0} {
incr matched
}
}
if {$matched < 1} {
set err "Tag: none of the tags allowed"
return 0
}
}
foreach tag $::denytags {
if {[lsearch $tags $tag] >= 0} {
set err "Tag: $tag denied"
return 0
}
}
# some units mess with the client output buffer so we can't really use the req-res logging mechanism.
if {$::log_req_res && [lsearch $tags "logreqres:skip"] >= 0} {
set err "Not supported when running in log-req-res mode"
return 0
}
if {$::external && [lsearch $tags "external:skip"] >= 0} {
set err "Not supported on external server"
return 0
}
if {$::singledb && [lsearch $tags "singledb:skip"] >= 0} {
set err "Not supported on singledb"
return 0
}
if {$::cluster_mode && [lsearch $tags "cluster:skip"] >= 0} {
set err "Not supported in cluster mode"
return 0
}
if {$::tls && [lsearch $tags "tls:skip"] >= 0} {
set err "Not supported in tls mode"
return 0
}
if {!$::large_memory && [lsearch $tags "large-memory"] >= 0} {
set err "large memory flag not provided"
return 0
}
return 1
}
# doesn't really belong here, but highly coupled to code in start_server
proc tags {tags code} {
# If we 'tags' contain multiple tags, quoted and separated by spaces,
# we want to get rid of the quotes in order to have a proper list
set tags [string map { \" "" } $tags]
set ::tags [concat $::tags $tags]
if {![tags_acceptable $::tags err]} {
incr ::num_aborted
send_data_packet $::test_server_fd ignore $err
set ::tags [lrange $::tags 0 end-[llength $tags]]
return
}
uplevel 1 $code
set ::tags [lrange $::tags 0 end-[llength $tags]]
}
# Write the configuration in the dictionary 'config' in the specified
# file name.
proc create_server_config_file {filename config config_lines} {
set fp [open $filename w+]
foreach directive [dict keys $config] {
puts -nonewline $fp "$directive "
puts $fp [dict get $config $directive]
}
foreach {config_line_directive config_line_args} $config_lines {
puts $fp "$config_line_directive $config_line_args"
}
close $fp
}
proc spawn_server {config_file stdout stderr args} {
set cmd [list src/redis-server $config_file]
set args {*}$args
if {[llength $args] > 0} {
lappend cmd {*}$args
}
if {$::valgrind} {
set pid [exec valgrind --track-origins=yes --trace-children=yes --suppressions=[pwd]/src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full {*}$cmd >> $stdout 2>> $stderr &]
} elseif ($::stack_logging) {
set pid [exec /usr/bin/env MallocStackLogging=1 MallocLogFile=/tmp/malloc_log.txt {*}$cmd >> $stdout 2>> $stderr &]
} else {
# ASAN_OPTIONS environment variable is for address sanitizer. If a test
# tries to allocate huge memory area and expects allocator to return
# NULL, address sanitizer throws an error without this setting.
set pid [exec /usr/bin/env ASAN_OPTIONS=allocator_may_return_null=1 {*}$cmd >> $stdout 2>> $stderr &]
}
if {$::wait_server} {
set msg "server started PID: $pid. press any key to continue..."
puts $msg
read stdin 1
}
# Tell the test server about this new instance.
send_data_packet $::test_server_fd server-spawned $pid
return $pid
}
# Wait for actual startup, return 1 if port is busy, 0 otherwise
proc wait_server_started {config_file stdout pid} {
set checkperiod 100; # Milliseconds
set maxiter [expr {120*1000/$checkperiod}] ; # Wait up to 2 minutes.
set port_busy 0
while 1 {
if {[regexp -- " PID: $pid.*Server initialized" [exec cat $stdout]]} {
break
}
after $checkperiod
incr maxiter -1
if {$maxiter == 0} {
start_server_error $config_file "No PID detected in log $stdout"
puts "--- LOG CONTENT ---"
puts [exec cat $stdout]
puts "-------------------"
break
}
# Check if the port is actually busy and the server failed
# for this reason.
if {[regexp {Failed listening on port} [exec cat $stdout]]} {
set port_busy 1
break
}
}
return $port_busy
}
proc dump_server_log {srv} {
set pid [dict get $srv "pid"]
puts "\n===== Start of server log (pid $pid) =====\n"
puts [exec cat [dict get $srv "stdout"]]
puts "===== End of server log (pid $pid) =====\n"
puts "\n===== Start of server stderr log (pid $pid) =====\n"
puts [exec cat [dict get $srv "stderr"]]
puts "===== End of server stderr log (pid $pid) =====\n"
}
proc run_external_server_test {code overrides} {
set srv {}
dict set srv "host" $::host
dict set srv "port" $::port
set client [redis $::host $::port 0 $::tls]
dict set srv "client" $client
if {!$::singledb} {
$client select 9
}
set config {}
dict set config "port" $::port
dict set srv "config" $config
# append the server to the stack
lappend ::servers $srv
if {[llength $::servers] > 1} {
if {$::verbose} {
puts "Notice: nested start_server statements in external server mode, test must be aware of that!"
}
}
r flushall
r function flush
# store overrides
set saved_config {}
foreach {param val} $overrides {
dict set saved_config $param [lindex [r config get $param] 1]
r config set $param $val
# If we enable appendonly, wait for for rewrite to complete. This is
# required for tests that begin with a bg* command which will fail if
# the rewriteaof operation is not completed at this point.
if {$param == "appendonly" && $val == "yes"} {
waitForBgrewriteaof r
}
}
if {[catch {set retval [uplevel 2 $code]} error]} {
if {$::durable} {
set msg [string range $error 10 end]
lappend details $msg
lappend details $::errorInfo
lappend ::tests_failed $details
incr ::num_failed
send_data_packet $::test_server_fd err [join $details "\n"]
} else {
# Re-raise, let handler up the stack take care of this.
error $error $::errorInfo
}
}
# restore overrides
dict for {param val} $saved_config {
r config set $param $val
}
set srv [lpop ::servers]
if {[dict exists $srv "client"]} {
[dict get $srv "client"] close
}
}
proc start_server {options {code undefined}} {
# setup defaults
set baseconfig "default.conf"
set overrides {}
set omit {}
set tags {}
set args {}
set keep_persistence false
set config_lines {}
# parse options
foreach {option value} $options {
switch $option {
"config" {
set baseconfig $value
}
"overrides" {
set overrides [concat $overrides $value]
}
"config_lines" {
set config_lines $value
}
"args" {
set args $value
}
"omit" {
set omit $value
}
"tags" {
# If we 'tags' contain multiple tags, quoted and separated by spaces,
# we want to get rid of the quotes in order to have a proper list
set tags [string map { \" "" } $value]
set ::tags [concat $::tags $tags]
}
"keep_persistence" {
set keep_persistence $value
}
default {
error "Unknown option $option"
}
}
}
# We skip unwanted tags
if {![tags_acceptable $::tags err]} {
incr ::num_aborted
send_data_packet $::test_server_fd ignore $err
set ::tags [lrange $::tags 0 end-[llength $tags]]
return
}
# If we are running against an external server, we just push the
# host/port pair in the stack the first time
if {$::external} {
run_external_server_test $code $overrides
set ::tags [lrange $::tags 0 end-[llength $tags]]
return
}
set data [split [exec cat "tests/assets/$baseconfig"] "\n"]
set config {}
if {$::tls} {
if {$::tls_module} {
lappend config_lines [list "loadmodule" [format "%s/src/redis-tls.so" [pwd]]]
}
dict set config "tls-cert-file" [format "%s/tests/tls/server.crt" [pwd]]
dict set config "tls-key-file" [format "%s/tests/tls/server.key" [pwd]]
dict set config "tls-client-cert-file" [format "%s/tests/tls/client.crt" [pwd]]
dict set config "tls-client-key-file" [format "%s/tests/tls/client.key" [pwd]]
dict set config "tls-dh-params-file" [format "%s/tests/tls/redis.dh" [pwd]]
dict set config "tls-ca-cert-file" [format "%s/tests/tls/ca.crt" [pwd]]
dict set config "loglevel" "debug"
}
foreach line $data {
if {[string length $line] > 0 && [string index $line 0] ne "#"} {
set elements [split $line " "]
set directive [lrange $elements 0 0]
set arguments [lrange $elements 1 end]
dict set config $directive $arguments
}
}
# use a different directory every time a server is started
dict set config dir [tmpdir server]
# start every server on a different port
set port [find_available_port $::baseport $::portcount]
if {$::tls} {
set pport [find_available_port $::baseport $::portcount]
dict set config "port" $pport
dict set config "tls-port" $port
dict set config "tls-cluster" "yes"
dict set config "tls-replication" "yes"
} else {
dict set config port $port
}
set unixsocket [file normalize [format "%s/%s" [dict get $config "dir"] "socket"]]
dict set config "unixsocket" $unixsocket
# apply overrides from global space and arguments
foreach {directive arguments} [concat $::global_overrides $overrides] {
dict set config $directive $arguments
}
# remove directives that are marked to be omitted
foreach directive $omit {
dict unset config $directive
}
if {$::log_req_res} {
dict set config "req-res-logfile" "stdout.reqres"
}
if {$::force_resp3} {
dict set config "client-default-resp" "3"
}
# write new configuration to temporary file
set config_file [tmpfile redis.conf]
create_server_config_file $config_file $config $config_lines
set stdout [format "%s/%s" [dict get $config "dir"] "stdout"]
set stderr [format "%s/%s" [dict get $config "dir"] "stderr"]
# if we're inside a test, write the test name to the server log file
if {[info exists ::cur_test]} {
set fd [open $stdout "a+"]
puts $fd "### Starting server for test $::cur_test"
close $fd
if {$::verbose > 1} {
puts "### Starting server $stdout for test - $::cur_test"
}
}
# We may have a stdout left over from the previous tests, so we need
# to get the current count of ready logs
set previous_ready_count [count_message_lines $stdout "Ready to accept"]
# We need a loop here to retry with different ports.
set server_started 0
while {$server_started == 0} {
if {$::verbose} {
puts -nonewline "=== ($tags) Starting server ${::host}:${port} "
}
send_data_packet $::test_server_fd "server-spawning" "port $port"
set pid [spawn_server $config_file $stdout $stderr $args]
# check that the server actually started
set port_busy [wait_server_started $config_file $stdout $pid]
# Sometimes we have to try a different port, even if we checked
# for availability. Other test clients may grab the port before we
# are able to do it for example.
if {$port_busy} {
puts "Port $port was already busy, trying another port..."
set port [find_available_port $::baseport $::portcount]
if {$::tls} {
set pport [find_available_port $::baseport $::portcount]
dict set config port $pport
dict set config "tls-port" $port
} else {
dict set config port $port
}
create_server_config_file $config_file $config $config_lines
# Truncate log so wait_server_started will not be looking at
# output of the failed server.
close [open $stdout "w"]
continue; # Try again
}
if {$::valgrind} {set retrynum 1000} else {set retrynum 100}
if {$code ne "undefined"} {
set serverisup [server_is_up $::host $port $retrynum]
} else {
set serverisup 1
}
if {$::verbose} {
puts ""
}
if {!$serverisup} {
set err {}
append err [exec cat $stdout] "\n" [exec cat $stderr]
start_server_error $config_file $err
return
}
set server_started 1
}
# setup properties to be able to initialize a client object
set port_param [expr $::tls ? {"tls-port"} : {"port"}]
set host $::host
if {[dict exists $config bind]} { set host [dict get $config bind] }
if {[dict exists $config $port_param]} { set port [dict get $config $port_param] }
# setup config dict
dict set srv "config_file" $config_file
dict set srv "config" $config
dict set srv "pid" $pid
dict set srv "host" $host
dict set srv "port" $port
dict set srv "stdout" $stdout
dict set srv "stderr" $stderr
dict set srv "unixsocket" $unixsocket
if {$::tls} {
dict set srv "pport" $pport
}
# if a block of code is supplied, we wait for the server to become
# available, create a client object and kill the server afterwards
if {$code ne "undefined"} {
set line [exec head -n1 $stdout]
if {[string match {*already in use*} $line]} {
error_and_quit $config_file $line
}
while 1 {
# check that the server actually started and is ready for connections
if {[count_message_lines $stdout "Ready to accept"] > $previous_ready_count} {
break
}
after 10
}
# append the server to the stack
lappend ::servers $srv
# connect client (after server dict is put on the stack)
reconnect
# remember previous num_failed to catch new errors
set prev_num_failed $::num_failed
# execute provided block
set num_tests $::num_tests
if {[catch { uplevel 1 $code } error]} {
set backtrace $::errorInfo
set assertion [string match "assertion:*" $error]
# fetch srv back from the server list, in case it was restarted by restart_server (new PID)
set srv [lindex $::servers end]
# pop the server object
set ::servers [lrange $::servers 0 end-1]
# Kill the server without checking for leaks
dict set srv "skipleaks" 1
kill_server $srv
if {$::dump_logs && $assertion} {
# if we caught an assertion ($::num_failed isn't incremented yet)
# this happens when the test spawns a server and not the other way around
dump_server_log $srv
} else {
# Print crash report from log
set crashlog [crashlog_from_file [dict get $srv "stdout"]]
if {[string length $crashlog] > 0} {
puts [format "\nLogged crash report (pid %d):" [dict get $srv "pid"]]
puts "$crashlog"
puts ""
}
set sanitizerlog [sanitizer_errors_from_file [dict get $srv "stderr"]]
if {[string length $sanitizerlog] > 0} {
puts [format "\nLogged sanitizer errors (pid %d):" [dict get $srv "pid"]]
puts "$sanitizerlog"
puts ""
}
}
if {!$assertion && $::durable} {
# durable is meant to prevent the whole tcl test from exiting on
# an exception. an assertion will be caught by the test proc.
set msg [string range $error 10 end]
lappend details $msg
lappend details $backtrace
lappend ::tests_failed $details
incr ::num_failed
send_data_packet $::test_server_fd err [join $details "\n"]
} else {
# Re-raise, let handler up the stack take care of this.
error $error $backtrace
}
} else {
if {$::dump_logs && $prev_num_failed != $::num_failed} {
dump_server_log $srv
}
}
# fetch srv back from the server list, in case it was restarted by restart_server (new PID)
set srv [lindex $::servers end]
# Don't do the leak check when no tests were run
if {$num_tests == $::num_tests} {
dict set srv "skipleaks" 1
}
# pop the server object
set ::servers [lrange $::servers 0 end-1]
set ::tags [lrange $::tags 0 end-[llength $tags]]
kill_server $srv
if {!$keep_persistence} {
clean_persistence $srv
}
set _ ""
} else {
set ::tags [lrange $::tags 0 end-[llength $tags]]
set _ $srv
}
}
# Start multiple servers with the same options, run code, then stop them.
proc start_multiple_servers {num options code} {
for {set i 0} {$i < $num} {incr i} {
set code [list start_server $options $code]
}
uplevel 1 $code
}
proc restart_server {level wait_ready rotate_logs {reconnect 1} {shutdown sigterm}} {
set srv [lindex $::servers end+$level]
if {$shutdown ne {sigterm}} {
catch {[dict get $srv "client"] shutdown $shutdown}
}
# Kill server doesn't mind if the server is already dead
kill_server $srv
# Remove the default client from the server
dict unset srv "client"
set pid [dict get $srv "pid"]
set stdout [dict get $srv "stdout"]
set stderr [dict get $srv "stderr"]
if {$rotate_logs} {
set ts [clock format [clock seconds] -format %y%m%d%H%M%S]
file rename $stdout $stdout.$ts.$pid
file rename $stderr $stderr.$ts.$pid
}
set prev_ready_count [count_message_lines $stdout "Ready to accept"]
# if we're inside a test, write the test name to the server log file
if {[info exists ::cur_test]} {
set fd [open $stdout "a+"]
puts $fd "### Restarting server for test $::cur_test"
close $fd
}
set config_file [dict get $srv "config_file"]
set pid [spawn_server $config_file $stdout $stderr {}]
# check that the server actually started
wait_server_started $config_file $stdout $pid
# update the pid in the servers list
dict set srv "pid" $pid
# re-set $srv in the servers list
lset ::servers end+$level $srv
if {$wait_ready} {
while 1 {
# check that the server actually started and is ready for connections
if {[count_message_lines $stdout "Ready to accept"] > $prev_ready_count} {
break
}
after 10
}
}
if {$reconnect} {
reconnect $level
}
}