Improve valgrind support for cluster tests (#7725)

- redirect valgrind reports to a dedicated file rather than console
- try to avoid killing instances with SIGKILL so that we get the memory
  leak report (killing with SIGTERM before resorting to SIGKILL)
- search for valgrind reports when done, print them and fail the tests
- add --dont-clean option to keep the logs on exit
- fix exit error code when crash is found (would have exited with 0)

changes that affect the normal redis test suite:
- refactor check_valgrind_errors into two functions one to search and
  one to report
- move the search half into util.tcl to serve the cluster tests too
- ignore "address range perms" valgrind warnings which seem non relevant.
This commit is contained in:
Oran Agra 2020-09-06 11:11:49 +03:00 committed by GitHub
parent fe5da2e60d
commit 2b998de460
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 83 additions and 23 deletions

View File

@ -19,6 +19,7 @@ set ::verbose 0
set ::valgrind 0
set ::tls 0
set ::pause_on_error 0
set ::dont_clean 0
set ::simulate_error 0
set ::failed 0
set ::sentinel_instances {}
@ -38,7 +39,7 @@ if {[catch {cd tmp}]} {
# Execute the specified instance of the server specified by 'type', using
# the provided configuration file. Returns the PID of the process.
proc exec_instance {type cfgfile} {
proc exec_instance {type dirname cfgfile} {
if {$type eq "redis"} {
set prgname redis-server
} elseif {$type eq "sentinel"} {
@ -47,8 +48,9 @@ proc exec_instance {type cfgfile} {
error "Unknown instance type."
}
set errfile [file join $dirname err.txt]
if {$::valgrind} {
set pid [exec valgrind --track-origins=yes --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &]
set pid [exec valgrind --track-origins=yes --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile 2>> $errfile &]
} else {
set pid [exec ../../../src/${prgname} $cfgfile &]
}
@ -93,7 +95,7 @@ proc spawn_instance {type base_port count {conf {}}} {
# Finally exec it and remember the pid for later cleanup.
set retry 100
while {$retry} {
set pid [exec_instance $type $cfgfile]
set pid [exec_instance $type $dirname $cfgfile]
# Check availability
if {[server_is_up 127.0.0.1 $port 100] == 0} {
@ -144,16 +146,60 @@ proc log_crashes {} {
puts "\n*** Crash report found in $log ***"
set found 1
}
if {$found} {puts $line}
if {$found} {
puts $line
incr ::failed
}
}
}
set logs [glob */err.txt]
foreach log $logs {
set res [find_valgrind_errors $log]
if {$res != ""} {
puts $res
incr ::failed
}
}
}
proc is_alive pid {
if {[catch {exec ps -p $pid} err]} {
return 0
} else {
return 1
}
}
proc stop_instance pid {
catch {exec kill $pid}
if {$::valgrind} {
set max_wait 60000
} else {
set max_wait 10000
}
while {[is_alive $pid]} {
incr wait 10
if {$wait >= $max_wait} {
puts "Forcing process $pid to exit..."
catch {exec kill -KILL $pid}
} elseif {$wait % 1000 == 0} {
puts "Waiting for process $pid to exit..."
}
after 10
}
}
proc cleanup {} {
puts "Cleaning up..."
log_crashes
foreach pid $::pids {
catch {exec kill -9 $pid}
puts "killing stale instance $pid"
stop_instance $pid
}
log_crashes
if {$::dont_clean} {
return
}
foreach dir $::dirs {
catch {exec rm -rf $dir}
@ -178,6 +224,8 @@ proc parse_options {} {
set ::run_matching "*${val}*"
} elseif {$opt eq "--pause-on-error"} {
set ::pause_on_error 1
} elseif {$opt eq {--dont-clean}} {
set ::dont_clean 1
} elseif {$opt eq "--fail"} {
set ::simulate_error 1
} elseif {$opt eq {--valgrind}} {
@ -191,6 +239,7 @@ proc parse_options {} {
set ::tls 1
} elseif {$opt eq "--help"} {
puts "--single <pattern> Only runs tests specified by pattern."
puts "--dont-clean Keep log files on exit."
puts "--pause-on-error Pause for manual inspection on error."
puts "--fail Simulate a test failure."
puts "--valgrind Run with valgrind."
@ -486,7 +535,7 @@ proc kill_instance {type id} {
error "You tried to kill $type $id twice."
}
exec kill -9 $pid
stop_instance $pid
set_instance_attrib $type $id pid -1
set_instance_attrib $type $id link you_tried_to_talk_with_killed_instance
@ -521,7 +570,7 @@ proc restart_instance {type id} {
# Execute the instance with its old setup and append the new pid
# file for cleanup.
set pid [exec_instance $type $cfgfile]
set pid [exec_instance $type $dirname $cfgfile]
set_instance_attrib $type $id pid $pid
lappend ::pids $pid

View File

@ -13,21 +13,9 @@ proc start_server_error {config_file error} {
}
proc check_valgrind_errors stderr {
set fd [open $stderr]
set buf [read $fd]
close $fd
# look for stack trace and other errors, or the absense of a leak free summary
if {[regexp -- { at 0x} $buf] ||
[regexp -- {Warning} $buf] ||
[regexp -- {Invalid} $buf] ||
[regexp -- {Mismatched} $buf] ||
[regexp -- {uninitialized} $buf] ||
[regexp -- {has a fishy} $buf] ||
[regexp -- {overlap} $buf] ||
(![regexp -- {definitely lost: 0 bytes} $buf] &&
![regexp -- {no leaks are possible} $buf])} {
send_data_packet $::test_server_fd err "Valgrind error: $buf\n"
set res [find_valgrind_errors $stderr]
if {$res != ""} {
send_data_packet $::test_server_fd err "Valgrind error: $res\n"
}
}

View File

@ -432,6 +432,29 @@ proc colorstr {color str} {
}
}
proc find_valgrind_errors {stderr} {
set fd [open $stderr]
set buf [read $fd]
close $fd
# Look for stack trace (" at 0x") and other errors (Invalid, Mismatched, etc).
# Look for "Warnings", but not the "set address range perms". These don't indicate any real concern.
# Look for the absense of a leak free summary (happens when redis isn't terminated properly).
if {[regexp -- { at 0x} $buf] ||
[regexp -- {^(?=.*Warning)(?:(?!set address range perms).)*$} $buf] ||
[regexp -- {Invalid} $buf] ||
[regexp -- {Mismatched} $buf] ||
[regexp -- {uninitialized} $buf] ||
[regexp -- {has a fishy} $buf] ||
[regexp -- {overlap} $buf] ||
(![regexp -- {definitely lost: 0 bytes} $buf] &&
![regexp -- {no leaks are possible} $buf])} {
return $buf
}
return ""
}
# Execute a background process writing random data for the specified number
# of seconds to the specified Redis instance.
proc start_write_load {host port seconds} {