Use madvise(MADV_DONTNEED) to release memory to reduce COW (#8974)
## Background
As we know, after a `fork`, a process copies a page the moment it writes to it (copy-on-write, CoW),
while the other process keeps the old page, so together the two processes consume more memory.
For Redis, we have seen memory usage grow sharply while the fork child is serializing keys/values,
and in the worst case this can cause an OOM.
However, the fork child does not actually need to keep some of the memory that the parent process
may write to or update. For example, the child never accesses a key/value again once it has been
serialized, while users may keep updating it in the parent process.
So we can reduce CoW if the child process releases the memory it no longer needs.
## Implementation
To release key/value memory in the child process, the obvious approach is to call `decrRefCount`
to free it. In practice, though, the fork child still used a lot of memory even when no data was being
written to Redis, and the extra work slowed down bgsave considerably. The likely reason is that the
memory allocator does not actually return the memory to the OS, and freeing objects, especially many
small ones, modifies the allocator's internal bookkeeping, which itself triggers CoW.
Moreover, CoW operates on pages, so a simpler approach is to release only memory chunks that are
at least one kernel page in size. `madvise(MADV_DONTNEED)` can quickly return the pages of a
specified region to the OS while bypassing the allocator: the allocator still considers the memory
in use and does not modify its internal data.
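A minimal sketch of the idea (the helper name and alignment details here are illustrative, not the
actual Redis implementation): only the whole pages contained inside an allocation are advised away,
since CoW works at page granularity.

```c
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

/* Release the whole pages contained in [ptr, ptr+size) back to the OS.
 * Partial pages at the edges are left alone, and the allocator's metadata is
 * never touched. Illustrative sketch, not the real Redis dismiss code. */
static void dismiss_region(void *ptr, size_t size) {
    size_t page = (size_t)sysconf(_SC_PAGESIZE);
    uintptr_t start = ((uintptr_t)ptr + page - 1) & ~(uintptr_t)(page - 1);
    uintptr_t end = ((uintptr_t)ptr + size) & ~(uintptr_t)(page - 1);
    if (end > start)
        madvise((void *)start, end - start, MADV_DONTNEED);
}
```

In the fork child this is safe for memory the child will never read again: dropping the pages only
affects the child's private CoW view, while the parent's copies are untouched.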
There are several buffers we can release in the fork child process:
- **Serialized key-values**
  The fork child never accesses a key/value again once it has been serialized, so we try to free it.
  Since we can only release memory in large chunks, and iterating all items/members/fields/entries
  of a complex data type is time-consuming, we iterate and try to release entries only when their
  average size is larger than the OS page size (see the sketch after this list).
- **Replication backlog**
  The replication backlog is a circular buffer, so it changes quickly when Redis has heavy write
  traffic, but the fork child never needs to access it.
- **Client buffers**
  If clients keep sending requests while the fork child exists, the client buffers also change
  frequently. This memory includes the client query buffer, the output buffer, and the memory used
  by the client struct itself.
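A hedged sketch of the average-size heuristic for serialized key-values (the function names and
array parameters are hypothetical, not the real Redis API); it reuses the `dismiss_region` helper
sketched above:

```c
#include <stddef.h>
#include <unistd.h>

void dismiss_region(void *ptr, size_t size); /* from the sketch above */

/* Dismiss every entry of a collection, but only when the average entry is at
 * least one page; otherwise walking the entries costs more than the few
 * pages we could actually give back. Illustrative only. */
void dismiss_collection_entries(void **entries, size_t *entry_sizes,
                                size_t num_entries, size_t total_bytes) {
    size_t page = (size_t)sysconf(_SC_PAGESIZE);
    if (num_entries == 0 || total_bytes / num_entries < page) return;
    for (size_t j = 0; j < num_entries; j++)
        dismiss_region(entries[j], entry_sizes[j]);
}
```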
To report the child process's peak private-dirty memory, we now track the peak rather than the
last sampled value, because the child may keep releasing memory while it runs (until now CoW could
only grow, so the last sample was equivalent to the peak).
We also add a new `current_cow_peak` INFO field to complement the existing `current_cow_size`.
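A tiny illustrative sketch of that peak tracking (variable and function names are assumptions, not
the exact Redis code):

```c
#include <stddef.h>

static size_t current_cow_size = 0; /* latest CoW sample from the child */
static size_t current_cow_peak = 0; /* maximum CoW sample seen so far */

/* Called each time the child's private-dirty memory is sampled. */
static void update_cow_stats(size_t sampled_bytes) {
    current_cow_size = sampled_bytes;
    if (sampled_bytes > current_cow_peak)
        current_cow_peak = sampled_bytes;
}
```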
Co-authored-by: Oran Agra <oran@redislabs.com>

# The tests of this file aim to get coverage on all the "dismiss" methods
# that dismiss all data-types memory in the fork child, like the client query
# buffer, the client output buffer and the replication backlog.
# We may not have many asserts in the tests, since we mainly check for
# crashes and dump file inconsistencies.

start_server {tags {"dismiss external:skip"}} {
    # Other tests also cover the child process dumping an RDB file, but their
    # key/value allocations are usually small and don't exercise the "dismiss"
    # object methods. In this test we create big keys/values to satisfy the
    # conditions for releasing memory pages; in particular, some cases assume
    # the OS page size is 4KB.
    test {dismiss all data types memory} {
        set bigstr [string repeat A 8192]
        set 64bytes [string repeat A 64]

        # string
        populate 100 bigstring 8192

        # list
        r lpush biglist1 $bigstr ; # uncompressed ziplist node
        r config set list-compress-depth 1 ; # compressed ziplist nodes
        for {set i 0} {$i < 16} {incr i} {
            r lpush biglist2 $bigstr
        }

        # set
        r sadd bigset1 $bigstr ; # hash encoding
        set biginteger [string repeat 1 19]
        for {set i 0} {$i < 512} {incr i} {
            r sadd bigset2 $biginteger ; # intset encoding
        }

        # zset
        r zadd bigzset1 1.0 $bigstr ; # skiplist encoding
        for {set i 0} {$i < 128} {incr i} {
            r zadd bigzset2 1.0 $64bytes ; # ziplist encoding
        }

        # hash
        r hset bighash1 field1 $bigstr ; # hash encoding
        for {set i 0} {$i < 128} {incr i} {
            r hset bighash2 $i $64bytes ; # ziplist encoding
        }

        # stream
        r xadd bigstream * entry1 $bigstr entry2 $bigstr

        set digest [debug_digest]
        r config set aof-use-rdb-preamble no
        r bgrewriteaof
        waitForBgrewriteaof r
        r debug loadaof
        set newdigest [debug_digest]
        assert {$digest eq $newdigest}
    }

    test {dismiss client output buffer} {
        # Big output buffer
        set item [string repeat "x" 100000]
        for {set i 0} {$i < 100} {incr i} {
            r lpush mylist $item
        }
        set rd [redis_deferring_client]
        $rd lrange mylist 0 -1
        $rd flush
        after 100

        r bgsave
        waitForBgsave r
        assert_equal $item [r lpop mylist]
    }

    test {dismiss client query buffer} {
        # Big pending query buffer
        set bigstr [string repeat A 8192]
        set rd [redis_deferring_client]
        $rd write "*2\r\n\$8192\r\n"
        $rd write $bigstr\r\n
        $rd flush
        after 100

        r bgsave
        waitForBgsave r
    }

    test {dismiss replication backlog} {
        set master [srv 0 client]
        start_server {} {
            r slaveof [srv -1 host] [srv -1 port]
            wait_for_sync r

            set bigstr [string repeat A 8192]
            for {set i 0} {$i < 20} {incr i} {
                $master set $i $bigstr
            }
            $master bgsave
            waitForBgsave $master
        }
    }
}