// SPDX-License-Identifier: GPL-2.0
|
|
#define _GNU_SOURCE
|
|
|
|
#include <linux/limits.h>
|
|
#include <unistd.h>
|
|
#include <stdio.h>
|
|
#include <signal.h>
|
|
#include <sys/sysinfo.h>
|
|
#include <string.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/mman.h>
|
|
|
|
#include "../kselftest.h"
|
|
#include "cgroup_util.h"
|
|
|
|
/*
 * Read one unsigned decimal integer from the file at @path into @value.
 *
 * Returns 0 on success, or -1 if the file cannot be opened or no number
 * could be parsed from it.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* %zu is the conversion matching size_t; %ld would expect a long *. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
|
|
|
|
/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the (positive) number of characters written on success and a
 * negative value if the file cannot be opened, formatted, or flushed.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* %zu matches the size_t argument. */
	ret = fprintf(file, "%zu\n", value);
	/*
	 * The stream is buffered, so a write error may only surface when the
	 * buffer is flushed by fclose(); report that as a failure too.
	 */
	if (fclose(file) != 0)
		ret = -1;
	return ret;
}
|
|
|
|
/*
 * Fetch the current vm.min_free_kbytes setting into @value.
 * Forwards read_int()'s result: 0 on success, -1 on failure.
 */
static int read_min_free_kb(size_t *value)
{
	static const char sysctl_path[] = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
|
|
|
|
/*
 * Fetch zswap's debugfs "stored_pages" counter into @value.
 * Forwards read_int()'s result: 0 on success, -1 on failure.
 */
static int get_zswap_stored_pages(size_t *value)
{
	static const char counter_path[] = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(counter_path, value);
}
|
|
|
|
/*
 * Fetch zswap's debugfs "written_back_pages" counter into @value.
 * Forwards read_int()'s result: 0 on success, -1 on failure.
 */
static int get_zswap_written_back_pages(size_t *value)
{
	static const char counter_path[] = "/sys/kernel/debug/zswap/written_back_pages";

	return read_int(counter_path, value);
}
|
|
|
|
/*
 * Allocate a buffer, write to it at regular intervals, then free it.
 *
 * @cgroup: unused here.
 * @arg:    the number of bytes to allocate, passed as a pointer-sized integer.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/*
	 * Write one byte every 4095 bytes so the memory is actually touched.
	 * The index is a size_t so it cannot overflow for sizes above INT_MAX.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
|
|
|
|
/*
|
|
* When trying to store a memcg page in zswap, if the memcg hits its memory
|
|
* limit in zswap, writeback should not be triggered.
|
|
*
|
|
* This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may
|
|
* not zswap"). Needs to be revised when a per memcg writeback mechanism is
|
|
* implemented.
|
|
*/
|
|
static int test_no_invasive_cgroup_shrink(const char *root)
|
|
{
|
|
size_t written_back_before, written_back_after;
|
|
int ret = KSFT_FAIL;
|
|
char *test_group;
|
|
|
|
/* Set up */
|
|
test_group = cg_name(root, "no_shrink_test");
|
|
if (!test_group)
|
|
goto out;
|
|
if (cg_create(test_group))
|
|
goto out;
|
|
if (cg_write(test_group, "memory.max", "1M"))
|
|
goto out;
|
|
if (cg_write(test_group, "memory.zswap.max", "10K"))
|
|
goto out;
|
|
if (get_zswap_written_back_pages(&written_back_before))
|
|
goto out;
|
|
|
|
/* Allocate 10x memory.max to push memory into zswap */
|
|
if (cg_run(test_group, allocate_bytes, (void *)MB(10)))
|
|
goto out;
|
|
|
|
/* Verify that no writeback happened because of the memcg allocation */
|
|
if (get_zswap_written_back_pages(&written_back_after))
|
|
goto out;
|
|
if (written_back_after == written_back_before)
|
|
ret = KSFT_PASS;
|
|
out:
|
|
cg_destroy(test_group);
|
|
free(test_group);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Arguments and status exchanged between test_no_kmem_bypass() and
 * no_kmem_bypass_child() through a shared anonymous mapping.
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child should allocate and touch. */
	size_t target_alloc_bytes;
	/*
	 * Set by the child once its allocation attempt has finished (whether
	 * it succeeded or not); polled by the parent. Only ever holds
	 * true/false, hence bool.
	 */
	bool child_allocated;
};
|
|
|
|
static int no_kmem_bypass_child(const char *cgroup, void *arg)
|
|
{
|
|
struct no_kmem_bypass_child_args *values = arg;
|
|
void *allocation;
|
|
|
|
allocation = malloc(values->target_alloc_bytes);
|
|
if (!allocation) {
|
|
values->child_allocated = true;
|
|
return -1;
|
|
}
|
|
for (long i = 0; i < values->target_alloc_bytes; i += 4095)
|
|
((char *)allocation)[i] = 'a';
|
|
values->child_allocated = true;
|
|
pause();
|
|
free(allocation);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* When pages owned by a memcg are pushed to zswap by kswapd, they should be
|
|
* charged to that cgroup. This wasn't the case before commit
|
|
* cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
|
|
*
|
|
* The test first allocates memory in a memcg, then raises min_free_kbytes to
|
|
* a very high value so that the allocation falls below low wm, then makes
|
|
* another allocation to trigger kswapd that should push the memcg-owned pages
|
|
* to zswap and verifies that the zswap pages are correctly charged.
|
|
*
|
|
* To be run on a VM with at most 4G of memory.
|
|
*/
|
|
static int test_no_kmem_bypass(const char *root)
|
|
{
|
|
size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
|
|
struct no_kmem_bypass_child_args *values;
|
|
size_t trigger_allocation_size;
|
|
int wait_child_iteration = 0;
|
|
long stored_pages_threshold;
|
|
struct sysinfo sys_info;
|
|
int ret = KSFT_FAIL;
|
|
int child_status;
|
|
char *test_group;
|
|
pid_t child_pid;
|
|
|
|
/* Read sys info and compute test values accordingly */
|
|
if (sysinfo(&sys_info) != 0)
|
|
return KSFT_FAIL;
|
|
if (sys_info.totalram > 5000000000)
|
|
return KSFT_SKIP;
|
|
values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
|
|
PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
|
if (values == MAP_FAILED)
|
|
return KSFT_FAIL;
|
|
if (read_min_free_kb(&min_free_kb_original))
|
|
return KSFT_FAIL;
|
|
min_free_kb_high = sys_info.totalram / 2000;
|
|
min_free_kb_low = sys_info.totalram / 500000;
|
|
values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
|
|
sys_info.totalram * 5 / 100;
|
|
stored_pages_threshold = sys_info.totalram / 5 / 4096;
|
|
trigger_allocation_size = sys_info.totalram / 20;
|
|
|
|
/* Set up test memcg */
|
|
if (cg_write(root, "cgroup.subtree_control", "+memory"))
|
|
goto out;
|
|
test_group = cg_name(root, "kmem_bypass_test");
|
|
if (!test_group)
|
|
goto out;
|
|
|
|
/* Spawn memcg child and wait for it to allocate */
|
|
set_min_free_kb(min_free_kb_low);
|
|
if (cg_create(test_group))
|
|
goto out;
|
|
values->child_allocated = false;
|
|
child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
|
|
if (child_pid < 0)
|
|
goto out;
|
|
while (!values->child_allocated && wait_child_iteration++ < 10000)
|
|
usleep(1000);
|
|
|
|
/* Try to wakeup kswapd and let it push child memory to zswap */
|
|
set_min_free_kb(min_free_kb_high);
|
|
for (int i = 0; i < 20; i++) {
|
|
size_t stored_pages;
|
|
char *trigger_allocation = malloc(trigger_allocation_size);
|
|
|
|
if (!trigger_allocation)
|
|
break;
|
|
for (int i = 0; i < trigger_allocation_size; i += 4095)
|
|
trigger_allocation[i] = 'b';
|
|
usleep(100000);
|
|
free(trigger_allocation);
|
|
if (get_zswap_stored_pages(&stored_pages))
|
|
break;
|
|
if (stored_pages < 0)
|
|
break;
|
|
/* If memory was pushed to zswap, verify it belongs to memcg */
|
|
if (stored_pages > stored_pages_threshold) {
|
|
int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
|
|
int delta = stored_pages * 4096 - zswapped;
|
|
int result_ok = delta < stored_pages * 4096 / 4;
|
|
|
|
ret = result_ok ? KSFT_PASS : KSFT_FAIL;
|
|
break;
|
|
}
|
|
}
|
|
|
|
kill(child_pid, SIGTERM);
|
|
waitpid(child_pid, &child_status, 0);
|
|
out:
|
|
set_min_free_kb(min_free_kb_original);
|
|
cg_destroy(test_group);
|
|
free(test_group);
|
|
return ret;
|
|
}
|
|
|
|
#define T(x) { x, #x }
|
|
struct zswap_test {
|
|
int (*fn)(const char *root);
|
|
const char *name;
|
|
} tests[] = {
|
|
T(test_no_kmem_bypass),
|
|
T(test_no_invasive_cgroup_shrink),
|
|
};
|
|
#undef T
|
|
|
|
/* Report whether zswap is present, judged by /sys/module/zswap existing. */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;
	return true;
}
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
char root[PATH_MAX];
|
|
int i, ret = EXIT_SUCCESS;
|
|
|
|
if (cg_find_unified_root(root, sizeof(root), NULL))
|
|
ksft_exit_skip("cgroup v2 isn't mounted\n");
|
|
|
|
if (!zswap_configured())
|
|
ksft_exit_skip("zswap isn't configured\n");
|
|
|
|
/*
|
|
* Check that memory controller is available:
|
|
* memory is listed in cgroup.controllers
|
|
*/
|
|
if (cg_read_strstr(root, "cgroup.controllers", "memory"))
|
|
ksft_exit_skip("memory controller isn't available\n");
|
|
|
|
if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
|
|
if (cg_write(root, "cgroup.subtree_control", "+memory"))
|
|
ksft_exit_skip("Failed to set memory controller\n");
|
|
|
|
for (i = 0; i < ARRAY_SIZE(tests); i++) {
|
|
switch (tests[i].fn(root)) {
|
|
case KSFT_PASS:
|
|
ksft_test_result_pass("%s\n", tests[i].name);
|
|
break;
|
|
case KSFT_SKIP:
|
|
ksft_test_result_skip("%s\n", tests[i].name);
|
|
break;
|
|
default:
|
|
ret = EXIT_FAILURE;
|
|
ksft_test_result_fail("%s\n", tests[i].name);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|