/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE

#include <linux/limits.h>
#include <linux/oom.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
#include <sys/mman.h>

#include "../kselftest.h"
#include "cgroup_util.h"

static bool has_localevents;
static bool has_recursiveprot;

/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}

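/*
 * Allocate and touch 50M of anonymous memory in the current cgroup, then
 * check that memory.current and the "anon" entry of memory.stat roughly
 * match the allocation.
 */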
static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	char *buf, *ptr;
	long anon, current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	if (!values_close(size, current, 3))
		goto cleanup;

	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
	if (anon < 0)
		goto cleanup;

	if (!values_close(anon, current, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

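/*
 * Populate 50M of page cache through a temporary file, then check that
 * memory.current and the "file" entry of memory.stat reflect it.
 */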
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test creates a memory cgroup, allocates
 * some anonymous memory and some pagecache
 * and checks memory.current, memory.peak, and some memory.stat values.
 */
static int test_memcg_current_peak(const char *root)
{
	int ret = KSFT_FAIL;
	long current, peak, peak_reset;
	char *memcg;
	bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
	int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;
	struct stat ss;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/*
	 * We'll open a few FDs for the same memory.peak file to exercise the
	 * free path. We need at least three to be closed in a different order
	 * than the writes occurred, to test the linked-list handling.
	 */
	peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.peak's fd, try to figure out whether
	 * this kernel supports writing to that file in the first place. (by
	 * checking the writable bit on the file's st_mode)
	 */
	if (fstat(peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd2 == -1)
		goto cleanup;

	peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd3 == -1)
		goto cleanup;

	/* any non-empty string resets, but make it clear */
	static const char reset_string[] = "reset\n";

	peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* Make sure a completely independent read isn't affected by our FD-local reset above */
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	fd2_closed = true;
	if (close(peak_fd2))
		goto cleanup;

	peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd4 == -1)
		goto cleanup;

	peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd);
	if (peak > MB(30) || peak < 0)
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/* Make sure everything is back to normal */
	peak = cg_read_long_fd(peak_fd);
	if (peak < MB(50))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd4);
	if (peak < MB(50))
		goto cleanup;

	fd3_closed = true;
	if (close(peak_fd3))
		goto cleanup;

	fd4_closed = true;
	if (close(peak_fd4))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	close(peak_fd);
	if (!fd2_closed)
		close(peak_fd2);
	if (!fd3_closed)
		close(peak_fd3);
	if (!fd4_closed)
		close(peak_fd4);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

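/*
 * Allocate 50M of page cache via the temporary fd passed in @arg, then keep
 * the process alive until its parent exits, so the usage stays charged to
 * the cgroup while the test runs.
 */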
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	while (getppid() == ppid)
		sleep(1);

	return 0;
}

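/*
 * Allocate and touch the amount of anonymous memory given in @arg, then keep
 * the process alive until its parent exits.
 */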
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	while (getppid() == ppid)
		sleep(1);

	free(buf);
	return 0;
}

/*
 * Wait until processes are killed asynchronously by the OOM killer.
 * If we exceed a timeout, fail.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int limit;

	for (limit = 10; limit > 0; limit--) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}

static bool reclaim_until(const char *memcg, long goal);

/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 0,    memory.max = 200M
 * A/B     memory.min = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * (or memory.low if we test soft protection)
 *
 * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates significant
 * memory pressure in A.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 29M
 * A/B/D  memory.current ~= 21M
 * A/B/E  memory.current ~= 0
 * A/B/F  memory.current  = 0
 * (for origin of the numbers, see model in memcg_protection.m.)
 *
 * After that it tries to allocate more than the unprotected memory
 * available in A, and checks that:
 * a) memory.min protects pagecache even in this case,
 * b) memory.low allows reclaiming page cache with low events.
 *
 * Then we try to reclaim from A/B/C using memory.reclaim until its
 * usage reaches 10M.
 * This makes sure that:
 * (a) We ignore the protection of the reclaim target memcg.
 * (b) The previously calculated emin value (~29M) should be dismissed.
 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	long current;
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[1],   attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 10))
		goto cleanup;

	if (!values_close(c[1], MB(21), 10))
		goto cleanup;

	if (c[3] != 0)
		goto cleanup;

	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	current = min ? MB(50) : MB(30);
	if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
		goto cleanup;

	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;
	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

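/* Convenience wrappers: hard (memory.min) and soft (memory.low) protection */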
static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}

static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}

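/*
 * Try to create 50M of page cache in a cgroup limited to 30M (via either
 * memory.high or memory.max) and check that usage stays close to the 30M
 * limit.
 */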
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

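/*
 * mmap, mlock and then unmap an anonymous mapping of the size given in @arg;
 * mlock() faults the whole range in within a single kernel entry.
 */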
static int alloc_anon_mlock(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	void *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   0, 0);
	if (buf == MAP_FAILED)
		return -1;

	mlock(buf, size);
	munmap(buf, size);
	return 0;
}

/*
 * This test checks that memory.high is able to throttle a big single-shot
 * allocation, i.e. a large allocation within one kernel entry.
 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Reclaim from @memcg until usage reaches @goal by writing to
 * memory.reclaim.
 *
 * This function will return false if the usage is already below the
 * goal.
 *
 * This function assumes that writing to memory.reclaim is the only
 * source of change in memory.current (no concurrent allocations or
 * reclaim).
 *
 * This function makes sure memory.reclaim is sane. It will return
 * false if memory.reclaim's error codes do not make sense, even if
 * the usage goal was satisfied.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	bool reclaimed = false;

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");

		if (current < goal || values_close(current, goal, 3))
			break;
		/* Did memory.reclaim return 0 incorrectly? */
		else if (reclaimed)
			return false;

		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			return false;
	}
	return reclaimed;
}

/*
 * This test checks that memory.reclaim reclaims the given
 * amount of memory (from both anon and file, if possible).
 */
static int test_memcg_reclaim(const char *root)
{
	int ret = KSFT_FAIL;
	int fd = -1;
	int retries;
	char *memcg;
	long current, expected_usage;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else
		expected_usage = MB(50);

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			    expected_usage, 10)) {
		if (retries--) {
			sleep(1);
			continue;
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M; this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	if (!reclaim_until(memcg, MB(30)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(memcg);
	free(memcg);
	close(fd);

	return ret;
}

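/*
 * Allocate and touch 50M of anonymous memory in a cgroup whose memory.max is
 * passed in @arg, then check that the excess was swapped out: memory.current
 * stays close to the limit while memory.current + memory.swap.current is
 * close to the allocated size.
 */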
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out. Additionally, it verifies that
 * memory.swap.peak reflects the high watermark and can be reset.
 */
static int test_memcg_swap_max_peak(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max, peak;
	struct stat ss;
	int swap_peak_fd = -1, mem_peak_fd = -1;

	/* any non-empty string resets */
	static const char reset_string[] = "foobarbaz";

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	swap_peak_fd = cg_open(memcg, "memory.swap.peak",
			       O_RDWR | O_APPEND | O_CLOEXEC);

	if (swap_peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.swap.peak's fd, try to figure out
	 * whether this kernel supports writing to that file in the first
	 * place. (by checking the writable bit on the file's st_mode)
	 */
	if (fstat(swap_peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (mem_peak_fd == -1)
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak"))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	/* switch the swap and mem fds into local-peak tracking mode */
	int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));

	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak"))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	/*
	 * open, reset and close the peak swap on another FD to make sure
	 * multiple extant fds don't corrupt the linked-list
	 */
	peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	/* actually reset on the fds */
	peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak > MB(10))
		goto cleanup;

	/*
	 * The cgroup is now empty, but there may be a page or two associated
	 * with the open FD accounted to it.
	 */
	peak = cg_read_long_fd(mem_peak_fd);
	if (peak > MB(1))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak") < MB(29))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(19))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (mem_peak_fd != -1 && close(mem_peak_fd))
		ret = KSFT_FAIL;
	if (swap_peak_fd != -1 && close(swap_peak_fd))
		ret = KSFT_FAIL;
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

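/*
 * Arguments for the forked TCP server: the port to listen on and a pipe
 * used to report the bind() result back to the parent.
 */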
struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

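/*
 * Forked TCP server: binds to the requested port, reports the bind() result
 * over the control pipe, then streams data to the first accepted client
 * until the connection is reset.
 */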
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

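/*
 * Connect to the server on @port and keep reading from the socket while
 * comparing the growth of memory.current against the "sock" entry of
 * memory.stat. Returns KSFT_PASS once the two values converge.
 */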
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;
	long allocated;

	allocated = cg_read_long(cgroup, "memory.current");
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* exclude the memory not related to socket connection */
		if (values_close(current - allocated, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}

/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between 1000
 * and 61000. Once it gets a client connection, it starts writing to
 * its socket.
 * The TCP client interleaves reads from the socket with checks that
 * memory.current and memory.stat.sock stay similar.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.group.oom set. Then it checks that all
 * processes in the leaf were killed. It also checks that oom_events
 * were propagated to the parent level.
 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.group.oom set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.group.oom set. Then it checks that all
 * processes were killed except those set with OOM_SCORE_ADJ_MIN.
 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}

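/* Table of tests; T() pairs each test function with its printable name. */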
#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current_peak),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max_peak),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, proc_status, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root), NULL))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	proc_status = proc_mount_contains("memory_recursiveprot");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_recursiveprot = proc_status;

	proc_status = proc_mount_contains("memory_localevents");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_localevents = proc_status;

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}