xref: /linux/tools/testing/selftests/cgroup/test_zswap.c (revision d7b4e3287ca3a7baf66efd9158498e551a9550da)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 
4 #include <linux/limits.h>
5 #include <unistd.h>
6 #include <stdio.h>
7 #include <signal.h>
8 #include <sys/sysinfo.h>
9 #include <string.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12 
13 #include "../kselftest.h"
14 #include "cgroup_util.h"
15 
/*
 * Read a single unsigned decimal integer from the start of @path.
 *
 * @path:  file to read (e.g. a sysctl or debugfs entry).
 * @value: where the parsed number is stored on success.
 *
 * Returns 0 on success, -1 if the file cannot be opened or does not begin
 * with a number.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* %zu is the conversion that matches a size_t destination. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29 
/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the (positive) number of characters formatted by fprintf() on
 * success, or a negative value if the file cannot be opened, formatting
 * fails, or the buffered data cannot be flushed when the file is closed.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* %zu is the conversion that matches a size_t argument. */
	ret = fprintf(file, "%zu\n", value);
	/* The stream is buffered, so a failed write only shows up at close. */
	if (fclose(file) != 0)
		ret = -1;
	return ret;
}
42 
/*
 * Fetch the current vm.min_free_kbytes setting into @value.
 * Returns whatever read_int() returns: 0 on success, -1 on failure.
 */
static int read_min_free_kb(size_t *value)
{
	static const char sysctl_path[] = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
47 
/*
 * Fetch the value of the zswap "stored_pages" debugfs entry into @value.
 * Returns whatever read_int() returns: 0 on success, -1 on failure.
 */
static int get_zswap_stored_pages(size_t *value)
{
	static const char debugfs_path[] = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(debugfs_path, value);
}
52 
/*
 * Return the zswap writeback counter of cgroup @cg, read from its
 * memory.stat file, or a negative value if cg_read_key_long() fails.
 *
 * The counter is exported by the memory controller under the key "zswpwb";
 * looking up any other spelling never matches, which would make callers
 * see a failure on every read.
 */
static int get_cg_wb_count(const char *cg)
{
	return cg_read_key_long(cg, "memory.stat", "zswpwb");
}
57 
/*
 * Look up the "zswpout " entry in @cgroup's memory.stat and hand back the
 * value reported by cg_read_key_long() unchanged.
 */
static long get_zswpout(const char *cgroup)
{
	long pages_out;

	pages_out = cg_read_key_long(cgroup, "memory.stat", "zswpout ");
	return pages_out;
}
62 
/*
 * cg_run() callback: allocate a buffer and touch it so its pages are
 * actually faulted in, then release it.
 *
 * @cgroup: unused.
 * @arg:    the number of bytes to allocate, passed as a pointer-sized value.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/*
	 * A stride just under 4096 writes at least one byte per 4K page.
	 * The index is a size_t so it cannot overflow for sizes > INT_MAX.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
75 
/*
 * Create the cgroup @name under @root and write "1M" to its memory.max.
 *
 * On success the heap-allocated cgroup path is returned; the caller owns
 * it (and the cgroup).  On any failure NULL is returned, with the cgroup
 * destroyed again if it had already been created.
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
	char *path = cg_name(root, name);

	if (!path)
		return NULL;

	if (cg_create(path) == 0) {
		if (cg_write(path, "memory.max", "1M") == 0)
			return path;
		/* The limit could not be applied: undo the creation. */
		cg_destroy(path);
	}

	free(path);
	return NULL;
}
93 
94 /*
95  * Sanity test to check that pages are written into zswap.
96  */
97 static int test_zswap_usage(const char *root)
98 {
99 	long zswpout_before, zswpout_after;
100 	int ret = KSFT_FAIL;
101 	char *test_group;
102 
103 	/* Set up */
104 	test_group = cg_name(root, "no_shrink_test");
105 	if (!test_group)
106 		goto out;
107 	if (cg_create(test_group))
108 		goto out;
109 	if (cg_write(test_group, "memory.max", "1M"))
110 		goto out;
111 
112 	zswpout_before = get_zswpout(test_group);
113 	if (zswpout_before < 0) {
114 		ksft_print_msg("Failed to get zswpout\n");
115 		goto out;
116 	}
117 
118 	/* Allocate more than memory.max to push memory into zswap */
119 	if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
120 		goto out;
121 
122 	/* Verify that pages come into zswap */
123 	zswpout_after = get_zswpout(test_group);
124 	if (zswpout_after <= zswpout_before) {
125 		ksft_print_msg("zswpout does not increase after test program\n");
126 		goto out;
127 	}
128 	ret = KSFT_PASS;
129 
130 out:
131 	cg_destroy(test_group);
132 	free(test_group);
133 	return ret;
134 }
135 
136 /*
137  * When trying to store a memcg page in zswap, if the memcg hits its memory
138  * limit in zswap, writeback should affect only the zswapped pages of that
139  * memcg.
140  */
141 static int test_no_invasive_cgroup_shrink(const char *root)
142 {
143 	int ret = KSFT_FAIL;
144 	size_t control_allocation_size = MB(10);
145 	char *control_allocation, *wb_group = NULL, *control_group = NULL;
146 
147 	/* Set up */
148 	wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
149 	if (!wb_group)
150 		return KSFT_FAIL;
151 	if (cg_write(wb_group, "memory.zswap.max", "10K"))
152 		goto out;
153 	control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
154 	if (!control_group)
155 		goto out;
156 
157 	/* Push some test_group2 memory into zswap */
158 	if (cg_enter_current(control_group))
159 		goto out;
160 	control_allocation = malloc(control_allocation_size);
161 	for (int i = 0; i < control_allocation_size; i += 4095)
162 		control_allocation[i] = 'a';
163 	if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
164 		goto out;
165 
166 	/* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
167 	if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
168 		goto out;
169 
170 	/* Verify that only zswapped memory from gwb_group has been written back */
171 	if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
172 		ret = KSFT_PASS;
173 out:
174 	cg_enter_current(root);
175 	if (control_group) {
176 		cg_destroy(control_group);
177 		free(control_group);
178 	}
179 	cg_destroy(wb_group);
180 	free(wb_group);
181 	if (control_allocation)
182 		free(control_allocation);
183 	return ret;
184 }
185 
/*
 * State exchanged between test_no_kmem_bypass() and the child running
 * no_kmem_bypass_child().  The parent places it in a MAP_SHARED anonymous
 * mapping so that the child's writes are visible to the parent.
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child allocates and touches. */
	size_t target_alloc_bytes;
	/*
	 * Set to true by the child once it is done with its allocation
	 * attempt (whether it succeeded or not); polled by the parent.
	 */
	size_t child_allocated;
};
190 
191 static int no_kmem_bypass_child(const char *cgroup, void *arg)
192 {
193 	struct no_kmem_bypass_child_args *values = arg;
194 	void *allocation;
195 
196 	allocation = malloc(values->target_alloc_bytes);
197 	if (!allocation) {
198 		values->child_allocated = true;
199 		return -1;
200 	}
201 	for (long i = 0; i < values->target_alloc_bytes; i += 4095)
202 		((char *)allocation)[i] = 'a';
203 	values->child_allocated = true;
204 	pause();
205 	free(allocation);
206 	return 0;
207 }
208 
209 /*
210  * When pages owned by a memcg are pushed to zswap by kswapd, they should be
211  * charged to that cgroup. This wasn't the case before commit
212  * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
213  *
214  * The test first allocates memory in a memcg, then raises min_free_kbytes to
215  * a very high value so that the allocation falls below low wm, then makes
216  * another allocation to trigger kswapd that should push the memcg-owned pages
217  * to zswap and verifies that the zswap pages are correctly charged.
218  *
219  * To be run on a VM with at most 4G of memory.
220  */
221 static int test_no_kmem_bypass(const char *root)
222 {
223 	size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
224 	struct no_kmem_bypass_child_args *values;
225 	size_t trigger_allocation_size;
226 	int wait_child_iteration = 0;
227 	long stored_pages_threshold;
228 	struct sysinfo sys_info;
229 	int ret = KSFT_FAIL;
230 	int child_status;
231 	char *test_group;
232 	pid_t child_pid;
233 
234 	/* Read sys info and compute test values accordingly */
235 	if (sysinfo(&sys_info) != 0)
236 		return KSFT_FAIL;
237 	if (sys_info.totalram > 5000000000)
238 		return KSFT_SKIP;
239 	values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
240 			PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
241 	if (values == MAP_FAILED)
242 		return KSFT_FAIL;
243 	if (read_min_free_kb(&min_free_kb_original))
244 		return KSFT_FAIL;
245 	min_free_kb_high = sys_info.totalram / 2000;
246 	min_free_kb_low = sys_info.totalram / 500000;
247 	values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
248 		sys_info.totalram * 5 / 100;
249 	stored_pages_threshold = sys_info.totalram / 5 / 4096;
250 	trigger_allocation_size = sys_info.totalram / 20;
251 
252 	/* Set up test memcg */
253 	if (cg_write(root, "cgroup.subtree_control", "+memory"))
254 		goto out;
255 	test_group = cg_name(root, "kmem_bypass_test");
256 	if (!test_group)
257 		goto out;
258 
259 	/* Spawn memcg child and wait for it to allocate */
260 	set_min_free_kb(min_free_kb_low);
261 	if (cg_create(test_group))
262 		goto out;
263 	values->child_allocated = false;
264 	child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
265 	if (child_pid < 0)
266 		goto out;
267 	while (!values->child_allocated && wait_child_iteration++ < 10000)
268 		usleep(1000);
269 
270 	/* Try to wakeup kswapd and let it push child memory to zswap */
271 	set_min_free_kb(min_free_kb_high);
272 	for (int i = 0; i < 20; i++) {
273 		size_t stored_pages;
274 		char *trigger_allocation = malloc(trigger_allocation_size);
275 
276 		if (!trigger_allocation)
277 			break;
278 		for (int i = 0; i < trigger_allocation_size; i += 4095)
279 			trigger_allocation[i] = 'b';
280 		usleep(100000);
281 		free(trigger_allocation);
282 		if (get_zswap_stored_pages(&stored_pages))
283 			break;
284 		if (stored_pages < 0)
285 			break;
286 		/* If memory was pushed to zswap, verify it belongs to memcg */
287 		if (stored_pages > stored_pages_threshold) {
288 			int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
289 			int delta = stored_pages * 4096 - zswapped;
290 			int result_ok = delta < stored_pages * 4096 / 4;
291 
292 			ret = result_ok ? KSFT_PASS : KSFT_FAIL;
293 			break;
294 		}
295 	}
296 
297 	kill(child_pid, SIGTERM);
298 	waitpid(child_pid, &child_status, 0);
299 out:
300 	set_min_free_kb(min_free_kb_original);
301 	cg_destroy(test_group);
302 	free(test_group);
303 	return ret;
304 }
305 
306 #define T(x) { x, #x }
307 struct zswap_test {
308 	int (*fn)(const char *root);
309 	const char *name;
310 } tests[] = {
311 	T(test_zswap_usage),
312 	T(test_no_kmem_bypass),
313 	T(test_no_invasive_cgroup_shrink),
314 };
315 #undef T
316 
/*
 * Report whether /sys/module/zswap exists, which this test takes as the
 * sign that zswap is available on the running kernel.
 */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;
	return true;
}
321 
322 int main(int argc, char **argv)
323 {
324 	char root[PATH_MAX];
325 	int i, ret = EXIT_SUCCESS;
326 
327 	if (cg_find_unified_root(root, sizeof(root)))
328 		ksft_exit_skip("cgroup v2 isn't mounted\n");
329 
330 	if (!zswap_configured())
331 		ksft_exit_skip("zswap isn't configured\n");
332 
333 	/*
334 	 * Check that memory controller is available:
335 	 * memory is listed in cgroup.controllers
336 	 */
337 	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
338 		ksft_exit_skip("memory controller isn't available\n");
339 
340 	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
341 		if (cg_write(root, "cgroup.subtree_control", "+memory"))
342 			ksft_exit_skip("Failed to set memory controller\n");
343 
344 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
345 		switch (tests[i].fn(root)) {
346 		case KSFT_PASS:
347 			ksft_test_result_pass("%s\n", tests[i].name);
348 			break;
349 		case KSFT_SKIP:
350 			ksft_test_result_skip("%s\n", tests[i].name);
351 			break;
352 		default:
353 			ret = EXIT_FAILURE;
354 			ksft_test_result_fail("%s\n", tests[i].name);
355 			break;
356 		}
357 	}
358 
359 	return ret;
360 }
361