xref: /linux/tools/testing/selftests/cgroup/test_zswap.c (revision 3027ce13e04eee76539ca65c2cb1028a01c8c508)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 
4 #include <linux/limits.h>
5 #include <unistd.h>
6 #include <stdio.h>
7 #include <signal.h>
8 #include <sys/sysinfo.h>
9 #include <string.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12 
13 #include "../kselftest.h"
14 #include "cgroup_util.h"
15 
/*
 * Parse one unsigned decimal integer from the start of the file at @path
 * and store it in @value.
 *
 * Returns 0 on success, -1 if the file cannot be opened or does not begin
 * with a number. @value is only written when a number was parsed.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* @value is a size_t *, so the matching conversion is %zu, not %ld. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29 
/*
 * Write @value (in kilobytes) to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the number of characters formatted (> 0) on success and a
 * negative value if the file cannot be opened, formatting fails, or the
 * buffered data cannot be flushed when the stream is closed.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* @value is a size_t, so print it with %zu rather than %ld. */
	ret = fprintf(file, "%zu\n", value);
	/*
	 * The stream is buffered, so a write error may only become visible
	 * when fclose() flushes it; do not report success in that case.
	 */
	if (fclose(file) != 0 && ret >= 0)
		ret = -1;
	return ret;
}
42 
/*
 * Fetch the current /proc/sys/vm/min_free_kbytes setting into @value.
 * Returns whatever read_int() returns for that file.
 */
static int read_min_free_kb(size_t *value)
{
	const char *sysctl_path = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
47 
/*
 * Fetch the zswap "stored_pages" debugfs counter into @value.
 * Returns whatever read_int() returns for that file.
 */
static int get_zswap_stored_pages(size_t *value)
{
	const char *counter_path = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(counter_path, value);
}
52 
/*
 * Look up the "zswpwb" entry of @cg's memory.stat.
 * The long result of cg_read_key_long() is narrowed to int on return.
 */
static int get_cg_wb_count(const char *cg)
{
	long wb_count = cg_read_key_long(cg, "memory.stat", "zswpwb");

	return wb_count;
}
57 
/*
 * Look up the "zswpout " entry of @cgroup's memory.stat and return the
 * value reported by cg_read_key_long() (callers treat < 0 as failure).
 */
static long get_zswpout(const char *cgroup)
{
	long zswpout = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

	return zswpout;
}
62 
/*
 * Allocate @arg bytes (the size is smuggled through the pointer value),
 * write 'a' at every 4095th byte, then read the same bytes back.
 *
 * @cgroup is unused; the signature matches the callback passed to cg_run().
 *
 * Returns 0 if every byte read back as 'a', -1 on allocation failure or
 * if any byte differs.
 */
static int allocate_and_read_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);
	int ret = 0;

	(void)cgroup;

	if (!mem)
		return -1;
	/* size_t index: an int would overflow for sizes above INT_MAX. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';

	/* Go through the allocated memory to (z)swap in and out pages */
	for (size_t i = 0; i < size; i += 4095) {
		if (mem[i] != 'a')
			ret = -1;
	}

	free(mem);
	return ret;
}
83 
/*
 * Allocate @arg bytes (the size is smuggled through the pointer value),
 * write 'a' at every 4095th byte, and free the buffer again.
 *
 * @cgroup is unused; the signature matches the callback passed to cg_run().
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	(void)cgroup;

	if (!mem)
		return -1;
	/* size_t index: an int would overflow for sizes above INT_MAX. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
96 
/*
 * Create the cgroup @name under @root and set its memory.max to "1M".
 *
 * Returns the cgroup path obtained from cg_name(), which the caller must
 * free(), or NULL on any failure. If the limit cannot be written the
 * freshly created cgroup is destroyed again before returning.
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
	char *cg_path = cg_name(root, name);

	if (!cg_path)
		return NULL;

	if (cg_create(cg_path) == 0) {
		if (cg_write(cg_path, "memory.max", "1M") == 0)
			return cg_path;
		/* Created but could not be configured: undo the creation. */
		cg_destroy(cg_path);
	}

	free(cg_path);
	return NULL;
}
114 
115 /*
116  * Sanity test to check that pages are written into zswap.
117  */
118 static int test_zswap_usage(const char *root)
119 {
120 	long zswpout_before, zswpout_after;
121 	int ret = KSFT_FAIL;
122 	char *test_group;
123 
124 	test_group = cg_name(root, "no_shrink_test");
125 	if (!test_group)
126 		goto out;
127 	if (cg_create(test_group))
128 		goto out;
129 	if (cg_write(test_group, "memory.max", "1M"))
130 		goto out;
131 
132 	zswpout_before = get_zswpout(test_group);
133 	if (zswpout_before < 0) {
134 		ksft_print_msg("Failed to get zswpout\n");
135 		goto out;
136 	}
137 
138 	/* Allocate more than memory.max to push memory into zswap */
139 	if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
140 		goto out;
141 
142 	/* Verify that pages come into zswap */
143 	zswpout_after = get_zswpout(test_group);
144 	if (zswpout_after <= zswpout_before) {
145 		ksft_print_msg("zswpout does not increase after test program\n");
146 		goto out;
147 	}
148 	ret = KSFT_PASS;
149 
150 out:
151 	cg_destroy(test_group);
152 	free(test_group);
153 	return ret;
154 }
155 
156 /*
157  * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
158  * the cgroup.
159  */
160 static int test_swapin_nozswap(const char *root)
161 {
162 	int ret = KSFT_FAIL;
163 	char *test_group;
164 	long swap_peak, zswpout;
165 
166 	test_group = cg_name(root, "no_zswap_test");
167 	if (!test_group)
168 		goto out;
169 	if (cg_create(test_group))
170 		goto out;
171 	if (cg_write(test_group, "memory.max", "8M"))
172 		goto out;
173 	if (cg_write(test_group, "memory.zswap.max", "0"))
174 		goto out;
175 
176 	/* Allocate and read more than memory.max to trigger swapin */
177 	if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
178 		goto out;
179 
180 	/* Verify that pages are swapped out, but no zswap happened */
181 	swap_peak = cg_read_long(test_group, "memory.swap.peak");
182 	if (swap_peak < 0) {
183 		ksft_print_msg("failed to get cgroup's swap_peak\n");
184 		goto out;
185 	}
186 
187 	if (swap_peak < MB(24)) {
188 		ksft_print_msg("at least 24MB of memory should be swapped out\n");
189 		goto out;
190 	}
191 
192 	zswpout = get_zswpout(test_group);
193 	if (zswpout < 0) {
194 		ksft_print_msg("failed to get zswpout\n");
195 		goto out;
196 	}
197 
198 	if (zswpout > 0) {
199 		ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
200 		goto out;
201 	}
202 
203 	ret = KSFT_PASS;
204 
205 out:
206 	cg_destroy(test_group);
207 	free(test_group);
208 	return ret;
209 }
210 
211 /* Simple test to verify the (z)swapin code paths */
212 static int test_zswapin(const char *root)
213 {
214 	int ret = KSFT_FAIL;
215 	char *test_group;
216 	long zswpin;
217 
218 	test_group = cg_name(root, "zswapin_test");
219 	if (!test_group)
220 		goto out;
221 	if (cg_create(test_group))
222 		goto out;
223 	if (cg_write(test_group, "memory.max", "8M"))
224 		goto out;
225 	if (cg_write(test_group, "memory.zswap.max", "max"))
226 		goto out;
227 
228 	/* Allocate and read more than memory.max to trigger (z)swap in */
229 	if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
230 		goto out;
231 
232 	zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin ");
233 	if (zswpin < 0) {
234 		ksft_print_msg("failed to get zswpin\n");
235 		goto out;
236 	}
237 
238 	if (zswpin < MB(24) / PAGE_SIZE) {
239 		ksft_print_msg("at least 24MB should be brought back from zswap\n");
240 		goto out;
241 	}
242 
243 	ret = KSFT_PASS;
244 
245 out:
246 	cg_destroy(test_group);
247 	free(test_group);
248 	return ret;
249 }
250 
251 /*
252  * When trying to store a memcg page in zswap, if the memcg hits its memory
253  * limit in zswap, writeback should affect only the zswapped pages of that
254  * memcg.
255  */
256 static int test_no_invasive_cgroup_shrink(const char *root)
257 {
258 	int ret = KSFT_FAIL;
259 	size_t control_allocation_size = MB(10);
260 	char *control_allocation, *wb_group = NULL, *control_group = NULL;
261 
262 	wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
263 	if (!wb_group)
264 		return KSFT_FAIL;
265 	if (cg_write(wb_group, "memory.zswap.max", "10K"))
266 		goto out;
267 	control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
268 	if (!control_group)
269 		goto out;
270 
271 	/* Push some test_group2 memory into zswap */
272 	if (cg_enter_current(control_group))
273 		goto out;
274 	control_allocation = malloc(control_allocation_size);
275 	for (int i = 0; i < control_allocation_size; i += 4095)
276 		control_allocation[i] = 'a';
277 	if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
278 		goto out;
279 
280 	/* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
281 	if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
282 		goto out;
283 
284 	/* Verify that only zswapped memory from gwb_group has been written back */
285 	if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
286 		ret = KSFT_PASS;
287 out:
288 	cg_enter_current(root);
289 	if (control_group) {
290 		cg_destroy(control_group);
291 		free(control_group);
292 	}
293 	cg_destroy(wb_group);
294 	free(wb_group);
295 	if (control_allocation)
296 		free(control_allocation);
297 	return ret;
298 }
299 
/*
 * Arguments and status shared between test_no_kmem_bypass() and the child
 * it spawns; the parent places this structure in a MAP_SHARED mapping.
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child should allocate and touch. */
	size_t target_alloc_bytes;
	/* Set to true by the child once its allocation attempt has finished. */
	size_t child_allocated;
};
304 
305 static int no_kmem_bypass_child(const char *cgroup, void *arg)
306 {
307 	struct no_kmem_bypass_child_args *values = arg;
308 	void *allocation;
309 
310 	allocation = malloc(values->target_alloc_bytes);
311 	if (!allocation) {
312 		values->child_allocated = true;
313 		return -1;
314 	}
315 	for (long i = 0; i < values->target_alloc_bytes; i += 4095)
316 		((char *)allocation)[i] = 'a';
317 	values->child_allocated = true;
318 	pause();
319 	free(allocation);
320 	return 0;
321 }
322 
323 /*
324  * When pages owned by a memcg are pushed to zswap by kswapd, they should be
325  * charged to that cgroup. This wasn't the case before commit
326  * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
327  *
328  * The test first allocates memory in a memcg, then raises min_free_kbytes to
329  * a very high value so that the allocation falls below low wm, then makes
330  * another allocation to trigger kswapd that should push the memcg-owned pages
331  * to zswap and verifies that the zswap pages are correctly charged.
332  *
333  * To be run on a VM with at most 4G of memory.
334  */
335 static int test_no_kmem_bypass(const char *root)
336 {
337 	size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
338 	struct no_kmem_bypass_child_args *values;
339 	size_t trigger_allocation_size;
340 	int wait_child_iteration = 0;
341 	long stored_pages_threshold;
342 	struct sysinfo sys_info;
343 	int ret = KSFT_FAIL;
344 	int child_status;
345 	char *test_group;
346 	pid_t child_pid;
347 
348 	/* Read sys info and compute test values accordingly */
349 	if (sysinfo(&sys_info) != 0)
350 		return KSFT_FAIL;
351 	if (sys_info.totalram > 5000000000)
352 		return KSFT_SKIP;
353 	values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
354 			PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
355 	if (values == MAP_FAILED)
356 		return KSFT_FAIL;
357 	if (read_min_free_kb(&min_free_kb_original))
358 		return KSFT_FAIL;
359 	min_free_kb_high = sys_info.totalram / 2000;
360 	min_free_kb_low = sys_info.totalram / 500000;
361 	values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
362 		sys_info.totalram * 5 / 100;
363 	stored_pages_threshold = sys_info.totalram / 5 / 4096;
364 	trigger_allocation_size = sys_info.totalram / 20;
365 
366 	/* Set up test memcg */
367 	if (cg_write(root, "cgroup.subtree_control", "+memory"))
368 		goto out;
369 	test_group = cg_name(root, "kmem_bypass_test");
370 	if (!test_group)
371 		goto out;
372 
373 	/* Spawn memcg child and wait for it to allocate */
374 	set_min_free_kb(min_free_kb_low);
375 	if (cg_create(test_group))
376 		goto out;
377 	values->child_allocated = false;
378 	child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
379 	if (child_pid < 0)
380 		goto out;
381 	while (!values->child_allocated && wait_child_iteration++ < 10000)
382 		usleep(1000);
383 
384 	/* Try to wakeup kswapd and let it push child memory to zswap */
385 	set_min_free_kb(min_free_kb_high);
386 	for (int i = 0; i < 20; i++) {
387 		size_t stored_pages;
388 		char *trigger_allocation = malloc(trigger_allocation_size);
389 
390 		if (!trigger_allocation)
391 			break;
392 		for (int i = 0; i < trigger_allocation_size; i += 4095)
393 			trigger_allocation[i] = 'b';
394 		usleep(100000);
395 		free(trigger_allocation);
396 		if (get_zswap_stored_pages(&stored_pages))
397 			break;
398 		if (stored_pages < 0)
399 			break;
400 		/* If memory was pushed to zswap, verify it belongs to memcg */
401 		if (stored_pages > stored_pages_threshold) {
402 			int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
403 			int delta = stored_pages * 4096 - zswapped;
404 			int result_ok = delta < stored_pages * 4096 / 4;
405 
406 			ret = result_ok ? KSFT_PASS : KSFT_FAIL;
407 			break;
408 		}
409 	}
410 
411 	kill(child_pid, SIGTERM);
412 	waitpid(child_pid, &child_status, 0);
413 out:
414 	set_min_free_kb(min_free_kb_original);
415 	cg_destroy(test_group);
416 	free(test_group);
417 	return ret;
418 }
419 
420 #define T(x) { x, #x }
421 struct zswap_test {
422 	int (*fn)(const char *root);
423 	const char *name;
424 } tests[] = {
425 	T(test_zswap_usage),
426 	T(test_swapin_nozswap),
427 	T(test_zswapin),
428 	T(test_no_kmem_bypass),
429 	T(test_no_invasive_cgroup_shrink),
430 };
431 #undef T
432 
/* Report whether /sys/module/zswap exists. */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;
	return true;
}
437 
438 int main(int argc, char **argv)
439 {
440 	char root[PATH_MAX];
441 	int i, ret = EXIT_SUCCESS;
442 
443 	if (cg_find_unified_root(root, sizeof(root)))
444 		ksft_exit_skip("cgroup v2 isn't mounted\n");
445 
446 	if (!zswap_configured())
447 		ksft_exit_skip("zswap isn't configured\n");
448 
449 	/*
450 	 * Check that memory controller is available:
451 	 * memory is listed in cgroup.controllers
452 	 */
453 	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
454 		ksft_exit_skip("memory controller isn't available\n");
455 
456 	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
457 		if (cg_write(root, "cgroup.subtree_control", "+memory"))
458 			ksft_exit_skip("Failed to set memory controller\n");
459 
460 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
461 		switch (tests[i].fn(root)) {
462 		case KSFT_PASS:
463 			ksft_test_result_pass("%s\n", tests[i].name);
464 			break;
465 		case KSFT_SKIP:
466 			ksft_test_result_skip("%s\n", tests[i].name);
467 			break;
468 		default:
469 			ret = EXIT_FAILURE;
470 			ksft_test_result_fail("%s\n", tests[i].name);
471 			break;
472 		}
473 	}
474 
475 	return ret;
476 }
477