xref: /linux/tools/testing/selftests/cgroup/test_zswap.c (revision d53b8e36925256097a08d7cb749198d85cbf9b2b)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 
4 #include <linux/limits.h>
5 #include <unistd.h>
6 #include <stdio.h>
7 #include <signal.h>
8 #include <sys/sysinfo.h>
9 #include <string.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12 
13 #include "../kselftest.h"
14 #include "cgroup_util.h"
15 
/*
 * Read one unsigned decimal number from the start of the file at @path and
 * store it in @value.
 *
 * Returns 0 on success, -1 if the file cannot be opened or does not begin
 * with a parsable number.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* @value is a size_t *, so the matching conversion is %zu, not %ld. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29 
/*
 * Write @value (followed by a newline) to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the positive character count reported by fprintf() on success,
 * or a negative value if the file cannot be opened, formatted or flushed.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* @value is a size_t, so print it with %zu rather than %ld. */
	ret = fprintf(file, "%zu\n", value);
	/*
	 * stdio buffers the text, so a failing write may only surface when
	 * the stream is flushed by fclose().
	 */
	if (fclose(file) != 0)
		ret = -1;
	return ret;
}
42 
/*
 * Fetch the number stored in /proc/sys/vm/min_free_kbytes into @value.
 * The return value is whatever read_int() reports.
 */
static int read_min_free_kb(size_t *value)
{
	int status = read_int("/proc/sys/vm/min_free_kbytes", value);

	return status;
}
47 
/*
 * Fetch the number stored in the zswap debugfs "stored_pages" file into
 * @value.  The return value is whatever read_int() reports.
 */
static int get_zswap_stored_pages(size_t *value)
{
	int status = read_int("/sys/kernel/debug/zswap/stored_pages", value);

	return status;
}
52 
/* Look up the "zswpwb" entry of @cg's memory.stat file. */
static long get_cg_wb_count(const char *cg)
{
	long wb_count = cg_read_key_long(cg, "memory.stat", "zswpwb");

	return wb_count;
}
57 
/* Look up the "zswpout " entry of @cgroup's memory.stat file. */
static long get_zswpout(const char *cgroup)
{
	long zswpout = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

	return zswpout;
}
62 
/*
 * cg_run() callback: allocate a buffer, dirty it, then read it back.
 *
 * @cgroup is unused.  @arg carries the allocation size in bytes, cast to a
 * pointer.  One byte every 4095 bytes is written and later verified.
 *
 * Returns 0 when every probed byte reads back as written, -1 on allocation
 * failure or on a mismatch.
 */
static int allocate_and_read_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);
	int ret = 0;

	if (!mem)
		return -1;
	/* Index with size_t so the comparison against @size is not mixed-sign. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';

	/* Go through the allocated memory to (z)swap in and out pages */
	for (size_t i = 0; i < size; i += 4095) {
		if (mem[i] != 'a')
			ret = -1;
	}

	free(mem);
	return ret;
}
83 
/*
 * cg_run() callback: allocate a buffer and dirty it.
 *
 * @cgroup is unused.  @arg carries the allocation size in bytes, cast to a
 * pointer.  One byte every 4095 bytes is written before the buffer is freed.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/* Index with size_t so the comparison against @size is not mixed-sign. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
96 
/*
 * Build the cgroup name from @root and @name, create that cgroup and write
 * "1M" to its memory.max.
 *
 * Returns the heap-allocated cgroup name on success (the caller frees it),
 * or NULL on any failure.  A cgroup whose limit could not be written is
 * destroyed again before returning.
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
	char *path = cg_name(root, name);

	if (path == NULL)
		return NULL;

	if (cg_create(path) == 0) {
		if (cg_write(path, "memory.max", "1M") == 0)
			return path;
		cg_destroy(path);
	}

	free(path);
	return NULL;
}
114 
115 /*
116  * Sanity test to check that pages are written into zswap.
117  */
118 static int test_zswap_usage(const char *root)
119 {
120 	long zswpout_before, zswpout_after;
121 	int ret = KSFT_FAIL;
122 	char *test_group;
123 
124 	test_group = cg_name(root, "no_shrink_test");
125 	if (!test_group)
126 		goto out;
127 	if (cg_create(test_group))
128 		goto out;
129 	if (cg_write(test_group, "memory.max", "1M"))
130 		goto out;
131 
132 	zswpout_before = get_zswpout(test_group);
133 	if (zswpout_before < 0) {
134 		ksft_print_msg("Failed to get zswpout\n");
135 		goto out;
136 	}
137 
138 	/* Allocate more than memory.max to push memory into zswap */
139 	if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
140 		goto out;
141 
142 	/* Verify that pages come into zswap */
143 	zswpout_after = get_zswpout(test_group);
144 	if (zswpout_after <= zswpout_before) {
145 		ksft_print_msg("zswpout does not increase after test program\n");
146 		goto out;
147 	}
148 	ret = KSFT_PASS;
149 
150 out:
151 	cg_destroy(test_group);
152 	free(test_group);
153 	return ret;
154 }
155 
156 /*
157  * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
158  * the cgroup.
159  */
160 static int test_swapin_nozswap(const char *root)
161 {
162 	int ret = KSFT_FAIL;
163 	char *test_group;
164 	long swap_peak, zswpout;
165 
166 	test_group = cg_name(root, "no_zswap_test");
167 	if (!test_group)
168 		goto out;
169 	if (cg_create(test_group))
170 		goto out;
171 	if (cg_write(test_group, "memory.max", "8M"))
172 		goto out;
173 	if (cg_write(test_group, "memory.zswap.max", "0"))
174 		goto out;
175 
176 	/* Allocate and read more than memory.max to trigger swapin */
177 	if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
178 		goto out;
179 
180 	/* Verify that pages are swapped out, but no zswap happened */
181 	swap_peak = cg_read_long(test_group, "memory.swap.peak");
182 	if (swap_peak < 0) {
183 		ksft_print_msg("failed to get cgroup's swap_peak\n");
184 		goto out;
185 	}
186 
187 	if (swap_peak < MB(24)) {
188 		ksft_print_msg("at least 24MB of memory should be swapped out\n");
189 		goto out;
190 	}
191 
192 	zswpout = get_zswpout(test_group);
193 	if (zswpout < 0) {
194 		ksft_print_msg("failed to get zswpout\n");
195 		goto out;
196 	}
197 
198 	if (zswpout > 0) {
199 		ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
200 		goto out;
201 	}
202 
203 	ret = KSFT_PASS;
204 
205 out:
206 	cg_destroy(test_group);
207 	free(test_group);
208 	return ret;
209 }
210 
211 /* Simple test to verify the (z)swapin code paths */
212 static int test_zswapin(const char *root)
213 {
214 	int ret = KSFT_FAIL;
215 	char *test_group;
216 	long zswpin;
217 
218 	test_group = cg_name(root, "zswapin_test");
219 	if (!test_group)
220 		goto out;
221 	if (cg_create(test_group))
222 		goto out;
223 	if (cg_write(test_group, "memory.max", "8M"))
224 		goto out;
225 	if (cg_write(test_group, "memory.zswap.max", "max"))
226 		goto out;
227 
228 	/* Allocate and read more than memory.max to trigger (z)swap in */
229 	if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
230 		goto out;
231 
232 	zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin ");
233 	if (zswpin < 0) {
234 		ksft_print_msg("failed to get zswpin\n");
235 		goto out;
236 	}
237 
238 	if (zswpin < MB(24) / PAGE_SIZE) {
239 		ksft_print_msg("at least 24MB should be brought back from zswap\n");
240 		goto out;
241 	}
242 
243 	ret = KSFT_PASS;
244 
245 out:
246 	cg_destroy(test_group);
247 	free(test_group);
248 	return ret;
249 }
250 
/*
 * Attempt writeback with the following steps:
 * 1. Allocate memory.
 * 2. Reclaim memory equal to the amount that was allocated in step 1.
 *    This will move it into zswap.
 * 3. Save current zswap usage.
 * 4. Move the memory allocated in step 1 back in from zswap.
 * 5. Set zswap.max to half the amount that was recorded in step 3.
 * 6. Attempt to reclaim memory equal to the amount that was allocated,
 *    this will either trigger writeback if it's enabled, or reclamation
 *    will fail if writeback is disabled as there isn't enough zswap space.
 *
 * cg_run() callback.  @cgroup is unused; @arg is the name of the cgroup whose
 * control files are read and written.
 *
 * With writeback enabled the result of the final memory.reclaim write is
 * returned as-is.  With writeback disabled, 0 is returned only when that
 * write reported -EAGAIN.  Any earlier failure returns -1.
 */
static int attempt_writeback(const char *cgroup, void *arg)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	char *test_group = arg;
	size_t memsize = MB(4);
	char buf[pagesize];
	long zswap_usage;
	long wb_setting;
	bool wb_enabled;
	int ret = -1;
	char *mem;

	/* A failed read must not be mistaken for "writeback enabled". */
	wb_setting = cg_read_long(test_group, "memory.zswap.writeback");
	if (wb_setting < 0)
		return ret;
	wb_enabled = wb_setting;

	mem = malloc(memsize);
	if (!mem)
		return ret;

	/*
	 * Fill half of each page with increasing data, and keep other
	 * half empty, this will result in data that is still compressible
	 * and ends up in zswap, with material zswap usage.
	 */
	for (long i = 0; i < pagesize; i++)
		buf[i] = i < pagesize/2 ? (char) i : 0;

	for (size_t off = 0; off < memsize; off += pagesize)
		memcpy(&mem[off], buf, pagesize);

	/* Try and reclaim allocated memory */
	if (cg_write_numeric(test_group, "memory.reclaim", memsize)) {
		ksft_print_msg("Failed to reclaim all of the requested memory\n");
		goto out;
	}

	/* Do not derive a zswap.max limit from a failed read. */
	zswap_usage = cg_read_long(test_group, "memory.zswap.current");
	if (zswap_usage < 0)
		goto out;

	/* zswpin */
	for (size_t off = 0; off < memsize; off += pagesize) {
		if (memcmp(&mem[off], buf, pagesize)) {
			ksft_print_msg("invalid memory\n");
			goto out;
		}
	}

	if (cg_write_numeric(test_group, "memory.zswap.max", zswap_usage/2))
		goto out;

	/*
	 * If writeback is enabled, trying to reclaim memory now will trigger a
	 * writeback as zswap.max is half of what was needed when reclaim ran the first time.
	 * If writeback is disabled, memory reclaim will fail as zswap is limited and
	 * it can't writeback to swap.
	 */
	ret = cg_write_numeric(test_group, "memory.reclaim", memsize);
	if (!wb_enabled)
		ret = (ret == -EAGAIN) ? 0 : -1;

out:
	free(mem);
	return ret;
}
323 
324 /* Test to verify the zswap writeback path */
325 static int test_zswap_writeback(const char *root, bool wb)
326 {
327 	long zswpwb_before, zswpwb_after;
328 	int ret = KSFT_FAIL;
329 	char *test_group;
330 
331 	test_group = cg_name(root, "zswap_writeback_test");
332 	if (!test_group)
333 		goto out;
334 	if (cg_create(test_group))
335 		goto out;
336 	if (cg_write(test_group, "memory.zswap.writeback", wb ? "1" : "0"))
337 		goto out;
338 
339 	zswpwb_before = get_cg_wb_count(test_group);
340 	if (zswpwb_before != 0) {
341 		ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before);
342 		goto out;
343 	}
344 
345 	if (cg_run(test_group, attempt_writeback, (void *) test_group))
346 		goto out;
347 
348 	/* Verify that zswap writeback occurred only if writeback was enabled */
349 	zswpwb_after = get_cg_wb_count(test_group);
350 	if (zswpwb_after < 0)
351 		goto out;
352 
353 	if (wb != !!zswpwb_after) {
354 		ksft_print_msg("zswpwb_after is %ld while wb is %s",
355 				zswpwb_after, wb ? "enabled" : "disabled");
356 		goto out;
357 	}
358 
359 	ret = KSFT_PASS;
360 
361 out:
362 	cg_destroy(test_group);
363 	free(test_group);
364 	return ret;
365 }
366 
/* Run test_zswap_writeback() under @root with writeback switched on. */
static int test_zswap_writeback_enabled(const char *root)
{
	const bool writeback = true;

	return test_zswap_writeback(root, writeback);
}
371 
/* Run test_zswap_writeback() under @root with writeback switched off. */
static int test_zswap_writeback_disabled(const char *root)
{
	const bool writeback = false;

	return test_zswap_writeback(root, writeback);
}
376 
377 /*
378  * When trying to store a memcg page in zswap, if the memcg hits its memory
379  * limit in zswap, writeback should affect only the zswapped pages of that
380  * memcg.
381  */
382 static int test_no_invasive_cgroup_shrink(const char *root)
383 {
384 	int ret = KSFT_FAIL;
385 	size_t control_allocation_size = MB(10);
386 	char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL;
387 
388 	wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
389 	if (!wb_group)
390 		return KSFT_FAIL;
391 	if (cg_write(wb_group, "memory.zswap.max", "10K"))
392 		goto out;
393 	control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
394 	if (!control_group)
395 		goto out;
396 
397 	/* Push some test_group2 memory into zswap */
398 	if (cg_enter_current(control_group))
399 		goto out;
400 	control_allocation = malloc(control_allocation_size);
401 	for (int i = 0; i < control_allocation_size; i += 4095)
402 		control_allocation[i] = 'a';
403 	if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
404 		goto out;
405 
406 	/* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
407 	if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
408 		goto out;
409 
410 	/* Verify that only zswapped memory from gwb_group has been written back */
411 	if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
412 		ret = KSFT_PASS;
413 out:
414 	cg_enter_current(root);
415 	if (control_group) {
416 		cg_destroy(control_group);
417 		free(control_group);
418 	}
419 	cg_destroy(wb_group);
420 	free(wb_group);
421 	if (control_allocation)
422 		free(control_allocation);
423 	return ret;
424 }
425 
/* Arguments and status exchanged with no_kmem_bypass_child(). */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child should allocate. */
	size_t target_alloc_bytes;
	/* Set non-zero by the child once its allocation attempt is over. */
	size_t child_allocated;
};

/*
 * Allocate values->target_alloc_bytes, dirty one byte every 4095 bytes, flag
 * completion in values->child_allocated and then block in pause().
 *
 * @cgroup is unused; @arg points to a struct no_kmem_bypass_child_args.
 *
 * Returns -1 if the allocation fails (child_allocated is still set so that a
 * waiter is released), 0 once pause() returns.
 */
static int no_kmem_bypass_child(const char *cgroup, void *arg)
{
	struct no_kmem_bypass_child_args *values = arg;
	char *allocation;

	allocation = malloc(values->target_alloc_bytes);
	if (!allocation) {
		values->child_allocated = true;
		return -1;
	}
	/* Index with size_t so the comparison against the size is not mixed-sign. */
	for (size_t i = 0; i < values->target_alloc_bytes; i += 4095)
		allocation[i] = 'a';
	values->child_allocated = true;
	pause();
	free(allocation);
	return 0;
}
448 
449 /*
450  * When pages owned by a memcg are pushed to zswap by kswapd, they should be
451  * charged to that cgroup. This wasn't the case before commit
452  * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
453  *
454  * The test first allocates memory in a memcg, then raises min_free_kbytes to
455  * a very high value so that the allocation falls below low wm, then makes
456  * another allocation to trigger kswapd that should push the memcg-owned pages
457  * to zswap and verifies that the zswap pages are correctly charged.
458  *
459  * To be run on a VM with at most 4G of memory.
460  */
461 static int test_no_kmem_bypass(const char *root)
462 {
463 	size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
464 	struct no_kmem_bypass_child_args *values;
465 	size_t trigger_allocation_size;
466 	int wait_child_iteration = 0;
467 	long stored_pages_threshold;
468 	struct sysinfo sys_info;
469 	int ret = KSFT_FAIL;
470 	int child_status;
471 	char *test_group = NULL;
472 	pid_t child_pid;
473 
474 	/* Read sys info and compute test values accordingly */
475 	if (sysinfo(&sys_info) != 0)
476 		return KSFT_FAIL;
477 	if (sys_info.totalram > 5000000000)
478 		return KSFT_SKIP;
479 	values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
480 			PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
481 	if (values == MAP_FAILED)
482 		return KSFT_FAIL;
483 	if (read_min_free_kb(&min_free_kb_original))
484 		return KSFT_FAIL;
485 	min_free_kb_high = sys_info.totalram / 2000;
486 	min_free_kb_low = sys_info.totalram / 500000;
487 	values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
488 		sys_info.totalram * 5 / 100;
489 	stored_pages_threshold = sys_info.totalram / 5 / 4096;
490 	trigger_allocation_size = sys_info.totalram / 20;
491 
492 	/* Set up test memcg */
493 	test_group = cg_name(root, "kmem_bypass_test");
494 	if (!test_group)
495 		goto out;
496 
497 	/* Spawn memcg child and wait for it to allocate */
498 	set_min_free_kb(min_free_kb_low);
499 	if (cg_create(test_group))
500 		goto out;
501 	values->child_allocated = false;
502 	child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
503 	if (child_pid < 0)
504 		goto out;
505 	while (!values->child_allocated && wait_child_iteration++ < 10000)
506 		usleep(1000);
507 
508 	/* Try to wakeup kswapd and let it push child memory to zswap */
509 	set_min_free_kb(min_free_kb_high);
510 	for (int i = 0; i < 20; i++) {
511 		size_t stored_pages;
512 		char *trigger_allocation = malloc(trigger_allocation_size);
513 
514 		if (!trigger_allocation)
515 			break;
516 		for (int i = 0; i < trigger_allocation_size; i += 4095)
517 			trigger_allocation[i] = 'b';
518 		usleep(100000);
519 		free(trigger_allocation);
520 		if (get_zswap_stored_pages(&stored_pages))
521 			break;
522 		if (stored_pages < 0)
523 			break;
524 		/* If memory was pushed to zswap, verify it belongs to memcg */
525 		if (stored_pages > stored_pages_threshold) {
526 			int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
527 			int delta = stored_pages * 4096 - zswapped;
528 			int result_ok = delta < stored_pages * 4096 / 4;
529 
530 			ret = result_ok ? KSFT_PASS : KSFT_FAIL;
531 			break;
532 		}
533 	}
534 
535 	kill(child_pid, SIGTERM);
536 	waitpid(child_pid, &child_status, 0);
537 out:
538 	set_min_free_kb(min_free_kb_original);
539 	cg_destroy(test_group);
540 	free(test_group);
541 	return ret;
542 }
543 
544 #define T(x) { x, #x }
545 struct zswap_test {
546 	int (*fn)(const char *root);
547 	const char *name;
548 } tests[] = {
549 	T(test_zswap_usage),
550 	T(test_swapin_nozswap),
551 	T(test_zswapin),
552 	T(test_zswap_writeback_enabled),
553 	T(test_zswap_writeback_disabled),
554 	T(test_no_kmem_bypass),
555 	T(test_no_invasive_cgroup_shrink),
556 };
557 #undef T
558 
/* Report whether /sys/module/zswap exists. */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;

	return true;
}
563 
564 int main(int argc, char **argv)
565 {
566 	char root[PATH_MAX];
567 	int i, ret = EXIT_SUCCESS;
568 
569 	if (cg_find_unified_root(root, sizeof(root), NULL))
570 		ksft_exit_skip("cgroup v2 isn't mounted\n");
571 
572 	if (!zswap_configured())
573 		ksft_exit_skip("zswap isn't configured\n");
574 
575 	/*
576 	 * Check that memory controller is available:
577 	 * memory is listed in cgroup.controllers
578 	 */
579 	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
580 		ksft_exit_skip("memory controller isn't available\n");
581 
582 	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
583 		if (cg_write(root, "cgroup.subtree_control", "+memory"))
584 			ksft_exit_skip("Failed to set memory controller\n");
585 
586 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
587 		switch (tests[i].fn(root)) {
588 		case KSFT_PASS:
589 			ksft_test_result_pass("%s\n", tests[i].name);
590 			break;
591 		case KSFT_SKIP:
592 			ksft_test_result_skip("%s\n", tests[i].name);
593 			break;
594 		default:
595 			ret = EXIT_FAILURE;
596 			ksft_test_result_fail("%s\n", tests[i].name);
597 			break;
598 		}
599 	}
600 
601 	return ret;
602 }
603