xref: /linux/tools/testing/selftests/cgroup/test_zswap.c (revision 61307b7be41a1f1039d1d1368810a1d92cb97b44)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/limits.h>
3 #include <unistd.h>
4 #include <stdio.h>
5 #include <signal.h>
6 #include <sys/sysinfo.h>
7 #include <string.h>
8 #include <sys/wait.h>
9 #include <sys/mman.h>
10 
11 #include "../kselftest.h"
12 #include "cgroup_util.h"
13 
/*
 * Read one unsigned decimal number from the beginning of the file at @path
 * and store it in @value.
 *
 * Returns 0 on success, or -1 when the file cannot be opened or does not
 * start with a number (in which case @value is left untouched).
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* The destination is a size_t, so the matching conversion is %zu. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
27 
/*
 * Write @value (followed by a newline) to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the character count reported by fprintf() on success, or a
 * negative value when the file cannot be opened or the write fails.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* @value is a size_t, so it has to be formatted with %zu. */
	ret = fprintf(file, "%zu\n", value);
	/*
	 * stdio may hold the text in its buffer until the stream is closed,
	 * so a failed write can first become visible in fclose().
	 */
	if (fclose(file) != 0)
		ret = -1;
	return ret;
}
40 
/*
 * Fetch the number stored in /proc/sys/vm/min_free_kbytes into @value.
 * The return value is whatever read_int() reports (0 on success, -1 on
 * failure).
 */
static int read_min_free_kb(size_t *value)
{
	static const char sysctl_path[] = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
45 
/*
 * Fetch the number stored in /sys/kernel/debug/zswap/stored_pages into
 * @value. The return value is whatever read_int() reports (0 on success,
 * -1 on failure).
 */
static int get_zswap_stored_pages(size_t *value)
{
	static const char debugfs_path[] = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(debugfs_path, value);
}
50 
/*
 * Look up the "zswpwb" entry in memory.stat of cgroup @cg through
 * cg_read_key_long() and hand the result back unchanged. Callers in this
 * file treat a negative result as a failed read.
 */
static long get_cg_wb_count(const char *cg)
{
	long written_back = cg_read_key_long(cg, "memory.stat", "zswpwb");

	return written_back;
}
55 
/*
 * Look up the "zswpout " entry in memory.stat of @cgroup through
 * cg_read_key_long() and hand the result back unchanged. Callers in this
 * file treat a negative result as a failed read.
 *
 * NOTE(review): the key is passed with a trailing space, presumably so that
 * it cannot match a longer key sharing the same prefix - confirm against
 * cg_read_key_long().
 */
static long get_zswpout(const char *cgroup)
{
	long swapped_out = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

	return swapped_out;
}
60 
/*
 * cg_run() callback: allocate a buffer, dirty it, then read it back.
 *
 * @cgroup is unused. @arg carries the buffer size in bytes, encoded in the
 * pointer value itself.
 *
 * Returns 0 when every byte that was written reads back unchanged, -1 when
 * the allocation fails or a byte differs.
 */
static int allocate_and_read_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);
	int ret = 0;

	if (!mem)
		return -1;

	/*
	 * Write one byte every 4095 bytes. The index is a size_t so that it
	 * has the same type as the bound it is compared against.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';

	/* Go through the allocated memory to (z)swap in and out pages */
	for (size_t i = 0; i < size; i += 4095) {
		if (mem[i] != 'a')
			ret = -1;
	}

	free(mem);
	return ret;
}
81 
/*
 * cg_run() callback: allocate a buffer and dirty it.
 *
 * @cgroup is unused. @arg carries the buffer size in bytes, encoded in the
 * pointer value itself.
 *
 * Returns 0 on success, -1 when the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;

	/*
	 * Write one byte every 4095 bytes. The index is a size_t so that it
	 * has the same type as the bound it is compared against.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';

	free(mem);
	return 0;
}
94 
/*
 * Create the cgroup @name below @root and set its memory.max to "1M".
 *
 * Returns the cgroup path obtained from cg_name() on success; callers in
 * this file release it with cg_destroy() followed by free(). Returns NULL
 * on any failure, after undoing whatever had been set up.
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
	char *path = cg_name(root, name);

	if (!path)
		return NULL;

	if (cg_create(path) == 0) {
		if (cg_write(path, "memory.max", "1M") == 0)
			return path;
		/* The cgroup exists but could not be limited: remove it. */
		cg_destroy(path);
	}

	free(path);
	return NULL;
}
112 
113 /*
114  * Sanity test to check that pages are written into zswap.
115  */
116 static int test_zswap_usage(const char *root)
117 {
118 	long zswpout_before, zswpout_after;
119 	int ret = KSFT_FAIL;
120 	char *test_group;
121 
122 	test_group = cg_name(root, "no_shrink_test");
123 	if (!test_group)
124 		goto out;
125 	if (cg_create(test_group))
126 		goto out;
127 	if (cg_write(test_group, "memory.max", "1M"))
128 		goto out;
129 
130 	zswpout_before = get_zswpout(test_group);
131 	if (zswpout_before < 0) {
132 		ksft_print_msg("Failed to get zswpout\n");
133 		goto out;
134 	}
135 
136 	/* Allocate more than memory.max to push memory into zswap */
137 	if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
138 		goto out;
139 
140 	/* Verify that pages come into zswap */
141 	zswpout_after = get_zswpout(test_group);
142 	if (zswpout_after <= zswpout_before) {
143 		ksft_print_msg("zswpout does not increase after test program\n");
144 		goto out;
145 	}
146 	ret = KSFT_PASS;
147 
148 out:
149 	cg_destroy(test_group);
150 	free(test_group);
151 	return ret;
152 }
153 
154 /*
155  * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
156  * the cgroup.
157  */
158 static int test_swapin_nozswap(const char *root)
159 {
160 	int ret = KSFT_FAIL;
161 	char *test_group;
162 	long swap_peak, zswpout;
163 
164 	test_group = cg_name(root, "no_zswap_test");
165 	if (!test_group)
166 		goto out;
167 	if (cg_create(test_group))
168 		goto out;
169 	if (cg_write(test_group, "memory.max", "8M"))
170 		goto out;
171 	if (cg_write(test_group, "memory.zswap.max", "0"))
172 		goto out;
173 
174 	/* Allocate and read more than memory.max to trigger swapin */
175 	if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
176 		goto out;
177 
178 	/* Verify that pages are swapped out, but no zswap happened */
179 	swap_peak = cg_read_long(test_group, "memory.swap.peak");
180 	if (swap_peak < 0) {
181 		ksft_print_msg("failed to get cgroup's swap_peak\n");
182 		goto out;
183 	}
184 
185 	if (swap_peak < MB(24)) {
186 		ksft_print_msg("at least 24MB of memory should be swapped out\n");
187 		goto out;
188 	}
189 
190 	zswpout = get_zswpout(test_group);
191 	if (zswpout < 0) {
192 		ksft_print_msg("failed to get zswpout\n");
193 		goto out;
194 	}
195 
196 	if (zswpout > 0) {
197 		ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
198 		goto out;
199 	}
200 
201 	ret = KSFT_PASS;
202 
203 out:
204 	cg_destroy(test_group);
205 	free(test_group);
206 	return ret;
207 }
208 
209 /* Simple test to verify the (z)swapin code paths */
210 static int test_zswapin(const char *root)
211 {
212 	int ret = KSFT_FAIL;
213 	char *test_group;
214 	long zswpin;
215 
216 	test_group = cg_name(root, "zswapin_test");
217 	if (!test_group)
218 		goto out;
219 	if (cg_create(test_group))
220 		goto out;
221 	if (cg_write(test_group, "memory.max", "8M"))
222 		goto out;
223 	if (cg_write(test_group, "memory.zswap.max", "max"))
224 		goto out;
225 
226 	/* Allocate and read more than memory.max to trigger (z)swap in */
227 	if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
228 		goto out;
229 
230 	zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin ");
231 	if (zswpin < 0) {
232 		ksft_print_msg("failed to get zswpin\n");
233 		goto out;
234 	}
235 
236 	if (zswpin < MB(24) / PAGE_SIZE) {
237 		ksft_print_msg("at least 24MB should be brought back from zswap\n");
238 		goto out;
239 	}
240 
241 	ret = KSFT_PASS;
242 
243 out:
244 	cg_destroy(test_group);
245 	free(test_group);
246 	return ret;
247 }
248 
/*
 * Attempt writeback with the following steps:
 * 1. Allocate memory.
 * 2. Reclaim memory equal to the amount that was allocated in step 1.
 *    This will move it into zswap.
 * 3. Save current zswap usage.
 * 4. Move the memory allocated in step 1 back in from zswap.
 * 5. Set zswap.max to half the amount that was recorded in step 3.
 * 6. Attempt to reclaim memory equal to the amount that was allocated,
 *    this will either trigger writeback if it's enabled, or reclamation
 *    will fail if writeback is disabled as there isn't enough zswap space.
 *
 * cg_run() callback. @cgroup is unused; @arg is the path of the cgroup
 * under test.
 *
 * Returns 0 when the last reclaim behaved as expected for the cgroup's
 * memory.zswap.writeback setting (it succeeded with writeback enabled, or
 * failed with -EAGAIN with writeback disabled), and non-zero otherwise.
 */
static int attempt_writeback(const char *cgroup, void *arg)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	char *test_group = arg;
	size_t memsize = MB(4);
	long zswap_usage, wb_setting;
	char *buf = NULL, *mem = NULL;
	bool wb_enabled;
	int ret = -1;

	/* The page size is used as a buffer size and as a loop stride */
	if (pagesize <= 0)
		return ret;

	/* A negative value is a failed read, not "writeback enabled" */
	wb_setting = cg_read_long(test_group, "memory.zswap.writeback");
	if (wb_setting < 0) {
		ksft_print_msg("Failed to read memory.zswap.writeback\n");
		return ret;
	}
	wb_enabled = wb_setting != 0;

	buf = malloc(pagesize);
	mem = malloc(memsize);
	if (!buf || !mem)
		goto out;

	/*
	 * Fill half of each page with increasing data, and keep other
	 * half empty, this will result in data that is still compressible
	 * and ends up in zswap, with material zswap usage.
	 */
	for (long i = 0; i < pagesize; i++)
		buf[i] = i < pagesize / 2 ? (char)i : 0;

	for (size_t off = 0; off < memsize; off += (size_t)pagesize)
		memcpy(&mem[off], buf, pagesize);

	/* Try and reclaim allocated memory */
	if (cg_write_numeric(test_group, "memory.reclaim", memsize)) {
		ksft_print_msg("Failed to reclaim all of the requested memory\n");
		goto out;
	}

	/* Half of this value becomes the new limit, so it must be valid */
	zswap_usage = cg_read_long(test_group, "memory.zswap.current");
	if (zswap_usage < 0) {
		ksft_print_msg("Failed to read memory.zswap.current\n");
		goto out;
	}

	/* zswpin */
	for (size_t off = 0; off < memsize; off += (size_t)pagesize) {
		if (memcmp(&mem[off], buf, pagesize)) {
			ksft_print_msg("invalid memory\n");
			goto out;
		}
	}

	if (cg_write_numeric(test_group, "memory.zswap.max", zswap_usage / 2))
		goto out;

	/*
	 * If writeback is enabled, trying to reclaim memory now will trigger a
	 * writeback as zswap.max is half of what was needed when reclaim ran the first time.
	 * If writeback is disabled, memory reclaim will fail as zswap is limited and
	 * it can't writeback to swap.
	 */
	ret = cg_write_numeric(test_group, "memory.reclaim", memsize);
	if (!wb_enabled)
		ret = (ret == -EAGAIN) ? 0 : -1;

out:
	free(buf);
	free(mem);
	return ret;
}
321 
322 /* Test to verify the zswap writeback path */
323 static int test_zswap_writeback(const char *root, bool wb)
324 {
325 	long zswpwb_before, zswpwb_after;
326 	int ret = KSFT_FAIL;
327 	char *test_group;
328 
329 	test_group = cg_name(root, "zswap_writeback_test");
330 	if (!test_group)
331 		goto out;
332 	if (cg_create(test_group))
333 		goto out;
334 	if (cg_write(test_group, "memory.zswap.writeback", wb ? "1" : "0"))
335 		goto out;
336 
337 	zswpwb_before = get_cg_wb_count(test_group);
338 	if (zswpwb_before != 0) {
339 		ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before);
340 		goto out;
341 	}
342 
343 	if (cg_run(test_group, attempt_writeback, (void *) test_group))
344 		goto out;
345 
346 	/* Verify that zswap writeback occurred only if writeback was enabled */
347 	zswpwb_after = get_cg_wb_count(test_group);
348 	if (zswpwb_after < 0)
349 		goto out;
350 
351 	if (wb != !!zswpwb_after) {
352 		ksft_print_msg("zswpwb_after is %ld while wb is %s",
353 				zswpwb_after, wb ? "enabled" : "disabled");
354 		goto out;
355 	}
356 
357 	ret = KSFT_PASS;
358 
359 out:
360 	cg_destroy(test_group);
361 	free(test_group);
362 	return ret;
363 }
364 
/* Run the writeback test with memory.zswap.writeback switched on. */
static int test_zswap_writeback_enabled(const char *root)
{
	const bool writeback = true;

	return test_zswap_writeback(root, writeback);
}
369 
/* Run the writeback test with memory.zswap.writeback switched off. */
static int test_zswap_writeback_disabled(const char *root)
{
	const bool writeback = false;

	return test_zswap_writeback(root, writeback);
}
374 
375 /*
376  * When trying to store a memcg page in zswap, if the memcg hits its memory
377  * limit in zswap, writeback should affect only the zswapped pages of that
378  * memcg.
379  */
380 static int test_no_invasive_cgroup_shrink(const char *root)
381 {
382 	int ret = KSFT_FAIL;
383 	size_t control_allocation_size = MB(10);
384 	char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL;
385 
386 	wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
387 	if (!wb_group)
388 		return KSFT_FAIL;
389 	if (cg_write(wb_group, "memory.zswap.max", "10K"))
390 		goto out;
391 	control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
392 	if (!control_group)
393 		goto out;
394 
395 	/* Push some test_group2 memory into zswap */
396 	if (cg_enter_current(control_group))
397 		goto out;
398 	control_allocation = malloc(control_allocation_size);
399 	for (int i = 0; i < control_allocation_size; i += 4095)
400 		control_allocation[i] = 'a';
401 	if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
402 		goto out;
403 
404 	/* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
405 	if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
406 		goto out;
407 
408 	/* Verify that only zswapped memory from gwb_group has been written back */
409 	if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
410 		ret = KSFT_PASS;
411 out:
412 	cg_enter_current(root);
413 	if (control_group) {
414 		cg_destroy(control_group);
415 		free(control_group);
416 	}
417 	cg_destroy(wb_group);
418 	free(wb_group);
419 	if (control_allocation)
420 		free(control_allocation);
421 	return ret;
422 }
423 
/*
 * Arguments exchanged between test_no_kmem_bypass() and the child it
 * spawns; the parent places the structure in a shared mapping.
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child has to allocate and touch */
	size_t target_alloc_bytes;
	/* Set by the child once its allocation attempt is finished */
	bool child_allocated;
};
428 
429 static int no_kmem_bypass_child(const char *cgroup, void *arg)
430 {
431 	struct no_kmem_bypass_child_args *values = arg;
432 	void *allocation;
433 
434 	allocation = malloc(values->target_alloc_bytes);
435 	if (!allocation) {
436 		values->child_allocated = true;
437 		return -1;
438 	}
439 	for (long i = 0; i < values->target_alloc_bytes; i += 4095)
440 		((char *)allocation)[i] = 'a';
441 	values->child_allocated = true;
442 	pause();
443 	free(allocation);
444 	return 0;
445 }
446 
447 /*
448  * When pages owned by a memcg are pushed to zswap by kswapd, they should be
449  * charged to that cgroup. This wasn't the case before commit
450  * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
451  *
452  * The test first allocates memory in a memcg, then raises min_free_kbytes to
453  * a very high value so that the allocation falls below low wm, then makes
454  * another allocation to trigger kswapd that should push the memcg-owned pages
455  * to zswap and verifies that the zswap pages are correctly charged.
456  *
457  * To be run on a VM with at most 4G of memory.
458  */
459 static int test_no_kmem_bypass(const char *root)
460 {
461 	size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
462 	struct no_kmem_bypass_child_args *values;
463 	size_t trigger_allocation_size;
464 	int wait_child_iteration = 0;
465 	long stored_pages_threshold;
466 	struct sysinfo sys_info;
467 	int ret = KSFT_FAIL;
468 	int child_status;
469 	char *test_group = NULL;
470 	pid_t child_pid;
471 
472 	/* Read sys info and compute test values accordingly */
473 	if (sysinfo(&sys_info) != 0)
474 		return KSFT_FAIL;
475 	if (sys_info.totalram > 5000000000)
476 		return KSFT_SKIP;
477 	values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
478 			PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
479 	if (values == MAP_FAILED)
480 		return KSFT_FAIL;
481 	if (read_min_free_kb(&min_free_kb_original))
482 		return KSFT_FAIL;
483 	min_free_kb_high = sys_info.totalram / 2000;
484 	min_free_kb_low = sys_info.totalram / 500000;
485 	values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
486 		sys_info.totalram * 5 / 100;
487 	stored_pages_threshold = sys_info.totalram / 5 / 4096;
488 	trigger_allocation_size = sys_info.totalram / 20;
489 
490 	/* Set up test memcg */
491 	test_group = cg_name(root, "kmem_bypass_test");
492 	if (!test_group)
493 		goto out;
494 
495 	/* Spawn memcg child and wait for it to allocate */
496 	set_min_free_kb(min_free_kb_low);
497 	if (cg_create(test_group))
498 		goto out;
499 	values->child_allocated = false;
500 	child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
501 	if (child_pid < 0)
502 		goto out;
503 	while (!values->child_allocated && wait_child_iteration++ < 10000)
504 		usleep(1000);
505 
506 	/* Try to wakeup kswapd and let it push child memory to zswap */
507 	set_min_free_kb(min_free_kb_high);
508 	for (int i = 0; i < 20; i++) {
509 		size_t stored_pages;
510 		char *trigger_allocation = malloc(trigger_allocation_size);
511 
512 		if (!trigger_allocation)
513 			break;
514 		for (int i = 0; i < trigger_allocation_size; i += 4095)
515 			trigger_allocation[i] = 'b';
516 		usleep(100000);
517 		free(trigger_allocation);
518 		if (get_zswap_stored_pages(&stored_pages))
519 			break;
520 		if (stored_pages < 0)
521 			break;
522 		/* If memory was pushed to zswap, verify it belongs to memcg */
523 		if (stored_pages > stored_pages_threshold) {
524 			int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
525 			int delta = stored_pages * 4096 - zswapped;
526 			int result_ok = delta < stored_pages * 4096 / 4;
527 
528 			ret = result_ok ? KSFT_PASS : KSFT_FAIL;
529 			break;
530 		}
531 	}
532 
533 	kill(child_pid, SIGTERM);
534 	waitpid(child_pid, &child_status, 0);
535 out:
536 	set_min_free_kb(min_free_kb_original);
537 	cg_destroy(test_group);
538 	free(test_group);
539 	return ret;
540 }
541 
542 #define T(x) { x, #x }
543 struct zswap_test {
544 	int (*fn)(const char *root);
545 	const char *name;
546 } tests[] = {
547 	T(test_zswap_usage),
548 	T(test_swapin_nozswap),
549 	T(test_zswapin),
550 	T(test_zswap_writeback_enabled),
551 	T(test_zswap_writeback_disabled),
552 	T(test_no_kmem_bypass),
553 	T(test_no_invasive_cgroup_shrink),
554 };
555 #undef T
556 
/* Report whether /sys/module/zswap exists, as seen by access(F_OK). */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;
	return true;
}
561 
562 int main(int argc, char **argv)
563 {
564 	char root[PATH_MAX];
565 	int i, ret = EXIT_SUCCESS;
566 
567 	if (cg_find_unified_root(root, sizeof(root), NULL))
568 		ksft_exit_skip("cgroup v2 isn't mounted\n");
569 
570 	if (!zswap_configured())
571 		ksft_exit_skip("zswap isn't configured\n");
572 
573 	/*
574 	 * Check that memory controller is available:
575 	 * memory is listed in cgroup.controllers
576 	 */
577 	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
578 		ksft_exit_skip("memory controller isn't available\n");
579 
580 	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
581 		if (cg_write(root, "cgroup.subtree_control", "+memory"))
582 			ksft_exit_skip("Failed to set memory controller\n");
583 
584 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
585 		switch (tests[i].fn(root)) {
586 		case KSFT_PASS:
587 			ksft_test_result_pass("%s\n", tests[i].name);
588 			break;
589 		case KSFT_SKIP:
590 			ksft_test_result_skip("%s\n", tests[i].name);
591 			break;
592 		default:
593 			ret = EXIT_FAILURE;
594 			ksft_test_result_fail("%s\n", tests[i].name);
595 			break;
596 		}
597 	}
598 
599 	return ret;
600 }
601