1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3
4 #include <linux/limits.h>
5 #include <unistd.h>
6 #include <stdio.h>
7 #include <signal.h>
8 #include <sys/sysinfo.h>
9 #include <string.h>
10 #include <sys/wait.h>
11 #include <sys/mman.h>
12
13 #include "../kselftest.h"
14 #include "cgroup_util.h"
15
/*
 * Read one unsigned decimal integer from the file at @path into @value.
 *
 * Returns 0 on success, or -1 if the file cannot be opened or does not
 * start with a number that fscanf() can convert.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* %zu is the conversion matching size_t *; %ld would expect a long *. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29
/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the non-negative character count reported by fprintf() on
 * success, or a negative value if the file cannot be opened, formatting
 * fails, or closing (and thereby flushing) the stream fails.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* %zu is the conversion matching size_t. */
	ret = fprintf(file, "%zu\n", value);
	/*
	 * The stream is buffered, so a write error may only be reported once
	 * fclose() flushes it; do not let that failure go unnoticed.
	 */
	if (fclose(file) != 0 && ret >= 0)
		ret = -1;
	return ret;
}
42
/* Load the value of /proc/sys/vm/min_free_kbytes into @value via read_int(). */
static int read_min_free_kb(size_t *value)
{
	static const char sysctl_path[] = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
47
/*
 * Load the value of /sys/kernel/debug/zswap/stored_pages into @value via
 * read_int().
 */
static int get_zswap_stored_pages(size_t *value)
{
	static const char debugfs_path[] = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(debugfs_path, value);
}
52
/* Look up the "zswpwb" key in @cg's memory.stat with cg_read_key_long(). */
static long get_cg_wb_count(const char *cg)
{
	long wb_count = cg_read_key_long(cg, "memory.stat", "zswpwb");

	return wb_count;
}
57
/* Look up the "zswpout " key in @cgroup's memory.stat with cg_read_key_long(). */
static long get_zswpout(const char *cgroup)
{
	long zswpout = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

	return zswpout;
}
62
/*
 * Allocate a buffer, write to it at a fixed stride, then read it back.
 *
 * @cgroup: unused.
 * @arg:    allocation size in bytes, carried as an integer cast to a pointer.
 *
 * Returns 0 if every written byte reads back intact, -1 if the allocation
 * fails or a byte differs.
 */
static int allocate_and_read_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);
	int ret = 0;

	if (!mem)
		return -1;
	/*
	 * The index is a size_t so it cannot overflow for large sizes. With a
	 * 4095-byte stride every 4096-byte span of the buffer gets one write.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';

	/* Go through the allocated memory to (z)swap in and out pages */
	for (size_t i = 0; i < size; i += 4095) {
		if (mem[i] != 'a')
			ret = -1;
	}

	free(mem);
	return ret;
}
83
/*
 * Allocate a buffer, write to it at a fixed stride, then free it.
 *
 * @cgroup: unused.
 * @arg:    allocation size in bytes, carried as an integer cast to a pointer.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/*
	 * The index is a size_t so it cannot overflow for large sizes. With a
	 * 4095-byte stride every 4096-byte span of the buffer gets one write.
	 */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
96
/*
 * Create the cgroup @name under @root and write "1M" to its memory.max.
 *
 * Returns the cgroup path obtained from cg_name(), which the caller must
 * free(), or NULL on failure. A cgroup that was created but could not be
 * configured is destroyed again before returning.
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
	char *path = cg_name(root, name);

	if (!path)
		return NULL;

	if (cg_create(path) == 0) {
		if (cg_write(path, "memory.max", "1M") == 0)
			return path;
		/* Created but not configurable: undo the creation. */
		cg_destroy(path);
	}

	free(path);
	return NULL;
}
114
115 /*
116 * Sanity test to check that pages are written into zswap.
117 */
test_zswap_usage(const char * root)118 static int test_zswap_usage(const char *root)
119 {
120 long zswpout_before, zswpout_after;
121 int ret = KSFT_FAIL;
122 char *test_group;
123
124 test_group = cg_name(root, "no_shrink_test");
125 if (!test_group)
126 goto out;
127 if (cg_create(test_group))
128 goto out;
129 if (cg_write(test_group, "memory.max", "1M"))
130 goto out;
131
132 zswpout_before = get_zswpout(test_group);
133 if (zswpout_before < 0) {
134 ksft_print_msg("Failed to get zswpout\n");
135 goto out;
136 }
137
138 /* Allocate more than memory.max to push memory into zswap */
139 if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
140 goto out;
141
142 /* Verify that pages come into zswap */
143 zswpout_after = get_zswpout(test_group);
144 if (zswpout_after <= zswpout_before) {
145 ksft_print_msg("zswpout does not increase after test program\n");
146 goto out;
147 }
148 ret = KSFT_PASS;
149
150 out:
151 cg_destroy(test_group);
152 free(test_group);
153 return ret;
154 }
155
156 /*
157 * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
158 * the cgroup.
159 */
test_swapin_nozswap(const char * root)160 static int test_swapin_nozswap(const char *root)
161 {
162 int ret = KSFT_FAIL;
163 char *test_group;
164 long swap_peak, zswpout;
165
166 test_group = cg_name(root, "no_zswap_test");
167 if (!test_group)
168 goto out;
169 if (cg_create(test_group))
170 goto out;
171 if (cg_write(test_group, "memory.max", "8M"))
172 goto out;
173 if (cg_write(test_group, "memory.zswap.max", "0"))
174 goto out;
175
176 /* Allocate and read more than memory.max to trigger swapin */
177 if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
178 goto out;
179
180 /* Verify that pages are swapped out, but no zswap happened */
181 swap_peak = cg_read_long(test_group, "memory.swap.peak");
182 if (swap_peak < 0) {
183 ksft_print_msg("failed to get cgroup's swap_peak\n");
184 goto out;
185 }
186
187 if (swap_peak < MB(24)) {
188 ksft_print_msg("at least 24MB of memory should be swapped out\n");
189 goto out;
190 }
191
192 zswpout = get_zswpout(test_group);
193 if (zswpout < 0) {
194 ksft_print_msg("failed to get zswpout\n");
195 goto out;
196 }
197
198 if (zswpout > 0) {
199 ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
200 goto out;
201 }
202
203 ret = KSFT_PASS;
204
205 out:
206 cg_destroy(test_group);
207 free(test_group);
208 return ret;
209 }
210
211 /* Simple test to verify the (z)swapin code paths */
test_zswapin(const char * root)212 static int test_zswapin(const char *root)
213 {
214 int ret = KSFT_FAIL;
215 char *test_group;
216 long zswpin;
217
218 test_group = cg_name(root, "zswapin_test");
219 if (!test_group)
220 goto out;
221 if (cg_create(test_group))
222 goto out;
223 if (cg_write(test_group, "memory.max", "8M"))
224 goto out;
225 if (cg_write(test_group, "memory.zswap.max", "max"))
226 goto out;
227
228 /* Allocate and read more than memory.max to trigger (z)swap in */
229 if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
230 goto out;
231
232 zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin ");
233 if (zswpin < 0) {
234 ksft_print_msg("failed to get zswpin\n");
235 goto out;
236 }
237
238 if (zswpin < MB(24) / PAGE_SIZE) {
239 ksft_print_msg("at least 24MB should be brought back from zswap\n");
240 goto out;
241 }
242
243 ret = KSFT_PASS;
244
245 out:
246 cg_destroy(test_group);
247 free(test_group);
248 return ret;
249 }
250
/*
 * Attempt writeback with the following steps:
 * 1. Allocate memory.
 * 2. Reclaim memory equal to the amount that was allocated in step 1.
 *    This will move it into zswap.
 * 3. Save the current zswap usage.
 * 4. Move the memory allocated in step 1 back in from zswap.
 * 5. Set zswap.max to half the amount that was recorded in step 3.
 * 6. Attempt to reclaim memory equal to the amount that was allocated.
 *    This will either trigger writeback if it is enabled, or reclamation
 *    will fail if writeback is disabled, as there isn't enough zswap space.
 *
 * @cgroup: cgroup whose memory.reclaim / memory.zswap.* files are used.
 * @arg:    pointer to a bool telling whether writeback is enabled.
 *
 * Returns 0 on the expected outcome: with writeback enabled, the final
 * reclaim write succeeded; with writeback disabled, it failed with -EAGAIN.
 * Any other outcome, or any earlier failure, yields a non-zero value.
 */
static int attempt_writeback(const char *cgroup, void *arg)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	size_t memsize = MB(4);
	char buf[pagesize];
	long zswap_usage;
	bool wb_enabled = *(bool *) arg;
	int ret = -1;
	char *mem;

	mem = malloc(memsize);
	if (!mem)
		return ret;

	/*
	 * Fill half of each page with increasing data, and keep the other
	 * half empty. This results in data that is still compressible
	 * and ends up in zswap, with material zswap usage.
	 */
	for (long i = 0; i < pagesize; i++)
		buf[i] = i < pagesize/2 ? (char) i : 0;

	for (size_t off = 0; off < memsize; off += pagesize)
		memcpy(&mem[off], buf, pagesize);

	/* Try and reclaim allocated memory */
	if (cg_write_numeric(cgroup, "memory.reclaim", memsize)) {
		ksft_print_msg("Failed to reclaim all of the requested memory\n");
		goto out;
	}

	/*
	 * A failed read must not be halved and written back as a limit:
	 * bail out instead of continuing with a meaningless value.
	 */
	zswap_usage = cg_read_long(cgroup, "memory.zswap.current");
	if (zswap_usage < 0) {
		ksft_print_msg("Failed to read memory.zswap.current\n");
		goto out;
	}

	/* zswpin */
	for (size_t off = 0; off < memsize; off += pagesize) {
		if (memcmp(&mem[off], buf, pagesize)) {
			ksft_print_msg("invalid memory\n");
			goto out;
		}
	}

	if (cg_write_numeric(cgroup, "memory.zswap.max", zswap_usage/2))
		goto out;

	/*
	 * If writeback is enabled, trying to reclaim memory now will trigger a
	 * writeback, as zswap.max is half of what was needed when reclaim ran
	 * the first time. If writeback is disabled, memory reclaim will fail,
	 * as zswap is limited and it can't write back to swap.
	 */
	ret = cg_write_numeric(cgroup, "memory.reclaim", memsize);
	if (!wb_enabled)
		ret = (ret == -EAGAIN) ? 0 : -1;

out:
	free(mem);
	return ret;
}
321
test_zswap_writeback_one(const char * cgroup,bool wb)322 static int test_zswap_writeback_one(const char *cgroup, bool wb)
323 {
324 long zswpwb_before, zswpwb_after;
325
326 zswpwb_before = get_cg_wb_count(cgroup);
327 if (zswpwb_before != 0) {
328 ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before);
329 return -1;
330 }
331
332 if (cg_run(cgroup, attempt_writeback, (void *) &wb))
333 return -1;
334
335 /* Verify that zswap writeback occurred only if writeback was enabled */
336 zswpwb_after = get_cg_wb_count(cgroup);
337 if (zswpwb_after < 0)
338 return -1;
339
340 if (wb != !!zswpwb_after) {
341 ksft_print_msg("zswpwb_after is %ld while wb is %s",
342 zswpwb_after, wb ? "enabled" : "disabled");
343 return -1;
344 }
345
346 return 0;
347 }
348
349 /* Test to verify the zswap writeback path */
test_zswap_writeback(const char * root,bool wb)350 static int test_zswap_writeback(const char *root, bool wb)
351 {
352 int ret = KSFT_FAIL;
353 char *test_group, *test_group_child = NULL;
354
355 if (cg_read_strcmp(root, "memory.zswap.writeback", "1"))
356 return KSFT_SKIP;
357
358 test_group = cg_name(root, "zswap_writeback_test");
359 if (!test_group)
360 goto out;
361 if (cg_create(test_group))
362 goto out;
363 if (cg_write(test_group, "memory.zswap.writeback", wb ? "1" : "0"))
364 goto out;
365
366 if (test_zswap_writeback_one(test_group, wb))
367 goto out;
368
369 /* Reset memory.zswap.max to max (modified by attempt_writeback), and
370 * set up child cgroup, whose memory.zswap.writeback is hardcoded to 1.
371 * Thus, the parent's setting shall be what's in effect. */
372 if (cg_write(test_group, "memory.zswap.max", "max"))
373 goto out;
374 if (cg_write(test_group, "cgroup.subtree_control", "+memory"))
375 goto out;
376
377 test_group_child = cg_name(test_group, "zswap_writeback_test_child");
378 if (!test_group_child)
379 goto out;
380 if (cg_create(test_group_child))
381 goto out;
382 if (cg_write(test_group_child, "memory.zswap.writeback", "1"))
383 goto out;
384
385 if (test_zswap_writeback_one(test_group_child, wb))
386 goto out;
387
388 ret = KSFT_PASS;
389
390 out:
391 if (test_group_child) {
392 cg_destroy(test_group_child);
393 free(test_group_child);
394 }
395 cg_destroy(test_group);
396 free(test_group);
397 return ret;
398 }
399
/* Test-table entry point: run test_zswap_writeback() with writeback on. */
static int test_zswap_writeback_enabled(const char *root)
{
	const bool wb = true;

	return test_zswap_writeback(root, wb);
}
404
/* Test-table entry point: run test_zswap_writeback() with writeback off. */
static int test_zswap_writeback_disabled(const char *root)
{
	const bool wb = false;

	return test_zswap_writeback(root, wb);
}
409
410 /*
411 * When trying to store a memcg page in zswap, if the memcg hits its memory
412 * limit in zswap, writeback should affect only the zswapped pages of that
413 * memcg.
414 */
test_no_invasive_cgroup_shrink(const char * root)415 static int test_no_invasive_cgroup_shrink(const char *root)
416 {
417 int ret = KSFT_FAIL;
418 size_t control_allocation_size = MB(10);
419 char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL;
420
421 wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
422 if (!wb_group)
423 return KSFT_FAIL;
424 if (cg_write(wb_group, "memory.zswap.max", "10K"))
425 goto out;
426 control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
427 if (!control_group)
428 goto out;
429
430 /* Push some test_group2 memory into zswap */
431 if (cg_enter_current(control_group))
432 goto out;
433 control_allocation = malloc(control_allocation_size);
434 for (int i = 0; i < control_allocation_size; i += 4095)
435 control_allocation[i] = 'a';
436 if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
437 goto out;
438
439 /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
440 if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
441 goto out;
442
443 /* Verify that only zswapped memory from gwb_group has been written back */
444 if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
445 ret = KSFT_PASS;
446 out:
447 cg_enter_current(root);
448 if (control_group) {
449 cg_destroy(control_group);
450 free(control_group);
451 }
452 cg_destroy(wb_group);
453 free(wb_group);
454 if (control_allocation)
455 free(control_allocation);
456 return ret;
457 }
458
/*
 * Arguments handed to no_kmem_bypass_child(); test_no_kmem_bypass() places
 * the structure in a MAP_SHARED mapping before starting the child.
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child should allocate and touch. */
	size_t target_alloc_bytes;
	/* Used as a flag: the child sets it once its allocation attempt is over. */
	size_t child_allocated;
};
463
no_kmem_bypass_child(const char * cgroup,void * arg)464 static int no_kmem_bypass_child(const char *cgroup, void *arg)
465 {
466 struct no_kmem_bypass_child_args *values = arg;
467 void *allocation;
468
469 allocation = malloc(values->target_alloc_bytes);
470 if (!allocation) {
471 values->child_allocated = true;
472 return -1;
473 }
474 for (long i = 0; i < values->target_alloc_bytes; i += 4095)
475 ((char *)allocation)[i] = 'a';
476 values->child_allocated = true;
477 pause();
478 free(allocation);
479 return 0;
480 }
481
482 /*
483 * When pages owned by a memcg are pushed to zswap by kswapd, they should be
484 * charged to that cgroup. This wasn't the case before commit
485 * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
486 *
487 * The test first allocates memory in a memcg, then raises min_free_kbytes to
488 * a very high value so that the allocation falls below low wm, then makes
489 * another allocation to trigger kswapd that should push the memcg-owned pages
490 * to zswap and verifies that the zswap pages are correctly charged.
491 *
492 * To be run on a VM with at most 4G of memory.
493 */
test_no_kmem_bypass(const char * root)494 static int test_no_kmem_bypass(const char *root)
495 {
496 size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
497 struct no_kmem_bypass_child_args *values;
498 size_t trigger_allocation_size;
499 int wait_child_iteration = 0;
500 long stored_pages_threshold;
501 struct sysinfo sys_info;
502 int ret = KSFT_FAIL;
503 int child_status;
504 char *test_group = NULL;
505 pid_t child_pid;
506
507 /* Read sys info and compute test values accordingly */
508 if (sysinfo(&sys_info) != 0)
509 return KSFT_FAIL;
510 if (sys_info.totalram > 5000000000)
511 return KSFT_SKIP;
512 values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
513 PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
514 if (values == MAP_FAILED)
515 return KSFT_FAIL;
516 if (read_min_free_kb(&min_free_kb_original))
517 return KSFT_FAIL;
518 min_free_kb_high = sys_info.totalram / 2000;
519 min_free_kb_low = sys_info.totalram / 500000;
520 values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
521 sys_info.totalram * 5 / 100;
522 stored_pages_threshold = sys_info.totalram / 5 / 4096;
523 trigger_allocation_size = sys_info.totalram / 20;
524
525 /* Set up test memcg */
526 test_group = cg_name(root, "kmem_bypass_test");
527 if (!test_group)
528 goto out;
529
530 /* Spawn memcg child and wait for it to allocate */
531 set_min_free_kb(min_free_kb_low);
532 if (cg_create(test_group))
533 goto out;
534 values->child_allocated = false;
535 child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
536 if (child_pid < 0)
537 goto out;
538 while (!values->child_allocated && wait_child_iteration++ < 10000)
539 usleep(1000);
540
541 /* Try to wakeup kswapd and let it push child memory to zswap */
542 set_min_free_kb(min_free_kb_high);
543 for (int i = 0; i < 20; i++) {
544 size_t stored_pages;
545 char *trigger_allocation = malloc(trigger_allocation_size);
546
547 if (!trigger_allocation)
548 break;
549 for (int i = 0; i < trigger_allocation_size; i += 4095)
550 trigger_allocation[i] = 'b';
551 usleep(100000);
552 free(trigger_allocation);
553 if (get_zswap_stored_pages(&stored_pages))
554 break;
555 if (stored_pages < 0)
556 break;
557 /* If memory was pushed to zswap, verify it belongs to memcg */
558 if (stored_pages > stored_pages_threshold) {
559 int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
560 int delta = stored_pages * 4096 - zswapped;
561 int result_ok = delta < stored_pages * 4096 / 4;
562
563 ret = result_ok ? KSFT_PASS : KSFT_FAIL;
564 break;
565 }
566 }
567
568 kill(child_pid, SIGTERM);
569 waitpid(child_pid, &child_status, 0);
570 out:
571 set_min_free_kb(min_free_kb_original);
572 cg_destroy(test_group);
573 free(test_group);
574 return ret;
575 }
576
577 #define T(x) { x, #x }
578 struct zswap_test {
579 int (*fn)(const char *root);
580 const char *name;
581 } tests[] = {
582 T(test_zswap_usage),
583 T(test_swapin_nozswap),
584 T(test_zswapin),
585 T(test_zswap_writeback_enabled),
586 T(test_zswap_writeback_disabled),
587 T(test_no_kmem_bypass),
588 T(test_no_invasive_cgroup_shrink),
589 };
590 #undef T
591
/* Report whether /sys/module/zswap exists, as checked by access(F_OK). */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;

	return true;
}
596
main(int argc,char ** argv)597 int main(int argc, char **argv)
598 {
599 char root[PATH_MAX];
600 int i, ret = EXIT_SUCCESS;
601
602 if (cg_find_unified_root(root, sizeof(root), NULL))
603 ksft_exit_skip("cgroup v2 isn't mounted\n");
604
605 if (!zswap_configured())
606 ksft_exit_skip("zswap isn't configured\n");
607
608 /*
609 * Check that memory controller is available:
610 * memory is listed in cgroup.controllers
611 */
612 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
613 ksft_exit_skip("memory controller isn't available\n");
614
615 if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
616 if (cg_write(root, "cgroup.subtree_control", "+memory"))
617 ksft_exit_skip("Failed to set memory controller\n");
618
619 for (i = 0; i < ARRAY_SIZE(tests); i++) {
620 switch (tests[i].fn(root)) {
621 case KSFT_PASS:
622 ksft_test_result_pass("%s\n", tests[i].name);
623 break;
624 case KSFT_SKIP:
625 ksft_test_result_skip("%s\n", tests[i].name);
626 break;
627 default:
628 ret = EXIT_FAILURE;
629 ksft_test_result_fail("%s\n", tests[i].name);
630 break;
631 }
632 }
633
634 return ret;
635 }
636