1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/limits.h> 3 #include <unistd.h> 4 #include <stdio.h> 5 #include <signal.h> 6 #include <sys/sysinfo.h> 7 #include <string.h> 8 #include <sys/wait.h> 9 #include <sys/mman.h> 10 11 #include "../kselftest.h" 12 #include "cgroup_util.h" 13 14 static int read_int(const char *path, size_t *value) 15 { 16 FILE *file; 17 int ret = 0; 18 19 file = fopen(path, "r"); 20 if (!file) 21 return -1; 22 if (fscanf(file, "%ld", value) != 1) 23 ret = -1; 24 fclose(file); 25 return ret; 26 } 27 28 static int set_min_free_kb(size_t value) 29 { 30 FILE *file; 31 int ret; 32 33 file = fopen("/proc/sys/vm/min_free_kbytes", "w"); 34 if (!file) 35 return -1; 36 ret = fprintf(file, "%ld\n", value); 37 fclose(file); 38 return ret; 39 } 40 41 static int read_min_free_kb(size_t *value) 42 { 43 return read_int("/proc/sys/vm/min_free_kbytes", value); 44 } 45 46 static int get_zswap_stored_pages(size_t *value) 47 { 48 return read_int("/sys/kernel/debug/zswap/stored_pages", value); 49 } 50 51 static long get_cg_wb_count(const char *cg) 52 { 53 return cg_read_key_long(cg, "memory.stat", "zswpwb"); 54 } 55 56 static long get_zswpout(const char *cgroup) 57 { 58 return cg_read_key_long(cgroup, "memory.stat", "zswpout "); 59 } 60 61 static int allocate_and_read_bytes(const char *cgroup, void *arg) 62 { 63 size_t size = (size_t)arg; 64 char *mem = (char *)malloc(size); 65 int ret = 0; 66 67 if (!mem) 68 return -1; 69 for (int i = 0; i < size; i += 4095) 70 mem[i] = 'a'; 71 72 /* Go through the allocated memory to (z)swap in and out pages */ 73 for (int i = 0; i < size; i += 4095) { 74 if (mem[i] != 'a') 75 ret = -1; 76 } 77 78 free(mem); 79 return ret; 80 } 81 82 static int allocate_bytes(const char *cgroup, void *arg) 83 { 84 size_t size = (size_t)arg; 85 char *mem = (char *)malloc(size); 86 87 if (!mem) 88 return -1; 89 for (int i = 0; i < size; i += 4095) 90 mem[i] = 'a'; 91 free(mem); 92 return 0; 93 } 94 95 static char *setup_test_group_1M(const char *root, const char *name) 96 { 97 char *group_name = cg_name(root, name); 98 99 if (!group_name) 100 return NULL; 101 if (cg_create(group_name)) 102 goto fail; 103 if (cg_write(group_name, "memory.max", "1M")) { 104 cg_destroy(group_name); 105 goto fail; 106 } 107 return group_name; 108 fail: 109 free(group_name); 110 return NULL; 111 } 112 113 /* 114 * Sanity test to check that pages are written into zswap. 115 */ 116 static int test_zswap_usage(const char *root) 117 { 118 long zswpout_before, zswpout_after; 119 int ret = KSFT_FAIL; 120 char *test_group; 121 122 test_group = cg_name(root, "no_shrink_test"); 123 if (!test_group) 124 goto out; 125 if (cg_create(test_group)) 126 goto out; 127 if (cg_write(test_group, "memory.max", "1M")) 128 goto out; 129 130 zswpout_before = get_zswpout(test_group); 131 if (zswpout_before < 0) { 132 ksft_print_msg("Failed to get zswpout\n"); 133 goto out; 134 } 135 136 /* Allocate more than memory.max to push memory into zswap */ 137 if (cg_run(test_group, allocate_bytes, (void *)MB(4))) 138 goto out; 139 140 /* Verify that pages come into zswap */ 141 zswpout_after = get_zswpout(test_group); 142 if (zswpout_after <= zswpout_before) { 143 ksft_print_msg("zswpout does not increase after test program\n"); 144 goto out; 145 } 146 ret = KSFT_PASS; 147 148 out: 149 cg_destroy(test_group); 150 free(test_group); 151 return ret; 152 } 153 154 /* 155 * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for 156 * the cgroup. 157 */ 158 static int test_swapin_nozswap(const char *root) 159 { 160 int ret = KSFT_FAIL; 161 char *test_group; 162 long swap_peak, zswpout; 163 164 test_group = cg_name(root, "no_zswap_test"); 165 if (!test_group) 166 goto out; 167 if (cg_create(test_group)) 168 goto out; 169 if (cg_write(test_group, "memory.max", "8M")) 170 goto out; 171 if (cg_write(test_group, "memory.zswap.max", "0")) 172 goto out; 173 174 /* Allocate and read more than memory.max to trigger swapin */ 175 if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32))) 176 goto out; 177 178 /* Verify that pages are swapped out, but no zswap happened */ 179 swap_peak = cg_read_long(test_group, "memory.swap.peak"); 180 if (swap_peak < 0) { 181 ksft_print_msg("failed to get cgroup's swap_peak\n"); 182 goto out; 183 } 184 185 if (swap_peak < MB(24)) { 186 ksft_print_msg("at least 24MB of memory should be swapped out\n"); 187 goto out; 188 } 189 190 zswpout = get_zswpout(test_group); 191 if (zswpout < 0) { 192 ksft_print_msg("failed to get zswpout\n"); 193 goto out; 194 } 195 196 if (zswpout > 0) { 197 ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n"); 198 goto out; 199 } 200 201 ret = KSFT_PASS; 202 203 out: 204 cg_destroy(test_group); 205 free(test_group); 206 return ret; 207 } 208 209 /* Simple test to verify the (z)swapin code paths */ 210 static int test_zswapin(const char *root) 211 { 212 int ret = KSFT_FAIL; 213 char *test_group; 214 long zswpin; 215 216 test_group = cg_name(root, "zswapin_test"); 217 if (!test_group) 218 goto out; 219 if (cg_create(test_group)) 220 goto out; 221 if (cg_write(test_group, "memory.max", "8M")) 222 goto out; 223 if (cg_write(test_group, "memory.zswap.max", "max")) 224 goto out; 225 226 /* Allocate and read more than memory.max to trigger (z)swap in */ 227 if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32))) 228 goto out; 229 230 zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin "); 231 if (zswpin < 0) { 232 ksft_print_msg("failed to get zswpin\n"); 233 goto out; 234 } 235 236 if (zswpin < MB(24) / PAGE_SIZE) { 237 ksft_print_msg("at least 24MB should be brought back from zswap\n"); 238 goto out; 239 } 240 241 ret = KSFT_PASS; 242 243 out: 244 cg_destroy(test_group); 245 free(test_group); 246 return ret; 247 } 248 249 /* 250 * Attempt writeback with the following steps: 251 * 1. Allocate memory. 252 * 2. Reclaim memory equal to the amount that was allocated in step 1. 253 This will move it into zswap. 254 * 3. Save current zswap usage. 255 * 4. Move the memory allocated in step 1 back in from zswap. 256 * 5. Set zswap.max to half the amount that was recorded in step 3. 257 * 6. Attempt to reclaim memory equal to the amount that was allocated, 258 this will either trigger writeback if it's enabled, or reclamation 259 will fail if writeback is disabled as there isn't enough zswap space. 260 */ 261 static int attempt_writeback(const char *cgroup, void *arg) 262 { 263 long pagesize = sysconf(_SC_PAGESIZE); 264 char *test_group = arg; 265 size_t memsize = MB(4); 266 char buf[pagesize]; 267 long zswap_usage; 268 bool wb_enabled; 269 int ret = -1; 270 char *mem; 271 272 wb_enabled = cg_read_long(test_group, "memory.zswap.writeback"); 273 mem = (char *)malloc(memsize); 274 if (!mem) 275 return ret; 276 277 /* 278 * Fill half of each page with increasing data, and keep other 279 * half empty, this will result in data that is still compressible 280 * and ends up in zswap, with material zswap usage. 281 */ 282 for (int i = 0; i < pagesize; i++) 283 buf[i] = i < pagesize/2 ? (char) i : 0; 284 285 for (int i = 0; i < memsize; i += pagesize) 286 memcpy(&mem[i], buf, pagesize); 287 288 /* Try and reclaim allocated memory */ 289 if (cg_write_numeric(test_group, "memory.reclaim", memsize)) { 290 ksft_print_msg("Failed to reclaim all of the requested memory\n"); 291 goto out; 292 } 293 294 zswap_usage = cg_read_long(test_group, "memory.zswap.current"); 295 296 /* zswpin */ 297 for (int i = 0; i < memsize; i += pagesize) { 298 if (memcmp(&mem[i], buf, pagesize)) { 299 ksft_print_msg("invalid memory\n"); 300 goto out; 301 } 302 } 303 304 if (cg_write_numeric(test_group, "memory.zswap.max", zswap_usage/2)) 305 goto out; 306 307 /* 308 * If writeback is enabled, trying to reclaim memory now will trigger a 309 * writeback as zswap.max is half of what was needed when reclaim ran the first time. 310 * If writeback is disabled, memory reclaim will fail as zswap is limited and 311 * it can't writeback to swap. 312 */ 313 ret = cg_write_numeric(test_group, "memory.reclaim", memsize); 314 if (!wb_enabled) 315 ret = (ret == -EAGAIN) ? 0 : -1; 316 317 out: 318 free(mem); 319 return ret; 320 } 321 322 /* Test to verify the zswap writeback path */ 323 static int test_zswap_writeback(const char *root, bool wb) 324 { 325 long zswpwb_before, zswpwb_after; 326 int ret = KSFT_FAIL; 327 char *test_group; 328 329 test_group = cg_name(root, "zswap_writeback_test"); 330 if (!test_group) 331 goto out; 332 if (cg_create(test_group)) 333 goto out; 334 if (cg_write(test_group, "memory.zswap.writeback", wb ? "1" : "0")) 335 goto out; 336 337 zswpwb_before = get_cg_wb_count(test_group); 338 if (zswpwb_before != 0) { 339 ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before); 340 goto out; 341 } 342 343 if (cg_run(test_group, attempt_writeback, (void *) test_group)) 344 goto out; 345 346 /* Verify that zswap writeback occurred only if writeback was enabled */ 347 zswpwb_after = get_cg_wb_count(test_group); 348 if (zswpwb_after < 0) 349 goto out; 350 351 if (wb != !!zswpwb_after) { 352 ksft_print_msg("zswpwb_after is %ld while wb is %s", 353 zswpwb_after, wb ? "enabled" : "disabled"); 354 goto out; 355 } 356 357 ret = KSFT_PASS; 358 359 out: 360 cg_destroy(test_group); 361 free(test_group); 362 return ret; 363 } 364 365 static int test_zswap_writeback_enabled(const char *root) 366 { 367 return test_zswap_writeback(root, true); 368 } 369 370 static int test_zswap_writeback_disabled(const char *root) 371 { 372 return test_zswap_writeback(root, false); 373 } 374 375 /* 376 * When trying to store a memcg page in zswap, if the memcg hits its memory 377 * limit in zswap, writeback should affect only the zswapped pages of that 378 * memcg. 379 */ 380 static int test_no_invasive_cgroup_shrink(const char *root) 381 { 382 int ret = KSFT_FAIL; 383 size_t control_allocation_size = MB(10); 384 char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL; 385 386 wb_group = setup_test_group_1M(root, "per_memcg_wb_test1"); 387 if (!wb_group) 388 return KSFT_FAIL; 389 if (cg_write(wb_group, "memory.zswap.max", "10K")) 390 goto out; 391 control_group = setup_test_group_1M(root, "per_memcg_wb_test2"); 392 if (!control_group) 393 goto out; 394 395 /* Push some test_group2 memory into zswap */ 396 if (cg_enter_current(control_group)) 397 goto out; 398 control_allocation = malloc(control_allocation_size); 399 for (int i = 0; i < control_allocation_size; i += 4095) 400 control_allocation[i] = 'a'; 401 if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1) 402 goto out; 403 404 /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */ 405 if (cg_run(wb_group, allocate_bytes, (void *)MB(10))) 406 goto out; 407 408 /* Verify that only zswapped memory from gwb_group has been written back */ 409 if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0) 410 ret = KSFT_PASS; 411 out: 412 cg_enter_current(root); 413 if (control_group) { 414 cg_destroy(control_group); 415 free(control_group); 416 } 417 cg_destroy(wb_group); 418 free(wb_group); 419 if (control_allocation) 420 free(control_allocation); 421 return ret; 422 } 423 424 struct no_kmem_bypass_child_args { 425 size_t target_alloc_bytes; 426 size_t child_allocated; 427 }; 428 429 static int no_kmem_bypass_child(const char *cgroup, void *arg) 430 { 431 struct no_kmem_bypass_child_args *values = arg; 432 void *allocation; 433 434 allocation = malloc(values->target_alloc_bytes); 435 if (!allocation) { 436 values->child_allocated = true; 437 return -1; 438 } 439 for (long i = 0; i < values->target_alloc_bytes; i += 4095) 440 ((char *)allocation)[i] = 'a'; 441 values->child_allocated = true; 442 pause(); 443 free(allocation); 444 return 0; 445 } 446 447 /* 448 * When pages owned by a memcg are pushed to zswap by kswapd, they should be 449 * charged to that cgroup. This wasn't the case before commit 450 * cd08d80ecdac("mm: correctly charge compressed memory to its memcg"). 451 * 452 * The test first allocates memory in a memcg, then raises min_free_kbytes to 453 * a very high value so that the allocation falls below low wm, then makes 454 * another allocation to trigger kswapd that should push the memcg-owned pages 455 * to zswap and verifies that the zswap pages are correctly charged. 456 * 457 * To be run on a VM with at most 4G of memory. 458 */ 459 static int test_no_kmem_bypass(const char *root) 460 { 461 size_t min_free_kb_high, min_free_kb_low, min_free_kb_original; 462 struct no_kmem_bypass_child_args *values; 463 size_t trigger_allocation_size; 464 int wait_child_iteration = 0; 465 long stored_pages_threshold; 466 struct sysinfo sys_info; 467 int ret = KSFT_FAIL; 468 int child_status; 469 char *test_group = NULL; 470 pid_t child_pid; 471 472 /* Read sys info and compute test values accordingly */ 473 if (sysinfo(&sys_info) != 0) 474 return KSFT_FAIL; 475 if (sys_info.totalram > 5000000000) 476 return KSFT_SKIP; 477 values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ | 478 PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 479 if (values == MAP_FAILED) 480 return KSFT_FAIL; 481 if (read_min_free_kb(&min_free_kb_original)) 482 return KSFT_FAIL; 483 min_free_kb_high = sys_info.totalram / 2000; 484 min_free_kb_low = sys_info.totalram / 500000; 485 values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) + 486 sys_info.totalram * 5 / 100; 487 stored_pages_threshold = sys_info.totalram / 5 / 4096; 488 trigger_allocation_size = sys_info.totalram / 20; 489 490 /* Set up test memcg */ 491 test_group = cg_name(root, "kmem_bypass_test"); 492 if (!test_group) 493 goto out; 494 495 /* Spawn memcg child and wait for it to allocate */ 496 set_min_free_kb(min_free_kb_low); 497 if (cg_create(test_group)) 498 goto out; 499 values->child_allocated = false; 500 child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values); 501 if (child_pid < 0) 502 goto out; 503 while (!values->child_allocated && wait_child_iteration++ < 10000) 504 usleep(1000); 505 506 /* Try to wakeup kswapd and let it push child memory to zswap */ 507 set_min_free_kb(min_free_kb_high); 508 for (int i = 0; i < 20; i++) { 509 size_t stored_pages; 510 char *trigger_allocation = malloc(trigger_allocation_size); 511 512 if (!trigger_allocation) 513 break; 514 for (int i = 0; i < trigger_allocation_size; i += 4095) 515 trigger_allocation[i] = 'b'; 516 usleep(100000); 517 free(trigger_allocation); 518 if (get_zswap_stored_pages(&stored_pages)) 519 break; 520 if (stored_pages < 0) 521 break; 522 /* If memory was pushed to zswap, verify it belongs to memcg */ 523 if (stored_pages > stored_pages_threshold) { 524 int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped "); 525 int delta = stored_pages * 4096 - zswapped; 526 int result_ok = delta < stored_pages * 4096 / 4; 527 528 ret = result_ok ? KSFT_PASS : KSFT_FAIL; 529 break; 530 } 531 } 532 533 kill(child_pid, SIGTERM); 534 waitpid(child_pid, &child_status, 0); 535 out: 536 set_min_free_kb(min_free_kb_original); 537 cg_destroy(test_group); 538 free(test_group); 539 return ret; 540 } 541 542 #define T(x) { x, #x } 543 struct zswap_test { 544 int (*fn)(const char *root); 545 const char *name; 546 } tests[] = { 547 T(test_zswap_usage), 548 T(test_swapin_nozswap), 549 T(test_zswapin), 550 T(test_zswap_writeback_enabled), 551 T(test_zswap_writeback_disabled), 552 T(test_no_kmem_bypass), 553 T(test_no_invasive_cgroup_shrink), 554 }; 555 #undef T 556 557 static bool zswap_configured(void) 558 { 559 return access("/sys/module/zswap", F_OK) == 0; 560 } 561 562 int main(int argc, char **argv) 563 { 564 char root[PATH_MAX]; 565 int i, ret = EXIT_SUCCESS; 566 567 if (cg_find_unified_root(root, sizeof(root), NULL)) 568 ksft_exit_skip("cgroup v2 isn't mounted\n"); 569 570 if (!zswap_configured()) 571 ksft_exit_skip("zswap isn't configured\n"); 572 573 /* 574 * Check that memory controller is available: 575 * memory is listed in cgroup.controllers 576 */ 577 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 578 ksft_exit_skip("memory controller isn't available\n"); 579 580 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 581 if (cg_write(root, "cgroup.subtree_control", "+memory")) 582 ksft_exit_skip("Failed to set memory controller\n"); 583 584 for (i = 0; i < ARRAY_SIZE(tests); i++) { 585 switch (tests[i].fn(root)) { 586 case KSFT_PASS: 587 ksft_test_result_pass("%s\n", tests[i].name); 588 break; 589 case KSFT_SKIP: 590 ksft_test_result_skip("%s\n", tests[i].name); 591 break; 592 default: 593 ret = EXIT_FAILURE; 594 ksft_test_result_fail("%s\n", tests[i].name); 595 break; 596 } 597 } 598 599 return ret; 600 } 601