1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #include <linux/limits.h> 3 #include <linux/oom.h> 4 #include <fcntl.h> 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <string.h> 8 #include <sys/stat.h> 9 #include <sys/types.h> 10 #include <unistd.h> 11 #include <sys/socket.h> 12 #include <sys/wait.h> 13 #include <arpa/inet.h> 14 #include <netinet/in.h> 15 #include <netdb.h> 16 #include <errno.h> 17 #include <sys/mman.h> 18 19 #include "../kselftest.h" 20 #include "cgroup_util.h" 21 22 static bool has_localevents; 23 static bool has_recursiveprot; 24 25 /* 26 * This test creates two nested cgroups with and without enabling 27 * the memory controller. 28 */ 29 static int test_memcg_subtree_control(const char *root) 30 { 31 char *parent, *child, *parent2 = NULL, *child2 = NULL; 32 int ret = KSFT_FAIL; 33 char buf[PAGE_SIZE]; 34 35 /* Create two nested cgroups with the memory controller enabled */ 36 parent = cg_name(root, "memcg_test_0"); 37 child = cg_name(root, "memcg_test_0/memcg_test_1"); 38 if (!parent || !child) 39 goto cleanup_free; 40 41 if (cg_create(parent)) 42 goto cleanup_free; 43 44 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 45 goto cleanup_parent; 46 47 if (cg_create(child)) 48 goto cleanup_parent; 49 50 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 51 goto cleanup_child; 52 53 /* Create two nested cgroups without enabling memory controller */ 54 parent2 = cg_name(root, "memcg_test_1"); 55 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 56 if (!parent2 || !child2) 57 goto cleanup_free2; 58 59 if (cg_create(parent2)) 60 goto cleanup_free2; 61 62 if (cg_create(child2)) 63 goto cleanup_parent2; 64 65 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 66 goto cleanup_all; 67 68 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 69 goto cleanup_all; 70 71 ret = KSFT_PASS; 72 73 cleanup_all: 74 cg_destroy(child2); 75 cleanup_parent2: 76 cg_destroy(parent2); 77 cleanup_free2: 78 free(parent2); 79 
free(child2); 80 cleanup_child: 81 cg_destroy(child); 82 cleanup_parent: 83 cg_destroy(parent); 84 cleanup_free: 85 free(parent); 86 free(child); 87 88 return ret; 89 } 90 91 static int alloc_anon_50M_check(const char *cgroup, void *arg) 92 { 93 size_t size = MB(50); 94 char *buf, *ptr; 95 long anon, current; 96 int ret = -1; 97 98 buf = malloc(size); 99 if (buf == NULL) { 100 fprintf(stderr, "malloc() failed\n"); 101 return -1; 102 } 103 104 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 105 *ptr = 0; 106 107 current = cg_read_long(cgroup, "memory.current"); 108 if (current < size) 109 goto cleanup; 110 111 if (!values_close(size, current, 3)) 112 goto cleanup; 113 114 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 115 if (anon < 0) 116 goto cleanup; 117 118 if (!values_close(anon, current, 3)) 119 goto cleanup; 120 121 ret = 0; 122 cleanup: 123 free(buf); 124 return ret; 125 } 126 127 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 128 { 129 size_t size = MB(50); 130 int ret = -1; 131 long current, file; 132 int fd; 133 134 fd = get_temp_fd(); 135 if (fd < 0) 136 return -1; 137 138 if (alloc_pagecache(fd, size)) 139 goto cleanup; 140 141 current = cg_read_long(cgroup, "memory.current"); 142 if (current < size) 143 goto cleanup; 144 145 file = cg_read_key_long(cgroup, "memory.stat", "file "); 146 if (file < 0) 147 goto cleanup; 148 149 if (!values_close(file, current, 10)) 150 goto cleanup; 151 152 ret = 0; 153 154 cleanup: 155 close(fd); 156 return ret; 157 } 158 159 /* 160 * This test create a memory cgroup, allocates 161 * some anonymous memory and some pagecache 162 * and check memory.current and some memory.stat values. 
163 */ 164 static int test_memcg_current(const char *root) 165 { 166 int ret = KSFT_FAIL; 167 long current; 168 char *memcg; 169 170 memcg = cg_name(root, "memcg_test"); 171 if (!memcg) 172 goto cleanup; 173 174 if (cg_create(memcg)) 175 goto cleanup; 176 177 current = cg_read_long(memcg, "memory.current"); 178 if (current != 0) 179 goto cleanup; 180 181 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 182 goto cleanup; 183 184 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 185 goto cleanup; 186 187 ret = KSFT_PASS; 188 189 cleanup: 190 cg_destroy(memcg); 191 free(memcg); 192 193 return ret; 194 } 195 196 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 197 { 198 int fd = (long)arg; 199 int ppid = getppid(); 200 201 if (alloc_pagecache(fd, MB(50))) 202 return -1; 203 204 while (getppid() == ppid) 205 sleep(1); 206 207 return 0; 208 } 209 210 static int alloc_anon_noexit(const char *cgroup, void *arg) 211 { 212 int ppid = getppid(); 213 size_t size = (unsigned long)arg; 214 char *buf, *ptr; 215 216 buf = malloc(size); 217 if (buf == NULL) { 218 fprintf(stderr, "malloc() failed\n"); 219 return -1; 220 } 221 222 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 223 *ptr = 0; 224 225 while (getppid() == ppid) 226 sleep(1); 227 228 free(buf); 229 return 0; 230 } 231 232 /* 233 * Wait until processes are killed asynchronously by the OOM killer 234 * If we exceed a timeout, fail. 
235 */ 236 static int cg_test_proc_killed(const char *cgroup) 237 { 238 int limit; 239 240 for (limit = 10; limit > 0; limit--) { 241 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 242 return 0; 243 244 usleep(100000); 245 } 246 return -1; 247 } 248 249 static bool reclaim_until(const char *memcg, long goal); 250 251 /* 252 * First, this test creates the following hierarchy: 253 * A memory.min = 0, memory.max = 200M 254 * A/B memory.min = 50M 255 * A/B/C memory.min = 75M, memory.current = 50M 256 * A/B/D memory.min = 25M, memory.current = 50M 257 * A/B/E memory.min = 0, memory.current = 50M 258 * A/B/F memory.min = 500M, memory.current = 0 259 * 260 * (or memory.low if we test soft protection) 261 * 262 * Usages are pagecache and the test keeps a running 263 * process in every leaf cgroup. 264 * Then it creates A/G and creates a significant 265 * memory pressure in A. 266 * 267 * Then it checks actual memory usages and expects that: 268 * A/B memory.current ~= 50M 269 * A/B/C memory.current ~= 29M 270 * A/B/D memory.current ~= 21M 271 * A/B/E memory.current ~= 0 272 * A/B/F memory.current = 0 273 * (for origin of the numbers, see model in memcg_protection.m.) 274 * 275 * After that it tries to allocate more than there is 276 * unprotected memory in A available, and checks that: 277 * a) memory.min protects pagecache even in this case, 278 * b) memory.low allows reclaiming page cache with low events. 279 * 280 * Then we try to reclaim from A/B/C using memory.reclaim until its 281 * usage reaches 10M. 282 * This makes sure that: 283 * (a) We ignore the protection of the reclaim target memcg. 284 * (b) The previously calculated emin value (~29M) should be dismissed. 285 */ 286 static int test_memcg_protection(const char *root, bool min) 287 { 288 int ret = KSFT_FAIL, rc; 289 char *parent[3] = {NULL}; 290 char *children[4] = {NULL}; 291 const char *attribute = min ? 
"memory.min" : "memory.low"; 292 long c[4]; 293 long current; 294 int i, attempts; 295 int fd; 296 297 fd = get_temp_fd(); 298 if (fd < 0) 299 goto cleanup; 300 301 parent[0] = cg_name(root, "memcg_test_0"); 302 if (!parent[0]) 303 goto cleanup; 304 305 parent[1] = cg_name(parent[0], "memcg_test_1"); 306 if (!parent[1]) 307 goto cleanup; 308 309 parent[2] = cg_name(parent[0], "memcg_test_2"); 310 if (!parent[2]) 311 goto cleanup; 312 313 if (cg_create(parent[0])) 314 goto cleanup; 315 316 if (cg_read_long(parent[0], attribute)) { 317 /* No memory.min on older kernels is fine */ 318 if (min) 319 ret = KSFT_SKIP; 320 goto cleanup; 321 } 322 323 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 324 goto cleanup; 325 326 if (cg_write(parent[0], "memory.max", "200M")) 327 goto cleanup; 328 329 if (cg_write(parent[0], "memory.swap.max", "0")) 330 goto cleanup; 331 332 if (cg_create(parent[1])) 333 goto cleanup; 334 335 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 336 goto cleanup; 337 338 if (cg_create(parent[2])) 339 goto cleanup; 340 341 for (i = 0; i < ARRAY_SIZE(children); i++) { 342 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 343 if (!children[i]) 344 goto cleanup; 345 346 if (cg_create(children[i])) 347 goto cleanup; 348 349 if (i > 2) 350 continue; 351 352 cg_run_nowait(children[i], alloc_pagecache_50M_noexit, 353 (void *)(long)fd); 354 } 355 356 if (cg_write(parent[1], attribute, "50M")) 357 goto cleanup; 358 if (cg_write(children[0], attribute, "75M")) 359 goto cleanup; 360 if (cg_write(children[1], attribute, "25M")) 361 goto cleanup; 362 if (cg_write(children[2], attribute, "0")) 363 goto cleanup; 364 if (cg_write(children[3], attribute, "500M")) 365 goto cleanup; 366 367 attempts = 0; 368 while (!values_close(cg_read_long(parent[1], "memory.current"), 369 MB(150), 3)) { 370 if (attempts++ > 5) 371 break; 372 sleep(1); 373 } 374 375 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 376 goto cleanup; 377 378 if 
(!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 379 goto cleanup; 380 381 for (i = 0; i < ARRAY_SIZE(children); i++) 382 c[i] = cg_read_long(children[i], "memory.current"); 383 384 if (!values_close(c[0], MB(29), 10)) 385 goto cleanup; 386 387 if (!values_close(c[1], MB(21), 10)) 388 goto cleanup; 389 390 if (c[3] != 0) 391 goto cleanup; 392 393 rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); 394 if (min && !rc) 395 goto cleanup; 396 else if (!min && rc) { 397 fprintf(stderr, 398 "memory.low prevents from allocating anon memory\n"); 399 goto cleanup; 400 } 401 402 current = min ? MB(50) : MB(30); 403 if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3)) 404 goto cleanup; 405 406 if (!reclaim_until(children[0], MB(10))) 407 goto cleanup; 408 409 if (min) { 410 ret = KSFT_PASS; 411 goto cleanup; 412 } 413 414 for (i = 0; i < ARRAY_SIZE(children); i++) { 415 int no_low_events_index = 1; 416 long low, oom; 417 418 oom = cg_read_key_long(children[i], "memory.events", "oom "); 419 low = cg_read_key_long(children[i], "memory.events", "low "); 420 421 if (oom) 422 goto cleanup; 423 if (i <= no_low_events_index && low <= 0) 424 goto cleanup; 425 if (i > no_low_events_index && low) 426 goto cleanup; 427 428 } 429 430 ret = KSFT_PASS; 431 432 cleanup: 433 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 434 if (!children[i]) 435 continue; 436 437 cg_destroy(children[i]); 438 free(children[i]); 439 } 440 441 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 442 if (!parent[i]) 443 continue; 444 445 cg_destroy(parent[i]); 446 free(parent[i]); 447 } 448 close(fd); 449 return ret; 450 } 451 452 static int test_memcg_min(const char *root) 453 { 454 return test_memcg_protection(root, true); 455 } 456 457 static int test_memcg_low(const char *root) 458 { 459 return test_memcg_protection(root, false); 460 } 461 462 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 463 { 464 size_t size = MB(50); 465 int ret = -1; 466 long 
current, high, max; 467 int fd; 468 469 high = cg_read_long(cgroup, "memory.high"); 470 max = cg_read_long(cgroup, "memory.max"); 471 if (high != MB(30) && max != MB(30)) 472 return -1; 473 474 fd = get_temp_fd(); 475 if (fd < 0) 476 return -1; 477 478 if (alloc_pagecache(fd, size)) 479 goto cleanup; 480 481 current = cg_read_long(cgroup, "memory.current"); 482 if (!values_close(current, MB(30), 5)) 483 goto cleanup; 484 485 ret = 0; 486 487 cleanup: 488 close(fd); 489 return ret; 490 491 } 492 493 /* 494 * This test checks that memory.high limits the amount of 495 * memory which can be consumed by either anonymous memory 496 * or pagecache. 497 */ 498 static int test_memcg_high(const char *root) 499 { 500 int ret = KSFT_FAIL; 501 char *memcg; 502 long high; 503 504 memcg = cg_name(root, "memcg_test"); 505 if (!memcg) 506 goto cleanup; 507 508 if (cg_create(memcg)) 509 goto cleanup; 510 511 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 512 goto cleanup; 513 514 if (cg_write(memcg, "memory.swap.max", "0")) 515 goto cleanup; 516 517 if (cg_write(memcg, "memory.high", "30M")) 518 goto cleanup; 519 520 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 521 goto cleanup; 522 523 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 524 goto cleanup; 525 526 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 527 goto cleanup; 528 529 high = cg_read_key_long(memcg, "memory.events", "high "); 530 if (high <= 0) 531 goto cleanup; 532 533 ret = KSFT_PASS; 534 535 cleanup: 536 cg_destroy(memcg); 537 free(memcg); 538 539 return ret; 540 } 541 542 static int alloc_anon_mlock(const char *cgroup, void *arg) 543 { 544 size_t size = (size_t)arg; 545 void *buf; 546 547 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 548 0, 0); 549 if (buf == MAP_FAILED) 550 return -1; 551 552 mlock(buf, size); 553 munmap(buf, size); 554 return 0; 555 } 556 557 /* 558 * This test checks that memory.high is able to throttle big single shot 559 * allocation i.e. 
large allocation within one kernel entry. 560 */ 561 static int test_memcg_high_sync(const char *root) 562 { 563 int ret = KSFT_FAIL, pid, fd = -1; 564 char *memcg; 565 long pre_high, pre_max; 566 long post_high, post_max; 567 568 memcg = cg_name(root, "memcg_test"); 569 if (!memcg) 570 goto cleanup; 571 572 if (cg_create(memcg)) 573 goto cleanup; 574 575 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 576 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 577 if (pre_high < 0 || pre_max < 0) 578 goto cleanup; 579 580 if (cg_write(memcg, "memory.swap.max", "0")) 581 goto cleanup; 582 583 if (cg_write(memcg, "memory.high", "30M")) 584 goto cleanup; 585 586 if (cg_write(memcg, "memory.max", "140M")) 587 goto cleanup; 588 589 fd = memcg_prepare_for_wait(memcg); 590 if (fd < 0) 591 goto cleanup; 592 593 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 594 if (pid < 0) 595 goto cleanup; 596 597 cg_wait_for(fd); 598 599 post_high = cg_read_key_long(memcg, "memory.events", "high "); 600 post_max = cg_read_key_long(memcg, "memory.events", "max "); 601 if (post_high < 0 || post_max < 0) 602 goto cleanup; 603 604 if (pre_high == post_high || pre_max != post_max) 605 goto cleanup; 606 607 ret = KSFT_PASS; 608 609 cleanup: 610 if (fd >= 0) 611 close(fd); 612 cg_destroy(memcg); 613 free(memcg); 614 615 return ret; 616 } 617 618 /* 619 * This test checks that memory.max limits the amount of 620 * memory which can be consumed by either anonymous memory 621 * or pagecache. 
622 */ 623 static int test_memcg_max(const char *root) 624 { 625 int ret = KSFT_FAIL; 626 char *memcg; 627 long current, max; 628 629 memcg = cg_name(root, "memcg_test"); 630 if (!memcg) 631 goto cleanup; 632 633 if (cg_create(memcg)) 634 goto cleanup; 635 636 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 637 goto cleanup; 638 639 if (cg_write(memcg, "memory.swap.max", "0")) 640 goto cleanup; 641 642 if (cg_write(memcg, "memory.max", "30M")) 643 goto cleanup; 644 645 /* Should be killed by OOM killer */ 646 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 647 goto cleanup; 648 649 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 650 goto cleanup; 651 652 current = cg_read_long(memcg, "memory.current"); 653 if (current > MB(30) || !current) 654 goto cleanup; 655 656 max = cg_read_key_long(memcg, "memory.events", "max "); 657 if (max <= 0) 658 goto cleanup; 659 660 ret = KSFT_PASS; 661 662 cleanup: 663 cg_destroy(memcg); 664 free(memcg); 665 666 return ret; 667 } 668 669 /* 670 * Reclaim from @memcg until usage reaches @goal by writing to 671 * memory.reclaim. 672 * 673 * This function will return false if the usage is already below the 674 * goal. 675 * 676 * This function assumes that writing to memory.reclaim is the only 677 * source of change in memory.current (no concurrent allocations or 678 * reclaim). 679 * 680 * This function makes sure memory.reclaim is sane. It will return 681 * false if memory.reclaim's error codes do not make sense, even if 682 * the usage goal was satisfied. 683 */ 684 static bool reclaim_until(const char *memcg, long goal) 685 { 686 char buf[64]; 687 int retries, err; 688 long current, to_reclaim; 689 bool reclaimed = false; 690 691 for (retries = 5; retries > 0; retries--) { 692 current = cg_read_long(memcg, "memory.current"); 693 694 if (current < goal || values_close(current, goal, 3)) 695 break; 696 /* Did memory.reclaim return 0 incorrectly? 
*/ 697 else if (reclaimed) 698 return false; 699 700 to_reclaim = current - goal; 701 snprintf(buf, sizeof(buf), "%ld", to_reclaim); 702 err = cg_write(memcg, "memory.reclaim", buf); 703 if (!err) 704 reclaimed = true; 705 else if (err != -EAGAIN) 706 return false; 707 } 708 return reclaimed; 709 } 710 711 /* 712 * This test checks that memory.reclaim reclaims the given 713 * amount of memory (from both anon and file, if possible). 714 */ 715 static int test_memcg_reclaim(const char *root) 716 { 717 int ret = KSFT_FAIL; 718 int fd = -1; 719 int retries; 720 char *memcg; 721 long current, expected_usage; 722 723 memcg = cg_name(root, "memcg_test"); 724 if (!memcg) 725 goto cleanup; 726 727 if (cg_create(memcg)) 728 goto cleanup; 729 730 current = cg_read_long(memcg, "memory.current"); 731 if (current != 0) 732 goto cleanup; 733 734 fd = get_temp_fd(); 735 if (fd < 0) 736 goto cleanup; 737 738 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); 739 740 /* 741 * If swap is enabled, try to reclaim from both anon and file, else try 742 * to reclaim from file only. 743 */ 744 if (is_swap_enabled()) { 745 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); 746 expected_usage = MB(100); 747 } else 748 expected_usage = MB(50); 749 750 /* 751 * Wait until current usage reaches the expected usage (or we run out of 752 * retries). 753 */ 754 retries = 5; 755 while (!values_close(cg_read_long(memcg, "memory.current"), 756 expected_usage, 10)) { 757 if (retries--) { 758 sleep(1); 759 continue; 760 } else { 761 fprintf(stderr, 762 "failed to allocate %ld for memcg reclaim test\n", 763 expected_usage); 764 goto cleanup; 765 } 766 } 767 768 /* 769 * Reclaim until current reaches 30M, this makes sure we hit both anon 770 * and file if swap is enabled. 
771 */ 772 if (!reclaim_until(memcg, MB(30))) 773 goto cleanup; 774 775 ret = KSFT_PASS; 776 cleanup: 777 cg_destroy(memcg); 778 free(memcg); 779 close(fd); 780 781 return ret; 782 } 783 784 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 785 { 786 long mem_max = (long)arg; 787 size_t size = MB(50); 788 char *buf, *ptr; 789 long mem_current, swap_current; 790 int ret = -1; 791 792 buf = malloc(size); 793 if (buf == NULL) { 794 fprintf(stderr, "malloc() failed\n"); 795 return -1; 796 } 797 798 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 799 *ptr = 0; 800 801 mem_current = cg_read_long(cgroup, "memory.current"); 802 if (!mem_current || !values_close(mem_current, mem_max, 3)) 803 goto cleanup; 804 805 swap_current = cg_read_long(cgroup, "memory.swap.current"); 806 if (!swap_current || 807 !values_close(mem_current + swap_current, size, 3)) 808 goto cleanup; 809 810 ret = 0; 811 cleanup: 812 free(buf); 813 return ret; 814 } 815 816 /* 817 * This test checks that memory.swap.max limits the amount of 818 * anonymous memory which can be swapped out. 
819 */ 820 static int test_memcg_swap_max(const char *root) 821 { 822 int ret = KSFT_FAIL; 823 char *memcg; 824 long max; 825 826 if (!is_swap_enabled()) 827 return KSFT_SKIP; 828 829 memcg = cg_name(root, "memcg_test"); 830 if (!memcg) 831 goto cleanup; 832 833 if (cg_create(memcg)) 834 goto cleanup; 835 836 if (cg_read_long(memcg, "memory.swap.current")) { 837 ret = KSFT_SKIP; 838 goto cleanup; 839 } 840 841 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 842 goto cleanup; 843 844 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 845 goto cleanup; 846 847 if (cg_write(memcg, "memory.swap.max", "30M")) 848 goto cleanup; 849 850 if (cg_write(memcg, "memory.max", "30M")) 851 goto cleanup; 852 853 /* Should be killed by OOM killer */ 854 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 855 goto cleanup; 856 857 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 858 goto cleanup; 859 860 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 861 goto cleanup; 862 863 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 864 goto cleanup; 865 866 max = cg_read_key_long(memcg, "memory.events", "max "); 867 if (max <= 0) 868 goto cleanup; 869 870 ret = KSFT_PASS; 871 872 cleanup: 873 cg_destroy(memcg); 874 free(memcg); 875 876 return ret; 877 } 878 879 /* 880 * This test disables swapping and tries to allocate anonymous memory 881 * up to OOM. Then it checks for oom and oom_kill events in 882 * memory.events. 
/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM.  Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* The cgroup must be empty after the kill */
	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/* Arguments passed from test_memcg_sock to the forked tcp_server. */
struct tcp_server_args {
	unsigned short port;	/* port to listen on */
	int ctl[2];		/* pipe used to report bind status to parent */
};

/*
 * Forked TCP server: bind/listen on args->port, report the bind result
 * (0 or errno) through the control pipe, then stream data to the first
 * client until the connection is reset.
 */
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		/* Let the parent see the errno (e.g. EADDRINUSE) and retry */
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		/* Keep the client's receive buffers full; content is irrelevant */
		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

/*
 * Connect to the local tcp_server on @port and interleave reads with
 * checks that memory.current growth matches the "sock" counter in
 * memory.stat.  Returns KSFT_PASS/KSFT_FAIL (or a getaddrinfo error).
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10;	/* nice round number */
	int sk, ret;
	long allocated;

	/* Baseline usage, so non-socket memory can be excluded below */
	allocated = cg_read_long(cgroup, "memory.current");

	/*
	 * Fix: port is unsigned short, so use %hu.  With the original %hd,
	 * ports above 32767 (possible: 1000 + rand() % 60000) were printed
	 * as negative numbers and getaddrinfo() failed spuriously.
	 */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* exclude the memory not related to socket connection */
		if (values_close(current - allocated, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}
1043 */ 1044 static int test_memcg_sock(const char *root) 1045 { 1046 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1047 unsigned short port; 1048 char *memcg; 1049 1050 memcg = cg_name(root, "memcg_test"); 1051 if (!memcg) 1052 goto cleanup; 1053 1054 if (cg_create(memcg)) 1055 goto cleanup; 1056 1057 while (bind_retries--) { 1058 struct tcp_server_args args; 1059 1060 if (pipe(args.ctl)) 1061 goto cleanup; 1062 1063 port = args.port = 1000 + rand() % 60000; 1064 1065 pid = cg_run_nowait(memcg, tcp_server, &args); 1066 if (pid < 0) 1067 goto cleanup; 1068 1069 close(args.ctl[1]); 1070 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1071 goto cleanup; 1072 close(args.ctl[0]); 1073 1074 if (!err) 1075 break; 1076 if (err != EADDRINUSE) 1077 goto cleanup; 1078 1079 waitpid(pid, NULL, 0); 1080 } 1081 1082 if (err == EADDRINUSE) { 1083 ret = KSFT_SKIP; 1084 goto cleanup; 1085 } 1086 1087 if (tcp_client(memcg, port) != KSFT_PASS) 1088 goto cleanup; 1089 1090 waitpid(pid, &err, 0); 1091 if (WEXITSTATUS(err)) 1092 goto cleanup; 1093 1094 if (cg_read_long(memcg, "memory.current") < 0) 1095 goto cleanup; 1096 1097 if (cg_read_key_long(memcg, "memory.stat", "sock ")) 1098 goto cleanup; 1099 1100 ret = KSFT_PASS; 1101 1102 cleanup: 1103 cg_destroy(memcg); 1104 free(memcg); 1105 1106 return ret; 1107 } 1108 1109 /* 1110 * This test disables swapping and tries to allocate anonymous memory 1111 * up to OOM with memory.group.oom set. Then it checks that all 1112 * processes in the leaf were killed. It also checks that oom_events 1113 * were propagated to the parent level. 
1114 */ 1115 static int test_memcg_oom_group_leaf_events(const char *root) 1116 { 1117 int ret = KSFT_FAIL; 1118 char *parent, *child; 1119 long parent_oom_events; 1120 1121 parent = cg_name(root, "memcg_test_0"); 1122 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1123 1124 if (!parent || !child) 1125 goto cleanup; 1126 1127 if (cg_create(parent)) 1128 goto cleanup; 1129 1130 if (cg_create(child)) 1131 goto cleanup; 1132 1133 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1134 goto cleanup; 1135 1136 if (cg_write(child, "memory.max", "50M")) 1137 goto cleanup; 1138 1139 if (cg_write(child, "memory.swap.max", "0")) 1140 goto cleanup; 1141 1142 if (cg_write(child, "memory.oom.group", "1")) 1143 goto cleanup; 1144 1145 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1146 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1147 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1148 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1149 goto cleanup; 1150 1151 if (cg_test_proc_killed(child)) 1152 goto cleanup; 1153 1154 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1155 goto cleanup; 1156 1157 parent_oom_events = cg_read_key_long( 1158 parent, "memory.events", "oom_kill "); 1159 /* 1160 * If memory_localevents is not enabled (the default), the parent should 1161 * count OOM events in its children groups. Otherwise, it should not 1162 * have observed any events. 1163 */ 1164 if (has_localevents && parent_oom_events != 0) 1165 goto cleanup; 1166 else if (!has_localevents && parent_oom_events <= 0) 1167 goto cleanup; 1168 1169 ret = KSFT_PASS; 1170 1171 cleanup: 1172 if (child) 1173 cg_destroy(child); 1174 if (parent) 1175 cg_destroy(parent); 1176 free(child); 1177 free(parent); 1178 1179 return ret; 1180 } 1181 1182 /* 1183 * This test disables swapping and tries to allocate anonymous memory 1184 * up to OOM with memory.group.oom set. Then it checks that all 1185 * processes in the parent and leaf were killed. 
1186 */ 1187 static int test_memcg_oom_group_parent_events(const char *root) 1188 { 1189 int ret = KSFT_FAIL; 1190 char *parent, *child; 1191 1192 parent = cg_name(root, "memcg_test_0"); 1193 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1194 1195 if (!parent || !child) 1196 goto cleanup; 1197 1198 if (cg_create(parent)) 1199 goto cleanup; 1200 1201 if (cg_create(child)) 1202 goto cleanup; 1203 1204 if (cg_write(parent, "memory.max", "80M")) 1205 goto cleanup; 1206 1207 if (cg_write(parent, "memory.swap.max", "0")) 1208 goto cleanup; 1209 1210 if (cg_write(parent, "memory.oom.group", "1")) 1211 goto cleanup; 1212 1213 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1214 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1215 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1216 1217 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1218 goto cleanup; 1219 1220 if (cg_test_proc_killed(child)) 1221 goto cleanup; 1222 if (cg_test_proc_killed(parent)) 1223 goto cleanup; 1224 1225 ret = KSFT_PASS; 1226 1227 cleanup: 1228 if (child) 1229 cg_destroy(child); 1230 if (parent) 1231 cg_destroy(parent); 1232 free(child); 1233 free(parent); 1234 1235 return ret; 1236 } 1237 1238 /* 1239 * This test disables swapping and tries to allocate anonymous memory 1240 * up to OOM with memory.group.oom set. 
Then it checks that all 1241 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1242 */ 1243 static int test_memcg_oom_group_score_events(const char *root) 1244 { 1245 int ret = KSFT_FAIL; 1246 char *memcg; 1247 int safe_pid; 1248 1249 memcg = cg_name(root, "memcg_test_0"); 1250 1251 if (!memcg) 1252 goto cleanup; 1253 1254 if (cg_create(memcg)) 1255 goto cleanup; 1256 1257 if (cg_write(memcg, "memory.max", "50M")) 1258 goto cleanup; 1259 1260 if (cg_write(memcg, "memory.swap.max", "0")) 1261 goto cleanup; 1262 1263 if (cg_write(memcg, "memory.oom.group", "1")) 1264 goto cleanup; 1265 1266 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1267 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1268 goto cleanup; 1269 1270 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1271 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1272 goto cleanup; 1273 1274 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1275 goto cleanup; 1276 1277 if (kill(safe_pid, SIGKILL)) 1278 goto cleanup; 1279 1280 ret = KSFT_PASS; 1281 1282 cleanup: 1283 if (memcg) 1284 cg_destroy(memcg); 1285 free(memcg); 1286 1287 return ret; 1288 } 1289 1290 #define T(x) { x, #x } 1291 struct memcg_test { 1292 int (*fn)(const char *root); 1293 const char *name; 1294 } tests[] = { 1295 T(test_memcg_subtree_control), 1296 T(test_memcg_current), 1297 T(test_memcg_min), 1298 T(test_memcg_low), 1299 T(test_memcg_high), 1300 T(test_memcg_high_sync), 1301 T(test_memcg_max), 1302 T(test_memcg_reclaim), 1303 T(test_memcg_oom_events), 1304 T(test_memcg_swap_max), 1305 T(test_memcg_sock), 1306 T(test_memcg_oom_group_leaf_events), 1307 T(test_memcg_oom_group_parent_events), 1308 T(test_memcg_oom_group_score_events), 1309 }; 1310 #undef T 1311 1312 int main(int argc, char **argv) 1313 { 1314 char root[PATH_MAX]; 1315 int i, proc_status, ret = EXIT_SUCCESS; 1316 1317 if (cg_find_unified_root(root, sizeof(root), NULL)) 1318 ksft_exit_skip("cgroup v2 isn't mounted\n"); 
1319 1320 /* 1321 * Check that memory controller is available: 1322 * memory is listed in cgroup.controllers 1323 */ 1324 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1325 ksft_exit_skip("memory controller isn't available\n"); 1326 1327 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1328 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1329 ksft_exit_skip("Failed to set memory controller\n"); 1330 1331 proc_status = proc_mount_contains("memory_recursiveprot"); 1332 if (proc_status < 0) 1333 ksft_exit_skip("Failed to query cgroup mount option\n"); 1334 has_recursiveprot = proc_status; 1335 1336 proc_status = proc_mount_contains("memory_localevents"); 1337 if (proc_status < 0) 1338 ksft_exit_skip("Failed to query cgroup mount option\n"); 1339 has_localevents = proc_status; 1340 1341 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1342 switch (tests[i].fn(root)) { 1343 case KSFT_PASS: 1344 ksft_test_result_pass("%s\n", tests[i].name); 1345 break; 1346 case KSFT_SKIP: 1347 ksft_test_result_skip("%s\n", tests[i].name); 1348 break; 1349 default: 1350 ret = EXIT_FAILURE; 1351 ksft_test_result_fail("%s\n", tests[i].name); 1352 break; 1353 } 1354 } 1355 1356 return ret; 1357 } 1358