1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/socket.h> 14 #include <sys/wait.h> 15 #include <arpa/inet.h> 16 #include <netinet/in.h> 17 #include <netdb.h> 18 #include <errno.h> 19 #include <sys/mman.h> 20 21 #include "../kselftest.h" 22 #include "cgroup_util.h" 23 24 static bool has_localevents; 25 static bool has_recursiveprot; 26 27 /* 28 * This test creates two nested cgroups with and without enabling 29 * the memory controller. 30 */ 31 static int test_memcg_subtree_control(const char *root) 32 { 33 char *parent, *child, *parent2 = NULL, *child2 = NULL; 34 int ret = KSFT_FAIL; 35 char buf[PAGE_SIZE]; 36 37 /* Create two nested cgroups with the memory controller enabled */ 38 parent = cg_name(root, "memcg_test_0"); 39 child = cg_name(root, "memcg_test_0/memcg_test_1"); 40 if (!parent || !child) 41 goto cleanup_free; 42 43 if (cg_create(parent)) 44 goto cleanup_free; 45 46 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 47 goto cleanup_parent; 48 49 if (cg_create(child)) 50 goto cleanup_parent; 51 52 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 53 goto cleanup_child; 54 55 /* Create two nested cgroups without enabling memory controller */ 56 parent2 = cg_name(root, "memcg_test_1"); 57 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 58 if (!parent2 || !child2) 59 goto cleanup_free2; 60 61 if (cg_create(parent2)) 62 goto cleanup_free2; 63 64 if (cg_create(child2)) 65 goto cleanup_parent2; 66 67 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 68 goto cleanup_all; 69 70 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 71 goto cleanup_all; 72 73 ret = KSFT_PASS; 74 75 cleanup_all: 76 cg_destroy(child2); 77 cleanup_parent2: 78 cg_destroy(parent2); 79 cleanup_free2: 80 free(parent2); 81 free(child2); 82 cleanup_child: 83 cg_destroy(child); 84 cleanup_parent: 85 cg_destroy(parent); 86 cleanup_free: 87 free(parent); 88 free(child); 89 90 return ret; 91 } 92 93 static int alloc_anon_50M_check(const char *cgroup, void *arg) 94 { 95 size_t size = MB(50); 96 char *buf, *ptr; 97 long anon, current; 98 int ret = -1; 99 100 buf = malloc(size); 101 if (buf == NULL) { 102 fprintf(stderr, "malloc() failed\n"); 103 return -1; 104 } 105 106 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 107 *ptr = 0; 108 109 current = cg_read_long(cgroup, "memory.current"); 110 if (current < size) 111 goto cleanup; 112 113 if (!values_close(size, current, 3)) 114 goto cleanup; 115 116 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 117 if (anon < 0) 118 goto cleanup; 119 120 if (!values_close(anon, current, 3)) 121 goto cleanup; 122 123 ret = 0; 124 cleanup: 125 free(buf); 126 return ret; 127 } 128 129 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 130 { 131 size_t size = MB(50); 132 int ret = -1; 133 long current, file; 134 int fd; 135 136 fd = get_temp_fd(); 137 if (fd < 0) 138 return -1; 139 140 if (alloc_pagecache(fd, size)) 141 goto cleanup; 142 143 current = cg_read_long(cgroup, "memory.current"); 144 if (current < size) 145 goto cleanup; 146 147 file = cg_read_key_long(cgroup, "memory.stat", "file "); 148 if (file < 0) 149 goto cleanup; 150 151 if (!values_close(file, current, 10)) 152 goto cleanup; 153 154 ret = 0; 155 156 cleanup: 157 close(fd); 158 return ret; 159 } 160 161 /* 162 * This test create a memory cgroup, allocates 163 * some anonymous memory and some pagecache 164 * and check memory.current and some memory.stat values. 165 */ 166 static int test_memcg_current(const char *root) 167 { 168 int ret = KSFT_FAIL; 169 long current; 170 char *memcg; 171 172 memcg = cg_name(root, "memcg_test"); 173 if (!memcg) 174 goto cleanup; 175 176 if (cg_create(memcg)) 177 goto cleanup; 178 179 current = cg_read_long(memcg, "memory.current"); 180 if (current != 0) 181 goto cleanup; 182 183 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 184 goto cleanup; 185 186 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 187 goto cleanup; 188 189 ret = KSFT_PASS; 190 191 cleanup: 192 cg_destroy(memcg); 193 free(memcg); 194 195 return ret; 196 } 197 198 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 199 { 200 int fd = (long)arg; 201 int ppid = getppid(); 202 203 if (alloc_pagecache(fd, MB(50))) 204 return -1; 205 206 while (getppid() == ppid) 207 sleep(1); 208 209 return 0; 210 } 211 212 static int alloc_anon_noexit(const char *cgroup, void *arg) 213 { 214 int ppid = getppid(); 215 size_t size = (unsigned long)arg; 216 char *buf, *ptr; 217 218 buf = malloc(size); 219 if (buf == NULL) { 220 fprintf(stderr, "malloc() failed\n"); 221 return -1; 222 } 223 224 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 225 *ptr = 0; 226 227 while (getppid() == ppid) 228 sleep(1); 229 230 free(buf); 231 return 0; 232 } 233 234 /* 235 * Wait until processes are killed asynchronously by the OOM killer 236 * If we exceed a timeout, fail. 237 */ 238 static int cg_test_proc_killed(const char *cgroup) 239 { 240 int limit; 241 242 for (limit = 10; limit > 0; limit--) { 243 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 244 return 0; 245 246 usleep(100000); 247 } 248 return -1; 249 } 250 251 static bool reclaim_until(const char *memcg, long goal); 252 253 /* 254 * First, this test creates the following hierarchy: 255 * A memory.min = 0, memory.max = 200M 256 * A/B memory.min = 50M 257 * A/B/C memory.min = 75M, memory.current = 50M 258 * A/B/D memory.min = 25M, memory.current = 50M 259 * A/B/E memory.min = 0, memory.current = 50M 260 * A/B/F memory.min = 500M, memory.current = 0 261 * 262 * (or memory.low if we test soft protection) 263 * 264 * Usages are pagecache and the test keeps a running 265 * process in every leaf cgroup. 266 * Then it creates A/G and creates a significant 267 * memory pressure in A. 268 * 269 * Then it checks actual memory usages and expects that: 270 * A/B memory.current ~= 50M 271 * A/B/C memory.current ~= 29M 272 * A/B/D memory.current ~= 21M 273 * A/B/E memory.current ~= 0 274 * A/B/F memory.current = 0 275 * (for origin of the numbers, see model in memcg_protection.m.) 276 * 277 * After that it tries to allocate more than there is 278 * unprotected memory in A available, and checks that: 279 * a) memory.min protects pagecache even in this case, 280 * b) memory.low allows reclaiming page cache with low events. 281 * 282 * Then we try to reclaim from A/B/C using memory.reclaim until its 283 * usage reaches 10M. 284 * This makes sure that: 285 * (a) We ignore the protection of the reclaim target memcg. 286 * (b) The previously calculated emin value (~29M) should be dismissed. 287 */ 288 static int test_memcg_protection(const char *root, bool min) 289 { 290 int ret = KSFT_FAIL, rc; 291 char *parent[3] = {NULL}; 292 char *children[4] = {NULL}; 293 const char *attribute = min ? "memory.min" : "memory.low"; 294 long c[4]; 295 long current; 296 int i, attempts; 297 int fd; 298 299 fd = get_temp_fd(); 300 if (fd < 0) 301 goto cleanup; 302 303 parent[0] = cg_name(root, "memcg_test_0"); 304 if (!parent[0]) 305 goto cleanup; 306 307 parent[1] = cg_name(parent[0], "memcg_test_1"); 308 if (!parent[1]) 309 goto cleanup; 310 311 parent[2] = cg_name(parent[0], "memcg_test_2"); 312 if (!parent[2]) 313 goto cleanup; 314 315 if (cg_create(parent[0])) 316 goto cleanup; 317 318 if (cg_read_long(parent[0], attribute)) { 319 /* No memory.min on older kernels is fine */ 320 if (min) 321 ret = KSFT_SKIP; 322 goto cleanup; 323 } 324 325 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 326 goto cleanup; 327 328 if (cg_write(parent[0], "memory.max", "200M")) 329 goto cleanup; 330 331 if (cg_write(parent[0], "memory.swap.max", "0")) 332 goto cleanup; 333 334 if (cg_create(parent[1])) 335 goto cleanup; 336 337 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 338 goto cleanup; 339 340 if (cg_create(parent[2])) 341 goto cleanup; 342 343 for (i = 0; i < ARRAY_SIZE(children); i++) { 344 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 345 if (!children[i]) 346 goto cleanup; 347 348 if (cg_create(children[i])) 349 goto cleanup; 350 351 if (i > 2) 352 continue; 353 354 cg_run_nowait(children[i], alloc_pagecache_50M_noexit, 355 (void *)(long)fd); 356 } 357 358 if (cg_write(parent[1], attribute, "50M")) 359 goto cleanup; 360 if (cg_write(children[0], attribute, "75M")) 361 goto cleanup; 362 if (cg_write(children[1], attribute, "25M")) 363 goto cleanup; 364 if (cg_write(children[2], attribute, "0")) 365 goto cleanup; 366 if (cg_write(children[3], attribute, "500M")) 367 goto cleanup; 368 369 attempts = 0; 370 while (!values_close(cg_read_long(parent[1], "memory.current"), 371 MB(150), 3)) { 372 if (attempts++ > 5) 373 break; 374 sleep(1); 375 } 376 377 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 378 goto cleanup; 379 380 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 381 goto cleanup; 382 383 for (i = 0; i < ARRAY_SIZE(children); i++) 384 c[i] = cg_read_long(children[i], "memory.current"); 385 386 if (!values_close(c[0], MB(29), 10)) 387 goto cleanup; 388 389 if (!values_close(c[1], MB(21), 10)) 390 goto cleanup; 391 392 if (c[3] != 0) 393 goto cleanup; 394 395 rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); 396 if (min && !rc) 397 goto cleanup; 398 else if (!min && rc) { 399 fprintf(stderr, 400 "memory.low prevents from allocating anon memory\n"); 401 goto cleanup; 402 } 403 404 current = min ? MB(50) : MB(30); 405 if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3)) 406 goto cleanup; 407 408 if (!reclaim_until(children[0], MB(10))) 409 goto cleanup; 410 411 if (min) { 412 ret = KSFT_PASS; 413 goto cleanup; 414 } 415 416 for (i = 0; i < ARRAY_SIZE(children); i++) { 417 int no_low_events_index = 1; 418 long low, oom; 419 420 oom = cg_read_key_long(children[i], "memory.events", "oom "); 421 low = cg_read_key_long(children[i], "memory.events", "low "); 422 423 if (oom) 424 goto cleanup; 425 if (i <= no_low_events_index && low <= 0) 426 goto cleanup; 427 if (i > no_low_events_index && low) 428 goto cleanup; 429 430 } 431 432 ret = KSFT_PASS; 433 434 cleanup: 435 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 436 if (!children[i]) 437 continue; 438 439 cg_destroy(children[i]); 440 free(children[i]); 441 } 442 443 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 444 if (!parent[i]) 445 continue; 446 447 cg_destroy(parent[i]); 448 free(parent[i]); 449 } 450 close(fd); 451 return ret; 452 } 453 454 static int test_memcg_min(const char *root) 455 { 456 return test_memcg_protection(root, true); 457 } 458 459 static int test_memcg_low(const char *root) 460 { 461 return test_memcg_protection(root, false); 462 } 463 464 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 465 { 466 size_t size = MB(50); 467 int ret = -1; 468 long current, high, max; 469 int fd; 470 471 high = cg_read_long(cgroup, "memory.high"); 472 max = cg_read_long(cgroup, "memory.max"); 473 if (high != MB(30) && max != MB(30)) 474 return -1; 475 476 fd = get_temp_fd(); 477 if (fd < 0) 478 return -1; 479 480 if (alloc_pagecache(fd, size)) 481 goto cleanup; 482 483 current = cg_read_long(cgroup, "memory.current"); 484 if (!values_close(current, MB(30), 5)) 485 goto cleanup; 486 487 ret = 0; 488 489 cleanup: 490 close(fd); 491 return ret; 492 493 } 494 495 /* 496 * This test checks that memory.high limits the amount of 497 * memory which can be consumed by either anonymous memory 498 * or pagecache. 499 */ 500 static int test_memcg_high(const char *root) 501 { 502 int ret = KSFT_FAIL; 503 char *memcg; 504 long high; 505 506 memcg = cg_name(root, "memcg_test"); 507 if (!memcg) 508 goto cleanup; 509 510 if (cg_create(memcg)) 511 goto cleanup; 512 513 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 514 goto cleanup; 515 516 if (cg_write(memcg, "memory.swap.max", "0")) 517 goto cleanup; 518 519 if (cg_write(memcg, "memory.high", "30M")) 520 goto cleanup; 521 522 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 523 goto cleanup; 524 525 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 526 goto cleanup; 527 528 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 529 goto cleanup; 530 531 high = cg_read_key_long(memcg, "memory.events", "high "); 532 if (high <= 0) 533 goto cleanup; 534 535 ret = KSFT_PASS; 536 537 cleanup: 538 cg_destroy(memcg); 539 free(memcg); 540 541 return ret; 542 } 543 544 static int alloc_anon_mlock(const char *cgroup, void *arg) 545 { 546 size_t size = (size_t)arg; 547 void *buf; 548 549 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 550 0, 0); 551 if (buf == MAP_FAILED) 552 return -1; 553 554 mlock(buf, size); 555 munmap(buf, size); 556 return 0; 557 } 558 559 /* 560 * This test checks that memory.high is able to throttle big single shot 561 * allocation i.e. large allocation within one kernel entry. 562 */ 563 static int test_memcg_high_sync(const char *root) 564 { 565 int ret = KSFT_FAIL, pid, fd = -1; 566 char *memcg; 567 long pre_high, pre_max; 568 long post_high, post_max; 569 570 memcg = cg_name(root, "memcg_test"); 571 if (!memcg) 572 goto cleanup; 573 574 if (cg_create(memcg)) 575 goto cleanup; 576 577 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 578 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 579 if (pre_high < 0 || pre_max < 0) 580 goto cleanup; 581 582 if (cg_write(memcg, "memory.swap.max", "0")) 583 goto cleanup; 584 585 if (cg_write(memcg, "memory.high", "30M")) 586 goto cleanup; 587 588 if (cg_write(memcg, "memory.max", "140M")) 589 goto cleanup; 590 591 fd = memcg_prepare_for_wait(memcg); 592 if (fd < 0) 593 goto cleanup; 594 595 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 596 if (pid < 0) 597 goto cleanup; 598 599 cg_wait_for(fd); 600 601 post_high = cg_read_key_long(memcg, "memory.events", "high "); 602 post_max = cg_read_key_long(memcg, "memory.events", "max "); 603 if (post_high < 0 || post_max < 0) 604 goto cleanup; 605 606 if (pre_high == post_high || pre_max != post_max) 607 goto cleanup; 608 609 ret = KSFT_PASS; 610 611 cleanup: 612 if (fd >= 0) 613 close(fd); 614 cg_destroy(memcg); 615 free(memcg); 616 617 return ret; 618 } 619 620 /* 621 * This test checks that memory.max limits the amount of 622 * memory which can be consumed by either anonymous memory 623 * or pagecache. 624 */ 625 static int test_memcg_max(const char *root) 626 { 627 int ret = KSFT_FAIL; 628 char *memcg; 629 long current, max; 630 631 memcg = cg_name(root, "memcg_test"); 632 if (!memcg) 633 goto cleanup; 634 635 if (cg_create(memcg)) 636 goto cleanup; 637 638 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 639 goto cleanup; 640 641 if (cg_write(memcg, "memory.swap.max", "0")) 642 goto cleanup; 643 644 if (cg_write(memcg, "memory.max", "30M")) 645 goto cleanup; 646 647 /* Should be killed by OOM killer */ 648 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 649 goto cleanup; 650 651 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 652 goto cleanup; 653 654 current = cg_read_long(memcg, "memory.current"); 655 if (current > MB(30) || !current) 656 goto cleanup; 657 658 max = cg_read_key_long(memcg, "memory.events", "max "); 659 if (max <= 0) 660 goto cleanup; 661 662 ret = KSFT_PASS; 663 664 cleanup: 665 cg_destroy(memcg); 666 free(memcg); 667 668 return ret; 669 } 670 671 /* 672 * Reclaim from @memcg until usage reaches @goal by writing to 673 * memory.reclaim. 674 * 675 * This function will return false if the usage is already below the 676 * goal. 677 * 678 * This function assumes that writing to memory.reclaim is the only 679 * source of change in memory.current (no concurrent allocations or 680 * reclaim). 681 * 682 * This function makes sure memory.reclaim is sane. It will return 683 * false if memory.reclaim's error codes do not make sense, even if 684 * the usage goal was satisfied. 685 */ 686 static bool reclaim_until(const char *memcg, long goal) 687 { 688 char buf[64]; 689 int retries, err; 690 long current, to_reclaim; 691 bool reclaimed = false; 692 693 for (retries = 5; retries > 0; retries--) { 694 current = cg_read_long(memcg, "memory.current"); 695 696 if (current < goal || values_close(current, goal, 3)) 697 break; 698 /* Did memory.reclaim return 0 incorrectly? */ 699 else if (reclaimed) 700 return false; 701 702 to_reclaim = current - goal; 703 snprintf(buf, sizeof(buf), "%ld", to_reclaim); 704 err = cg_write(memcg, "memory.reclaim", buf); 705 if (!err) 706 reclaimed = true; 707 else if (err != -EAGAIN) 708 return false; 709 } 710 return reclaimed; 711 } 712 713 /* 714 * This test checks that memory.reclaim reclaims the given 715 * amount of memory (from both anon and file, if possible). 716 */ 717 static int test_memcg_reclaim(const char *root) 718 { 719 int ret = KSFT_FAIL; 720 int fd = -1; 721 int retries; 722 char *memcg; 723 long current, expected_usage; 724 725 memcg = cg_name(root, "memcg_test"); 726 if (!memcg) 727 goto cleanup; 728 729 if (cg_create(memcg)) 730 goto cleanup; 731 732 current = cg_read_long(memcg, "memory.current"); 733 if (current != 0) 734 goto cleanup; 735 736 fd = get_temp_fd(); 737 if (fd < 0) 738 goto cleanup; 739 740 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); 741 742 /* 743 * If swap is enabled, try to reclaim from both anon and file, else try 744 * to reclaim from file only. 745 */ 746 if (is_swap_enabled()) { 747 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); 748 expected_usage = MB(100); 749 } else 750 expected_usage = MB(50); 751 752 /* 753 * Wait until current usage reaches the expected usage (or we run out of 754 * retries). 755 */ 756 retries = 5; 757 while (!values_close(cg_read_long(memcg, "memory.current"), 758 expected_usage, 10)) { 759 if (retries--) { 760 sleep(1); 761 continue; 762 } else { 763 fprintf(stderr, 764 "failed to allocate %ld for memcg reclaim test\n", 765 expected_usage); 766 goto cleanup; 767 } 768 } 769 770 /* 771 * Reclaim until current reaches 30M, this makes sure we hit both anon 772 * and file if swap is enabled. 773 */ 774 if (!reclaim_until(memcg, MB(30))) 775 goto cleanup; 776 777 ret = KSFT_PASS; 778 cleanup: 779 cg_destroy(memcg); 780 free(memcg); 781 close(fd); 782 783 return ret; 784 } 785 786 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 787 { 788 long mem_max = (long)arg; 789 size_t size = MB(50); 790 char *buf, *ptr; 791 long mem_current, swap_current; 792 int ret = -1; 793 794 buf = malloc(size); 795 if (buf == NULL) { 796 fprintf(stderr, "malloc() failed\n"); 797 return -1; 798 } 799 800 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 801 *ptr = 0; 802 803 mem_current = cg_read_long(cgroup, "memory.current"); 804 if (!mem_current || !values_close(mem_current, mem_max, 3)) 805 goto cleanup; 806 807 swap_current = cg_read_long(cgroup, "memory.swap.current"); 808 if (!swap_current || 809 !values_close(mem_current + swap_current, size, 3)) 810 goto cleanup; 811 812 ret = 0; 813 cleanup: 814 free(buf); 815 return ret; 816 } 817 818 /* 819 * This test checks that memory.swap.max limits the amount of 820 * anonymous memory which can be swapped out. 821 */ 822 static int test_memcg_swap_max(const char *root) 823 { 824 int ret = KSFT_FAIL; 825 char *memcg; 826 long max; 827 828 if (!is_swap_enabled()) 829 return KSFT_SKIP; 830 831 memcg = cg_name(root, "memcg_test"); 832 if (!memcg) 833 goto cleanup; 834 835 if (cg_create(memcg)) 836 goto cleanup; 837 838 if (cg_read_long(memcg, "memory.swap.current")) { 839 ret = KSFT_SKIP; 840 goto cleanup; 841 } 842 843 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 844 goto cleanup; 845 846 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 847 goto cleanup; 848 849 if (cg_write(memcg, "memory.swap.max", "30M")) 850 goto cleanup; 851 852 if (cg_write(memcg, "memory.max", "30M")) 853 goto cleanup; 854 855 /* Should be killed by OOM killer */ 856 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 857 goto cleanup; 858 859 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 860 goto cleanup; 861 862 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 863 goto cleanup; 864 865 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 866 goto cleanup; 867 868 max = cg_read_key_long(memcg, "memory.events", "max "); 869 if (max <= 0) 870 goto cleanup; 871 872 ret = KSFT_PASS; 873 874 cleanup: 875 cg_destroy(memcg); 876 free(memcg); 877 878 return ret; 879 } 880 881 /* 882 * This test disables swapping and tries to allocate anonymous memory 883 * up to OOM. Then it checks for oom and oom_kill events in 884 * memory.events. 885 */ 886 static int test_memcg_oom_events(const char *root) 887 { 888 int ret = KSFT_FAIL; 889 char *memcg; 890 891 memcg = cg_name(root, "memcg_test"); 892 if (!memcg) 893 goto cleanup; 894 895 if (cg_create(memcg)) 896 goto cleanup; 897 898 if (cg_write(memcg, "memory.max", "30M")) 899 goto cleanup; 900 901 if (cg_write(memcg, "memory.swap.max", "0")) 902 goto cleanup; 903 904 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 905 goto cleanup; 906 907 if (cg_read_strcmp(memcg, "cgroup.procs", "")) 908 goto cleanup; 909 910 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 911 goto cleanup; 912 913 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 914 goto cleanup; 915 916 ret = KSFT_PASS; 917 918 cleanup: 919 cg_destroy(memcg); 920 free(memcg); 921 922 return ret; 923 } 924 925 struct tcp_server_args { 926 unsigned short port; 927 int ctl[2]; 928 }; 929 930 static int tcp_server(const char *cgroup, void *arg) 931 { 932 struct tcp_server_args *srv_args = arg; 933 struct sockaddr_in6 saddr = { 0 }; 934 socklen_t slen = sizeof(saddr); 935 int sk, client_sk, ctl_fd, yes = 1, ret = -1; 936 937 close(srv_args->ctl[0]); 938 ctl_fd = srv_args->ctl[1]; 939 940 saddr.sin6_family = AF_INET6; 941 saddr.sin6_addr = in6addr_any; 942 saddr.sin6_port = htons(srv_args->port); 943 944 sk = socket(AF_INET6, SOCK_STREAM, 0); 945 if (sk < 0) 946 return ret; 947 948 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) 949 goto cleanup; 950 951 if (bind(sk, (struct sockaddr *)&saddr, slen)) { 952 write(ctl_fd, &errno, sizeof(errno)); 953 goto cleanup; 954 } 955 956 if (listen(sk, 1)) 957 goto cleanup; 958 959 ret = 0; 960 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { 961 ret = -1; 962 goto cleanup; 963 } 964 965 client_sk = accept(sk, NULL, NULL); 966 if (client_sk < 0) 967 goto cleanup; 968 969 ret = -1; 970 for (;;) { 971 uint8_t buf[0x100000]; 972 973 if (write(client_sk, buf, sizeof(buf)) <= 0) { 974 if (errno == ECONNRESET) 975 ret = 0; 976 break; 977 } 978 } 979 980 close(client_sk); 981 982 cleanup: 983 close(sk); 984 return ret; 985 } 986 987 static int tcp_client(const char *cgroup, unsigned short port) 988 { 989 const char server[] = "localhost"; 990 struct addrinfo *ai; 991 char servport[6]; 992 int retries = 0x10; /* nice round number */ 993 int sk, ret; 994 long allocated; 995 996 allocated = cg_read_long(cgroup, "memory.current"); 997 snprintf(servport, sizeof(servport), "%hd", port); 998 ret = getaddrinfo(server, servport, NULL, &ai); 999 if (ret) 1000 return ret; 1001 1002 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 1003 if (sk < 0) 1004 goto free_ainfo; 1005 1006 ret = connect(sk, ai->ai_addr, ai->ai_addrlen); 1007 if (ret < 0) 1008 goto close_sk; 1009 1010 ret = KSFT_FAIL; 1011 while (retries--) { 1012 uint8_t buf[0x100000]; 1013 long current, sock; 1014 1015 if (read(sk, buf, sizeof(buf)) <= 0) 1016 goto close_sk; 1017 1018 current = cg_read_long(cgroup, "memory.current"); 1019 sock = cg_read_key_long(cgroup, "memory.stat", "sock "); 1020 1021 if (current < 0 || sock < 0) 1022 goto close_sk; 1023 1024 /* exclude the memory not related to socket connection */ 1025 if (values_close(current - allocated, sock, 10)) { 1026 ret = KSFT_PASS; 1027 break; 1028 } 1029 } 1030 1031 close_sk: 1032 close(sk); 1033 free_ainfo: 1034 freeaddrinfo(ai); 1035 return ret; 1036 } 1037 1038 /* 1039 * This test checks socket memory accounting. 1040 * The test forks a TCP server listens on a random port between 1000 1041 * and 61000. Once it gets a client connection, it starts writing to 1042 * its socket. 1043 * The TCP client interleaves reads from the socket with check whether 1044 * memory.current and memory.stat.sock are similar. 1045 */ 1046 static int test_memcg_sock(const char *root) 1047 { 1048 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1049 unsigned short port; 1050 char *memcg; 1051 1052 memcg = cg_name(root, "memcg_test"); 1053 if (!memcg) 1054 goto cleanup; 1055 1056 if (cg_create(memcg)) 1057 goto cleanup; 1058 1059 while (bind_retries--) { 1060 struct tcp_server_args args; 1061 1062 if (pipe(args.ctl)) 1063 goto cleanup; 1064 1065 port = args.port = 1000 + rand() % 60000; 1066 1067 pid = cg_run_nowait(memcg, tcp_server, &args); 1068 if (pid < 0) 1069 goto cleanup; 1070 1071 close(args.ctl[1]); 1072 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1073 goto cleanup; 1074 close(args.ctl[0]); 1075 1076 if (!err) 1077 break; 1078 if (err != EADDRINUSE) 1079 goto cleanup; 1080 1081 waitpid(pid, NULL, 0); 1082 } 1083 1084 if (err == EADDRINUSE) { 1085 ret = KSFT_SKIP; 1086 goto cleanup; 1087 } 1088 1089 if (tcp_client(memcg, port) != KSFT_PASS) 1090 goto cleanup; 1091 1092 waitpid(pid, &err, 0); 1093 if (WEXITSTATUS(err)) 1094 goto cleanup; 1095 1096 if (cg_read_long(memcg, "memory.current") < 0) 1097 goto cleanup; 1098 1099 if (cg_read_key_long(memcg, "memory.stat", "sock ")) 1100 goto cleanup; 1101 1102 ret = KSFT_PASS; 1103 1104 cleanup: 1105 cg_destroy(memcg); 1106 free(memcg); 1107 1108 return ret; 1109 } 1110 1111 /* 1112 * This test disables swapping and tries to allocate anonymous memory 1113 * up to OOM with memory.group.oom set. Then it checks that all 1114 * processes in the leaf were killed. It also checks that oom_events 1115 * were propagated to the parent level. 1116 */ 1117 static int test_memcg_oom_group_leaf_events(const char *root) 1118 { 1119 int ret = KSFT_FAIL; 1120 char *parent, *child; 1121 long parent_oom_events; 1122 1123 parent = cg_name(root, "memcg_test_0"); 1124 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1125 1126 if (!parent || !child) 1127 goto cleanup; 1128 1129 if (cg_create(parent)) 1130 goto cleanup; 1131 1132 if (cg_create(child)) 1133 goto cleanup; 1134 1135 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1136 goto cleanup; 1137 1138 if (cg_write(child, "memory.max", "50M")) 1139 goto cleanup; 1140 1141 if (cg_write(child, "memory.swap.max", "0")) 1142 goto cleanup; 1143 1144 if (cg_write(child, "memory.oom.group", "1")) 1145 goto cleanup; 1146 1147 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1148 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1149 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1150 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1151 goto cleanup; 1152 1153 if (cg_test_proc_killed(child)) 1154 goto cleanup; 1155 1156 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1157 goto cleanup; 1158 1159 parent_oom_events = cg_read_key_long( 1160 parent, "memory.events", "oom_kill "); 1161 /* 1162 * If memory_localevents is not enabled (the default), the parent should 1163 * count OOM events in its children groups. Otherwise, it should not 1164 * have observed any events. 1165 */ 1166 if (has_localevents && parent_oom_events != 0) 1167 goto cleanup; 1168 else if (!has_localevents && parent_oom_events <= 0) 1169 goto cleanup; 1170 1171 ret = KSFT_PASS; 1172 1173 cleanup: 1174 if (child) 1175 cg_destroy(child); 1176 if (parent) 1177 cg_destroy(parent); 1178 free(child); 1179 free(parent); 1180 1181 return ret; 1182 } 1183 1184 /* 1185 * This test disables swapping and tries to allocate anonymous memory 1186 * up to OOM with memory.group.oom set. Then it checks that all 1187 * processes in the parent and leaf were killed. 1188 */ 1189 static int test_memcg_oom_group_parent_events(const char *root) 1190 { 1191 int ret = KSFT_FAIL; 1192 char *parent, *child; 1193 1194 parent = cg_name(root, "memcg_test_0"); 1195 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1196 1197 if (!parent || !child) 1198 goto cleanup; 1199 1200 if (cg_create(parent)) 1201 goto cleanup; 1202 1203 if (cg_create(child)) 1204 goto cleanup; 1205 1206 if (cg_write(parent, "memory.max", "80M")) 1207 goto cleanup; 1208 1209 if (cg_write(parent, "memory.swap.max", "0")) 1210 goto cleanup; 1211 1212 if (cg_write(parent, "memory.oom.group", "1")) 1213 goto cleanup; 1214 1215 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1216 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1217 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1218 1219 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1220 goto cleanup; 1221 1222 if (cg_test_proc_killed(child)) 1223 goto cleanup; 1224 if (cg_test_proc_killed(parent)) 1225 goto cleanup; 1226 1227 ret = KSFT_PASS; 1228 1229 cleanup: 1230 if (child) 1231 cg_destroy(child); 1232 if (parent) 1233 cg_destroy(parent); 1234 free(child); 1235 free(parent); 1236 1237 return ret; 1238 } 1239 1240 /* 1241 * This test disables swapping and tries to allocate anonymous memory 1242 * up to OOM with memory.group.oom set. Then it checks that all 1243 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1244 */ 1245 static int test_memcg_oom_group_score_events(const char *root) 1246 { 1247 int ret = KSFT_FAIL; 1248 char *memcg; 1249 int safe_pid; 1250 1251 memcg = cg_name(root, "memcg_test_0"); 1252 1253 if (!memcg) 1254 goto cleanup; 1255 1256 if (cg_create(memcg)) 1257 goto cleanup; 1258 1259 if (cg_write(memcg, "memory.max", "50M")) 1260 goto cleanup; 1261 1262 if (cg_write(memcg, "memory.swap.max", "0")) 1263 goto cleanup; 1264 1265 if (cg_write(memcg, "memory.oom.group", "1")) 1266 goto cleanup; 1267 1268 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1269 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1270 goto cleanup; 1271 1272 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1273 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1274 goto cleanup; 1275 1276 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1277 goto cleanup; 1278 1279 if (kill(safe_pid, SIGKILL)) 1280 goto cleanup; 1281 1282 ret = KSFT_PASS; 1283 1284 cleanup: 1285 if (memcg) 1286 cg_destroy(memcg); 1287 free(memcg); 1288 1289 return ret; 1290 } 1291 1292 #define T(x) { x, #x } 1293 struct memcg_test { 1294 int (*fn)(const char *root); 1295 const char *name; 1296 } tests[] = { 1297 T(test_memcg_subtree_control), 1298 T(test_memcg_current), 1299 T(test_memcg_min), 1300 T(test_memcg_low), 1301 T(test_memcg_high), 1302 T(test_memcg_high_sync), 1303 T(test_memcg_max), 1304 T(test_memcg_reclaim), 1305 T(test_memcg_oom_events), 1306 T(test_memcg_swap_max), 1307 T(test_memcg_sock), 1308 T(test_memcg_oom_group_leaf_events), 1309 T(test_memcg_oom_group_parent_events), 1310 T(test_memcg_oom_group_score_events), 1311 }; 1312 #undef T 1313 1314 int main(int argc, char **argv) 1315 { 1316 char root[PATH_MAX]; 1317 int i, proc_status, ret = EXIT_SUCCESS; 1318 1319 if (cg_find_unified_root(root, sizeof(root), NULL)) 1320 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1321 1322 /* 1323 * Check that memory controller is available: 1324 * memory is listed in cgroup.controllers 1325 */ 1326 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1327 ksft_exit_skip("memory controller isn't available\n"); 1328 1329 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1330 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1331 ksft_exit_skip("Failed to set memory controller\n"); 1332 1333 proc_status = proc_mount_contains("memory_recursiveprot"); 1334 if (proc_status < 0) 1335 ksft_exit_skip("Failed to query cgroup mount option\n"); 1336 has_recursiveprot = proc_status; 1337 1338 proc_status = proc_mount_contains("memory_localevents"); 1339 if (proc_status < 0) 1340 ksft_exit_skip("Failed to query cgroup mount option\n"); 1341 has_localevents = proc_status; 1342 1343 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1344 switch (tests[i].fn(root)) { 1345 case KSFT_PASS: 1346 ksft_test_result_pass("%s\n", tests[i].name); 1347 break; 1348 case KSFT_SKIP: 1349 ksft_test_result_skip("%s\n", tests[i].name); 1350 break; 1351 default: 1352 ret = EXIT_FAILURE; 1353 ksft_test_result_fail("%s\n", tests[i].name); 1354 break; 1355 } 1356 } 1357 1358 return ret; 1359 } 1360