1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/socket.h> 14 #include <sys/wait.h> 15 #include <arpa/inet.h> 16 #include <netinet/in.h> 17 #include <netdb.h> 18 #include <errno.h> 19 #include <sys/mman.h> 20 21 #include "../kselftest.h" 22 #include "cgroup_util.h" 23 24 static bool has_localevents; 25 static bool has_recursiveprot; 26 27 int get_temp_fd(void) 28 { 29 return open(".", O_TMPFILE | O_RDWR | O_EXCL); 30 } 31 32 int alloc_pagecache(int fd, size_t size) 33 { 34 char buf[PAGE_SIZE]; 35 struct stat st; 36 int i; 37 38 if (fstat(fd, &st)) 39 goto cleanup; 40 41 size += st.st_size; 42 43 if (ftruncate(fd, size)) 44 goto cleanup; 45 46 for (i = 0; i < size; i += sizeof(buf)) 47 read(fd, buf, sizeof(buf)); 48 49 return 0; 50 51 cleanup: 52 return -1; 53 } 54 55 int alloc_anon(const char *cgroup, void *arg) 56 { 57 size_t size = (unsigned long)arg; 58 char *buf, *ptr; 59 60 buf = malloc(size); 61 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 62 *ptr = 0; 63 64 free(buf); 65 return 0; 66 } 67 68 int is_swap_enabled(void) 69 { 70 char buf[PAGE_SIZE]; 71 const char delim[] = "\n"; 72 int cnt = 0; 73 char *line; 74 75 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) 76 return -1; 77 78 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 79 cnt++; 80 81 return cnt > 1; 82 } 83 84 int set_oom_adj_score(int pid, int score) 85 { 86 char path[PATH_MAX]; 87 int fd, len; 88 89 sprintf(path, "/proc/%d/oom_score_adj", pid); 90 91 fd = open(path, O_WRONLY | O_APPEND); 92 if (fd < 0) 93 return fd; 94 95 len = dprintf(fd, "%d", score); 96 if (len < 0) { 97 close(fd); 98 return len; 99 } 100 101 close(fd); 102 return 0; 103 } 104 105 /* 106 * This test creates two nested cgroups with and without enabling 107 * the memory controller. 108 */ 109 static int test_memcg_subtree_control(const char *root) 110 { 111 char *parent, *child, *parent2 = NULL, *child2 = NULL; 112 int ret = KSFT_FAIL; 113 char buf[PAGE_SIZE]; 114 115 /* Create two nested cgroups with the memory controller enabled */ 116 parent = cg_name(root, "memcg_test_0"); 117 child = cg_name(root, "memcg_test_0/memcg_test_1"); 118 if (!parent || !child) 119 goto cleanup_free; 120 121 if (cg_create(parent)) 122 goto cleanup_free; 123 124 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 125 goto cleanup_parent; 126 127 if (cg_create(child)) 128 goto cleanup_parent; 129 130 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 131 goto cleanup_child; 132 133 /* Create two nested cgroups without enabling memory controller */ 134 parent2 = cg_name(root, "memcg_test_1"); 135 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 136 if (!parent2 || !child2) 137 goto cleanup_free2; 138 139 if (cg_create(parent2)) 140 goto cleanup_free2; 141 142 if (cg_create(child2)) 143 goto cleanup_parent2; 144 145 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 146 goto cleanup_all; 147 148 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 149 goto cleanup_all; 150 151 ret = KSFT_PASS; 152 153 cleanup_all: 154 cg_destroy(child2); 155 cleanup_parent2: 156 cg_destroy(parent2); 157 cleanup_free2: 158 free(parent2); 159 free(child2); 160 cleanup_child: 161 cg_destroy(child); 162 cleanup_parent: 163 cg_destroy(parent); 164 cleanup_free: 165 free(parent); 166 free(child); 167 168 return ret; 169 } 170 171 static int alloc_anon_50M_check(const char *cgroup, void *arg) 172 { 173 size_t size = MB(50); 174 char *buf, *ptr; 175 long anon, current; 176 int ret = -1; 177 178 buf = malloc(size); 179 if (buf == NULL) { 180 fprintf(stderr, "malloc() failed\n"); 181 return -1; 182 } 183 184 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 185 *ptr = 0; 186 187 current = cg_read_long(cgroup, "memory.current"); 188 if (current < size) 189 goto cleanup; 190 191 if (!values_close(size, current, 3)) 192 goto cleanup; 193 194 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 195 if (anon < 0) 196 goto cleanup; 197 198 if (!values_close(anon, current, 3)) 199 goto cleanup; 200 201 ret = 0; 202 cleanup: 203 free(buf); 204 return ret; 205 } 206 207 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 208 { 209 size_t size = MB(50); 210 int ret = -1; 211 long current, file; 212 int fd; 213 214 fd = get_temp_fd(); 215 if (fd < 0) 216 return -1; 217 218 if (alloc_pagecache(fd, size)) 219 goto cleanup; 220 221 current = cg_read_long(cgroup, "memory.current"); 222 if (current < size) 223 goto cleanup; 224 225 file = cg_read_key_long(cgroup, "memory.stat", "file "); 226 if (file < 0) 227 goto cleanup; 228 229 if (!values_close(file, current, 10)) 230 goto cleanup; 231 232 ret = 0; 233 234 cleanup: 235 close(fd); 236 return ret; 237 } 238 239 /* 240 * This test create a memory cgroup, allocates 241 * some anonymous memory and some pagecache 242 * and checks memory.current, memory.peak, and some memory.stat values. 243 */ 244 static int test_memcg_current_peak(const char *root) 245 { 246 int ret = KSFT_FAIL; 247 long current, peak, peak_reset; 248 char *memcg; 249 bool fd2_closed = false, fd3_closed = false, fd4_closed = false; 250 int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1; 251 struct stat ss; 252 253 memcg = cg_name(root, "memcg_test"); 254 if (!memcg) 255 goto cleanup; 256 257 if (cg_create(memcg)) 258 goto cleanup; 259 260 current = cg_read_long(memcg, "memory.current"); 261 if (current != 0) 262 goto cleanup; 263 264 peak = cg_read_long(memcg, "memory.peak"); 265 if (peak != 0) 266 goto cleanup; 267 268 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 269 goto cleanup; 270 271 peak = cg_read_long(memcg, "memory.peak"); 272 if (peak < MB(50)) 273 goto cleanup; 274 275 /* 276 * We'll open a few FDs for the same memory.peak file to exercise the free-path 277 * We need at least three to be closed in a different order than writes occurred to test 278 * the linked-list handling. 279 */ 280 peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 281 282 if (peak_fd == -1) { 283 if (errno == ENOENT) 284 ret = KSFT_SKIP; 285 goto cleanup; 286 } 287 288 /* 289 * Before we try to use memory.peak's fd, try to figure out whether 290 * this kernel supports writing to that file in the first place. (by 291 * checking the writable bit on the file's st_mode) 292 */ 293 if (fstat(peak_fd, &ss)) 294 goto cleanup; 295 296 if ((ss.st_mode & S_IWUSR) == 0) { 297 ret = KSFT_SKIP; 298 goto cleanup; 299 } 300 301 peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 302 303 if (peak_fd2 == -1) 304 goto cleanup; 305 306 peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 307 308 if (peak_fd3 == -1) 309 goto cleanup; 310 311 /* any non-empty string resets, but make it clear */ 312 static const char reset_string[] = "reset\n"; 313 314 peak_reset = write(peak_fd, reset_string, sizeof(reset_string)); 315 if (peak_reset != sizeof(reset_string)) 316 goto cleanup; 317 318 peak_reset = write(peak_fd2, reset_string, sizeof(reset_string)); 319 if (peak_reset != sizeof(reset_string)) 320 goto cleanup; 321 322 peak_reset = write(peak_fd3, reset_string, sizeof(reset_string)); 323 if (peak_reset != sizeof(reset_string)) 324 goto cleanup; 325 326 /* Make sure a completely independent read isn't affected by our FD-local reset above*/ 327 peak = cg_read_long(memcg, "memory.peak"); 328 if (peak < MB(50)) 329 goto cleanup; 330 331 fd2_closed = true; 332 if (close(peak_fd2)) 333 goto cleanup; 334 335 peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 336 337 if (peak_fd4 == -1) 338 goto cleanup; 339 340 peak_reset = write(peak_fd4, reset_string, sizeof(reset_string)); 341 if (peak_reset != sizeof(reset_string)) 342 goto cleanup; 343 344 peak = cg_read_long_fd(peak_fd); 345 if (peak > MB(30) || peak < 0) 346 goto cleanup; 347 348 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 349 goto cleanup; 350 351 peak = cg_read_long(memcg, "memory.peak"); 352 if (peak < MB(50)) 353 goto cleanup; 354 355 /* Make sure everything is back to normal */ 356 peak = cg_read_long_fd(peak_fd); 357 if (peak < MB(50)) 358 goto cleanup; 359 360 peak = cg_read_long_fd(peak_fd4); 361 if (peak < MB(50)) 362 goto cleanup; 363 364 fd3_closed = true; 365 if (close(peak_fd3)) 366 goto cleanup; 367 368 fd4_closed = true; 369 if (close(peak_fd4)) 370 goto cleanup; 371 372 ret = KSFT_PASS; 373 374 cleanup: 375 close(peak_fd); 376 if (!fd2_closed) 377 close(peak_fd2); 378 if (!fd3_closed) 379 close(peak_fd3); 380 if (!fd4_closed) 381 close(peak_fd4); 382 cg_destroy(memcg); 383 free(memcg); 384 385 return ret; 386 } 387 388 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 389 { 390 int fd = (long)arg; 391 int ppid = getppid(); 392 393 if (alloc_pagecache(fd, MB(50))) 394 return -1; 395 396 while (getppid() == ppid) 397 sleep(1); 398 399 return 0; 400 } 401 402 static int alloc_anon_noexit(const char *cgroup, void *arg) 403 { 404 int ppid = getppid(); 405 size_t size = (unsigned long)arg; 406 char *buf, *ptr; 407 408 buf = malloc(size); 409 if (buf == NULL) { 410 fprintf(stderr, "malloc() failed\n"); 411 return -1; 412 } 413 414 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 415 *ptr = 0; 416 417 while (getppid() == ppid) 418 sleep(1); 419 420 free(buf); 421 return 0; 422 } 423 424 /* 425 * Wait until processes are killed asynchronously by the OOM killer 426 * If we exceed a timeout, fail. 427 */ 428 static int cg_test_proc_killed(const char *cgroup) 429 { 430 int limit; 431 432 for (limit = 10; limit > 0; limit--) { 433 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 434 return 0; 435 436 usleep(100000); 437 } 438 return -1; 439 } 440 441 static bool reclaim_until(const char *memcg, long goal); 442 443 /* 444 * First, this test creates the following hierarchy: 445 * A memory.min = 0, memory.max = 200M 446 * A/B memory.min = 50M 447 * A/B/C memory.min = 75M, memory.current = 50M 448 * A/B/D memory.min = 25M, memory.current = 50M 449 * A/B/E memory.min = 0, memory.current = 50M 450 * A/B/F memory.min = 500M, memory.current = 0 451 * 452 * (or memory.low if we test soft protection) 453 * 454 * Usages are pagecache and the test keeps a running 455 * process in every leaf cgroup. 456 * Then it creates A/G and creates a significant 457 * memory pressure in A. 458 * 459 * Then it checks actual memory usages and expects that: 460 * A/B memory.current ~= 50M 461 * A/B/C memory.current ~= 29M [memory.events:low > 0] 462 * A/B/D memory.current ~= 21M [memory.events:low > 0] 463 * A/B/E memory.current ~= 0 [memory.events:low == 0 if !memory_recursiveprot, 464 * undefined otherwise] 465 * A/B/F memory.current = 0 [memory.events:low == 0] 466 * (for origin of the numbers, see model in memcg_protection.m.) 467 * 468 * After that it tries to allocate more than there is 469 * unprotected memory in A available, and checks that: 470 * a) memory.min protects pagecache even in this case, 471 * b) memory.low allows reclaiming page cache with low events. 472 * 473 * Then we try to reclaim from A/B/C using memory.reclaim until its 474 * usage reaches 10M. 475 * This makes sure that: 476 * (a) We ignore the protection of the reclaim target memcg. 477 * (b) The previously calculated emin value (~29M) should be dismissed. 478 */ 479 static int test_memcg_protection(const char *root, bool min) 480 { 481 int ret = KSFT_FAIL, rc; 482 char *parent[3] = {NULL}; 483 char *children[4] = {NULL}; 484 const char *attribute = min ? "memory.min" : "memory.low"; 485 long c[4]; 486 long current; 487 int i, attempts; 488 int fd; 489 490 fd = get_temp_fd(); 491 if (fd < 0) 492 goto cleanup; 493 494 parent[0] = cg_name(root, "memcg_test_0"); 495 if (!parent[0]) 496 goto cleanup; 497 498 parent[1] = cg_name(parent[0], "memcg_test_1"); 499 if (!parent[1]) 500 goto cleanup; 501 502 parent[2] = cg_name(parent[0], "memcg_test_2"); 503 if (!parent[2]) 504 goto cleanup; 505 506 if (cg_create(parent[0])) 507 goto cleanup; 508 509 if (cg_read_long(parent[0], attribute)) { 510 /* No memory.min on older kernels is fine */ 511 if (min) 512 ret = KSFT_SKIP; 513 goto cleanup; 514 } 515 516 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 517 goto cleanup; 518 519 if (cg_write(parent[0], "memory.max", "200M")) 520 goto cleanup; 521 522 if (cg_write(parent[0], "memory.swap.max", "0")) 523 goto cleanup; 524 525 if (cg_create(parent[1])) 526 goto cleanup; 527 528 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 529 goto cleanup; 530 531 if (cg_create(parent[2])) 532 goto cleanup; 533 534 for (i = 0; i < ARRAY_SIZE(children); i++) { 535 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 536 if (!children[i]) 537 goto cleanup; 538 539 if (cg_create(children[i])) 540 goto cleanup; 541 542 if (i > 2) 543 continue; 544 545 cg_run_nowait(children[i], alloc_pagecache_50M_noexit, 546 (void *)(long)fd); 547 } 548 549 if (cg_write(parent[1], attribute, "50M")) 550 goto cleanup; 551 if (cg_write(children[0], attribute, "75M")) 552 goto cleanup; 553 if (cg_write(children[1], attribute, "25M")) 554 goto cleanup; 555 if (cg_write(children[2], attribute, "0")) 556 goto cleanup; 557 if (cg_write(children[3], attribute, "500M")) 558 goto cleanup; 559 560 attempts = 0; 561 while (!values_close(cg_read_long(parent[1], "memory.current"), 562 MB(150), 3)) { 563 if (attempts++ > 5) 564 break; 565 sleep(1); 566 } 567 568 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 569 goto cleanup; 570 571 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 572 goto cleanup; 573 574 for (i = 0; i < ARRAY_SIZE(children); i++) 575 c[i] = cg_read_long(children[i], "memory.current"); 576 577 if (!values_close(c[0], MB(29), 15)) 578 goto cleanup; 579 580 if (!values_close(c[1], MB(21), 20)) 581 goto cleanup; 582 583 if (c[3] != 0) 584 goto cleanup; 585 586 rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); 587 if (min && !rc) 588 goto cleanup; 589 else if (!min && rc) { 590 fprintf(stderr, 591 "memory.low prevents from allocating anon memory\n"); 592 goto cleanup; 593 } 594 595 current = min ? MB(50) : MB(30); 596 if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3)) 597 goto cleanup; 598 599 if (!reclaim_until(children[0], MB(10))) 600 goto cleanup; 601 602 if (min) { 603 ret = KSFT_PASS; 604 goto cleanup; 605 } 606 607 /* 608 * Child 2 has memory.low=0, but some low protection may still be 609 * distributed down from its parent with memory.low=50M if cgroup2 610 * memory_recursiveprot mount option is enabled. Ignore the low 611 * event count in this case. 612 */ 613 for (i = 0; i < ARRAY_SIZE(children); i++) { 614 int ignore_low_events_index = has_recursiveprot ? 2 : -1; 615 int no_low_events_index = 1; 616 long low, oom; 617 618 oom = cg_read_key_long(children[i], "memory.events", "oom "); 619 low = cg_read_key_long(children[i], "memory.events", "low "); 620 621 if (oom) 622 goto cleanup; 623 if (i == ignore_low_events_index) 624 continue; 625 if (i <= no_low_events_index && low <= 0) 626 goto cleanup; 627 if (i > no_low_events_index && low) 628 goto cleanup; 629 630 } 631 632 ret = KSFT_PASS; 633 634 cleanup: 635 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 636 if (!children[i]) 637 continue; 638 639 cg_destroy(children[i]); 640 free(children[i]); 641 } 642 643 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 644 if (!parent[i]) 645 continue; 646 647 cg_destroy(parent[i]); 648 free(parent[i]); 649 } 650 close(fd); 651 return ret; 652 } 653 654 static int test_memcg_min(const char *root) 655 { 656 return test_memcg_protection(root, true); 657 } 658 659 static int test_memcg_low(const char *root) 660 { 661 return test_memcg_protection(root, false); 662 } 663 664 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 665 { 666 size_t size = MB(50); 667 int ret = -1; 668 long current, high, max; 669 int fd; 670 671 high = cg_read_long(cgroup, "memory.high"); 672 max = cg_read_long(cgroup, "memory.max"); 673 if (high != MB(30) && max != MB(30)) 674 return -1; 675 676 fd = get_temp_fd(); 677 if (fd < 0) 678 return -1; 679 680 if (alloc_pagecache(fd, size)) 681 goto cleanup; 682 683 current = cg_read_long(cgroup, "memory.current"); 684 if (!values_close(current, MB(30), 5)) 685 goto cleanup; 686 687 ret = 0; 688 689 cleanup: 690 close(fd); 691 return ret; 692 693 } 694 695 /* 696 * This test checks that memory.high limits the amount of 697 * memory which can be consumed by either anonymous memory 698 * or pagecache. 699 */ 700 static int test_memcg_high(const char *root) 701 { 702 int ret = KSFT_FAIL; 703 char *memcg; 704 long high; 705 706 memcg = cg_name(root, "memcg_test"); 707 if (!memcg) 708 goto cleanup; 709 710 if (cg_create(memcg)) 711 goto cleanup; 712 713 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 714 goto cleanup; 715 716 if (cg_write(memcg, "memory.swap.max", "0")) 717 goto cleanup; 718 719 if (cg_write(memcg, "memory.high", "30M")) 720 goto cleanup; 721 722 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 723 goto cleanup; 724 725 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 726 goto cleanup; 727 728 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 729 goto cleanup; 730 731 high = cg_read_key_long(memcg, "memory.events", "high "); 732 if (high <= 0) 733 goto cleanup; 734 735 ret = KSFT_PASS; 736 737 cleanup: 738 cg_destroy(memcg); 739 free(memcg); 740 741 return ret; 742 } 743 744 static int alloc_anon_mlock(const char *cgroup, void *arg) 745 { 746 size_t size = (size_t)arg; 747 void *buf; 748 749 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 750 0, 0); 751 if (buf == MAP_FAILED) 752 return -1; 753 754 mlock(buf, size); 755 munmap(buf, size); 756 return 0; 757 } 758 759 /* 760 * This test checks that memory.high is able to throttle big single shot 761 * allocation i.e. large allocation within one kernel entry. 762 */ 763 static int test_memcg_high_sync(const char *root) 764 { 765 int ret = KSFT_FAIL, pid, fd = -1; 766 char *memcg; 767 long pre_high, pre_max; 768 long post_high, post_max; 769 770 memcg = cg_name(root, "memcg_test"); 771 if (!memcg) 772 goto cleanup; 773 774 if (cg_create(memcg)) 775 goto cleanup; 776 777 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 778 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 779 if (pre_high < 0 || pre_max < 0) 780 goto cleanup; 781 782 if (cg_write(memcg, "memory.swap.max", "0")) 783 goto cleanup; 784 785 if (cg_write(memcg, "memory.high", "30M")) 786 goto cleanup; 787 788 if (cg_write(memcg, "memory.max", "140M")) 789 goto cleanup; 790 791 fd = memcg_prepare_for_wait(memcg); 792 if (fd < 0) 793 goto cleanup; 794 795 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 796 if (pid < 0) 797 goto cleanup; 798 799 cg_wait_for(fd); 800 801 post_high = cg_read_key_long(memcg, "memory.events", "high "); 802 post_max = cg_read_key_long(memcg, "memory.events", "max "); 803 if (post_high < 0 || post_max < 0) 804 goto cleanup; 805 806 if (pre_high == post_high || pre_max != post_max) 807 goto cleanup; 808 809 ret = KSFT_PASS; 810 811 cleanup: 812 if (fd >= 0) 813 close(fd); 814 cg_destroy(memcg); 815 free(memcg); 816 817 return ret; 818 } 819 820 /* 821 * This test checks that memory.max limits the amount of 822 * memory which can be consumed by either anonymous memory 823 * or pagecache. 824 */ 825 static int test_memcg_max(const char *root) 826 { 827 int ret = KSFT_FAIL; 828 char *memcg; 829 long current, max; 830 831 memcg = cg_name(root, "memcg_test"); 832 if (!memcg) 833 goto cleanup; 834 835 if (cg_create(memcg)) 836 goto cleanup; 837 838 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 839 goto cleanup; 840 841 if (cg_write(memcg, "memory.swap.max", "0")) 842 goto cleanup; 843 844 if (cg_write(memcg, "memory.max", "30M")) 845 goto cleanup; 846 847 /* Should be killed by OOM killer */ 848 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 849 goto cleanup; 850 851 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 852 goto cleanup; 853 854 current = cg_read_long(memcg, "memory.current"); 855 if (current > MB(30) || !current) 856 goto cleanup; 857 858 max = cg_read_key_long(memcg, "memory.events", "max "); 859 if (max <= 0) 860 goto cleanup; 861 862 ret = KSFT_PASS; 863 864 cleanup: 865 cg_destroy(memcg); 866 free(memcg); 867 868 return ret; 869 } 870 871 /* 872 * Reclaim from @memcg until usage reaches @goal by writing to 873 * memory.reclaim. 874 * 875 * This function will return false if the usage is already below the 876 * goal. 877 * 878 * This function assumes that writing to memory.reclaim is the only 879 * source of change in memory.current (no concurrent allocations or 880 * reclaim). 881 * 882 * This function makes sure memory.reclaim is sane. It will return 883 * false if memory.reclaim's error codes do not make sense, even if 884 * the usage goal was satisfied. 885 */ 886 static bool reclaim_until(const char *memcg, long goal) 887 { 888 char buf[64]; 889 int retries, err; 890 long current, to_reclaim; 891 bool reclaimed = false; 892 893 for (retries = 5; retries > 0; retries--) { 894 current = cg_read_long(memcg, "memory.current"); 895 896 if (current < goal || values_close(current, goal, 3)) 897 break; 898 /* Did memory.reclaim return 0 incorrectly? */ 899 else if (reclaimed) 900 return false; 901 902 to_reclaim = current - goal; 903 snprintf(buf, sizeof(buf), "%ld", to_reclaim); 904 err = cg_write(memcg, "memory.reclaim", buf); 905 if (!err) 906 reclaimed = true; 907 else if (err != -EAGAIN) 908 return false; 909 } 910 return reclaimed; 911 } 912 913 /* 914 * This test checks that memory.reclaim reclaims the given 915 * amount of memory (from both anon and file, if possible). 916 */ 917 static int test_memcg_reclaim(const char *root) 918 { 919 int ret = KSFT_FAIL; 920 int fd = -1; 921 int retries; 922 char *memcg; 923 long current, expected_usage; 924 925 memcg = cg_name(root, "memcg_test"); 926 if (!memcg) 927 goto cleanup; 928 929 if (cg_create(memcg)) 930 goto cleanup; 931 932 current = cg_read_long(memcg, "memory.current"); 933 if (current != 0) 934 goto cleanup; 935 936 fd = get_temp_fd(); 937 if (fd < 0) 938 goto cleanup; 939 940 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); 941 942 /* 943 * If swap is enabled, try to reclaim from both anon and file, else try 944 * to reclaim from file only. 945 */ 946 if (is_swap_enabled()) { 947 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); 948 expected_usage = MB(100); 949 } else 950 expected_usage = MB(50); 951 952 /* 953 * Wait until current usage reaches the expected usage (or we run out of 954 * retries). 955 */ 956 retries = 5; 957 while (!values_close(cg_read_long(memcg, "memory.current"), 958 expected_usage, 10)) { 959 if (retries--) { 960 sleep(1); 961 continue; 962 } else { 963 fprintf(stderr, 964 "failed to allocate %ld for memcg reclaim test\n", 965 expected_usage); 966 goto cleanup; 967 } 968 } 969 970 /* 971 * Reclaim until current reaches 30M, this makes sure we hit both anon 972 * and file if swap is enabled. 973 */ 974 if (!reclaim_until(memcg, MB(30))) 975 goto cleanup; 976 977 ret = KSFT_PASS; 978 cleanup: 979 cg_destroy(memcg); 980 free(memcg); 981 close(fd); 982 983 return ret; 984 } 985 986 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 987 { 988 long mem_max = (long)arg; 989 size_t size = MB(50); 990 char *buf, *ptr; 991 long mem_current, swap_current; 992 int ret = -1; 993 994 buf = malloc(size); 995 if (buf == NULL) { 996 fprintf(stderr, "malloc() failed\n"); 997 return -1; 998 } 999 1000 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 1001 *ptr = 0; 1002 1003 mem_current = cg_read_long(cgroup, "memory.current"); 1004 if (!mem_current || !values_close(mem_current, mem_max, 3)) 1005 goto cleanup; 1006 1007 swap_current = cg_read_long(cgroup, "memory.swap.current"); 1008 if (!swap_current || 1009 !values_close(mem_current + swap_current, size, 3)) 1010 goto cleanup; 1011 1012 ret = 0; 1013 cleanup: 1014 free(buf); 1015 return ret; 1016 } 1017 1018 /* 1019 * This test checks that memory.swap.max limits the amount of 1020 * anonymous memory which can be swapped out. Additionally, it verifies that 1021 * memory.swap.peak reflects the high watermark and can be reset. 1022 */ 1023 static int test_memcg_swap_max_peak(const char *root) 1024 { 1025 int ret = KSFT_FAIL; 1026 char *memcg; 1027 long max, peak; 1028 struct stat ss; 1029 int swap_peak_fd = -1, mem_peak_fd = -1; 1030 1031 /* any non-empty string resets */ 1032 static const char reset_string[] = "foobarbaz"; 1033 1034 if (!is_swap_enabled()) 1035 return KSFT_SKIP; 1036 1037 memcg = cg_name(root, "memcg_test"); 1038 if (!memcg) 1039 goto cleanup; 1040 1041 if (cg_create(memcg)) 1042 goto cleanup; 1043 1044 if (cg_read_long(memcg, "memory.swap.current")) { 1045 ret = KSFT_SKIP; 1046 goto cleanup; 1047 } 1048 1049 swap_peak_fd = cg_open(memcg, "memory.swap.peak", 1050 O_RDWR | O_APPEND | O_CLOEXEC); 1051 1052 if (swap_peak_fd == -1) { 1053 if (errno == ENOENT) 1054 ret = KSFT_SKIP; 1055 goto cleanup; 1056 } 1057 1058 /* 1059 * Before we try to use memory.swap.peak's fd, try to figure out 1060 * whether this kernel supports writing to that file in the first 1061 * place. (by checking the writable bit on the file's st_mode) 1062 */ 1063 if (fstat(swap_peak_fd, &ss)) 1064 goto cleanup; 1065 1066 if ((ss.st_mode & S_IWUSR) == 0) { 1067 ret = KSFT_SKIP; 1068 goto cleanup; 1069 } 1070 1071 mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 1072 1073 if (mem_peak_fd == -1) 1074 goto cleanup; 1075 1076 if (cg_read_long(memcg, "memory.swap.peak")) 1077 goto cleanup; 1078 1079 if (cg_read_long_fd(swap_peak_fd)) 1080 goto cleanup; 1081 1082 /* switch the swap and mem fds into local-peak tracking mode*/ 1083 int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); 1084 1085 if (peak_reset != sizeof(reset_string)) 1086 goto cleanup; 1087 1088 if (cg_read_long_fd(swap_peak_fd)) 1089 goto cleanup; 1090 1091 if (cg_read_long(memcg, "memory.peak")) 1092 goto cleanup; 1093 1094 if (cg_read_long_fd(mem_peak_fd)) 1095 goto cleanup; 1096 1097 peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); 1098 if (peak_reset != sizeof(reset_string)) 1099 goto cleanup; 1100 1101 if (cg_read_long_fd(mem_peak_fd)) 1102 goto cleanup; 1103 1104 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 1105 goto cleanup; 1106 1107 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 1108 goto cleanup; 1109 1110 if (cg_write(memcg, "memory.swap.max", "30M")) 1111 goto cleanup; 1112 1113 if (cg_write(memcg, "memory.max", "30M")) 1114 goto cleanup; 1115 1116 /* Should be killed by OOM killer */ 1117 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1118 goto cleanup; 1119 1120 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 1121 goto cleanup; 1122 1123 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 1124 goto cleanup; 1125 1126 peak = cg_read_long(memcg, "memory.peak"); 1127 if (peak < MB(29)) 1128 goto cleanup; 1129 1130 peak = cg_read_long(memcg, "memory.swap.peak"); 1131 if (peak < MB(29)) 1132 goto cleanup; 1133 1134 peak = cg_read_long_fd(mem_peak_fd); 1135 if (peak < MB(29)) 1136 goto cleanup; 1137 1138 peak = cg_read_long_fd(swap_peak_fd); 1139 if (peak < MB(29)) 1140 goto cleanup; 1141 1142 /* 1143 * open, reset and close the peak swap on another FD to make sure 1144 * multiple extant fds don't corrupt the linked-list 1145 */ 1146 peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string); 1147 if (peak_reset) 1148 goto cleanup; 1149 1150 peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string); 1151 if (peak_reset) 1152 goto cleanup; 1153 1154 /* actually reset on the fds */ 1155 peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); 1156 if (peak_reset != sizeof(reset_string)) 1157 goto cleanup; 1158 1159 peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); 1160 if (peak_reset != sizeof(reset_string)) 1161 goto cleanup; 1162 1163 peak = cg_read_long_fd(swap_peak_fd); 1164 if (peak > MB(10)) 1165 goto cleanup; 1166 1167 /* 1168 * The cgroup is now empty, but there may be a page or two associated 1169 * with the open FD accounted to it. 1170 */ 1171 peak = cg_read_long_fd(mem_peak_fd); 1172 if (peak > MB(1)) 1173 goto cleanup; 1174 1175 if (cg_read_long(memcg, "memory.peak") < MB(29)) 1176 goto cleanup; 1177 1178 if (cg_read_long(memcg, "memory.swap.peak") < MB(29)) 1179 goto cleanup; 1180 1181 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 1182 goto cleanup; 1183 1184 max = cg_read_key_long(memcg, "memory.events", "max "); 1185 if (max <= 0) 1186 goto cleanup; 1187 1188 peak = cg_read_long(memcg, "memory.peak"); 1189 if (peak < MB(29)) 1190 goto cleanup; 1191 1192 peak = cg_read_long(memcg, "memory.swap.peak"); 1193 if (peak < MB(29)) 1194 goto cleanup; 1195 1196 peak = cg_read_long_fd(mem_peak_fd); 1197 if (peak < MB(29)) 1198 goto cleanup; 1199 1200 peak = cg_read_long_fd(swap_peak_fd); 1201 if (peak < MB(19)) 1202 goto cleanup; 1203 1204 ret = KSFT_PASS; 1205 1206 cleanup: 1207 if (mem_peak_fd != -1 && close(mem_peak_fd)) 1208 ret = KSFT_FAIL; 1209 if (swap_peak_fd != -1 && close(swap_peak_fd)) 1210 ret = KSFT_FAIL; 1211 cg_destroy(memcg); 1212 free(memcg); 1213 1214 return ret; 1215 } 1216 1217 /* 1218 * This test disables swapping and tries to allocate anonymous memory 1219 * up to OOM. Then it checks for oom and oom_kill events in 1220 * memory.events. 1221 */ 1222 static int test_memcg_oom_events(const char *root) 1223 { 1224 int ret = KSFT_FAIL; 1225 char *memcg; 1226 1227 memcg = cg_name(root, "memcg_test"); 1228 if (!memcg) 1229 goto cleanup; 1230 1231 if (cg_create(memcg)) 1232 goto cleanup; 1233 1234 if (cg_write(memcg, "memory.max", "30M")) 1235 goto cleanup; 1236 1237 if (cg_write(memcg, "memory.swap.max", "0")) 1238 goto cleanup; 1239 1240 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1241 goto cleanup; 1242 1243 if (cg_read_strcmp(memcg, "cgroup.procs", "")) 1244 goto cleanup; 1245 1246 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 1247 goto cleanup; 1248 1249 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 1250 goto cleanup; 1251 1252 ret = KSFT_PASS; 1253 1254 cleanup: 1255 cg_destroy(memcg); 1256 free(memcg); 1257 1258 return ret; 1259 } 1260 1261 struct tcp_server_args { 1262 unsigned short port; 1263 int ctl[2]; 1264 }; 1265 1266 static int tcp_server(const char *cgroup, void *arg) 1267 { 1268 struct tcp_server_args *srv_args = arg; 1269 struct sockaddr_in6 saddr = { 0 }; 1270 socklen_t slen = sizeof(saddr); 1271 int sk, client_sk, ctl_fd, yes = 1, ret = -1; 1272 1273 close(srv_args->ctl[0]); 1274 ctl_fd = srv_args->ctl[1]; 1275 1276 saddr.sin6_family = AF_INET6; 1277 saddr.sin6_addr = in6addr_any; 1278 saddr.sin6_port = htons(srv_args->port); 1279 1280 sk = socket(AF_INET6, SOCK_STREAM, 0); 1281 if (sk < 0) 1282 return ret; 1283 1284 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) 1285 goto cleanup; 1286 1287 if (bind(sk, (struct sockaddr *)&saddr, slen)) { 1288 write(ctl_fd, &errno, sizeof(errno)); 1289 goto cleanup; 1290 } 1291 1292 if (listen(sk, 1)) 1293 goto cleanup; 1294 1295 ret = 0; 1296 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { 1297 ret = -1; 1298 goto cleanup; 1299 } 1300 1301 client_sk = accept(sk, NULL, NULL); 1302 if (client_sk < 0) 1303 goto cleanup; 1304 1305 ret = -1; 1306 for (;;) { 1307 uint8_t buf[0x100000]; 1308 1309 if (write(client_sk, buf, sizeof(buf)) <= 0) { 1310 if (errno == ECONNRESET) 1311 ret = 0; 1312 break; 1313 } 1314 } 1315 1316 close(client_sk); 1317 1318 cleanup: 1319 close(sk); 1320 return ret; 1321 } 1322 1323 static int tcp_client(const char *cgroup, unsigned short port) 1324 { 1325 const char server[] = "localhost"; 1326 struct addrinfo *ai; 1327 char servport[6]; 1328 int retries = 0x10; /* nice round number */ 1329 int sk, ret; 1330 long allocated; 1331 1332 allocated = cg_read_long(cgroup, "memory.current"); 1333 snprintf(servport, sizeof(servport), "%hd", port); 1334 ret = getaddrinfo(server, servport, NULL, &ai); 1335 if (ret) 1336 return ret; 1337 1338 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 1339 if (sk < 0) 1340 goto free_ainfo; 1341 1342 ret = connect(sk, ai->ai_addr, ai->ai_addrlen); 1343 if (ret < 0) 1344 goto close_sk; 1345 1346 ret = KSFT_FAIL; 1347 while (retries--) { 1348 uint8_t buf[0x100000]; 1349 long current, sock; 1350 1351 if (read(sk, buf, sizeof(buf)) <= 0) 1352 goto close_sk; 1353 1354 current = cg_read_long(cgroup, "memory.current"); 1355 sock = cg_read_key_long(cgroup, "memory.stat", "sock "); 1356 1357 if (current < 0 || sock < 0) 1358 goto close_sk; 1359 1360 /* exclude the memory not related to socket connection */ 1361 if (values_close(current - allocated, sock, 10)) { 1362 ret = KSFT_PASS; 1363 break; 1364 } 1365 } 1366 1367 close_sk: 1368 close(sk); 1369 free_ainfo: 1370 freeaddrinfo(ai); 1371 return ret; 1372 } 1373 1374 /* 1375 * This test checks socket memory accounting. 1376 * The test forks a TCP server listens on a random port between 1000 1377 * and 61000. Once it gets a client connection, it starts writing to 1378 * its socket. 1379 * The TCP client interleaves reads from the socket with check whether 1380 * memory.current and memory.stat.sock are similar. 1381 */ 1382 static int test_memcg_sock(const char *root) 1383 { 1384 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1385 unsigned short port; 1386 char *memcg; 1387 1388 memcg = cg_name(root, "memcg_test"); 1389 if (!memcg) 1390 goto cleanup; 1391 1392 if (cg_create(memcg)) 1393 goto cleanup; 1394 1395 while (bind_retries--) { 1396 struct tcp_server_args args; 1397 1398 if (pipe(args.ctl)) 1399 goto cleanup; 1400 1401 port = args.port = 1000 + rand() % 60000; 1402 1403 pid = cg_run_nowait(memcg, tcp_server, &args); 1404 if (pid < 0) 1405 goto cleanup; 1406 1407 close(args.ctl[1]); 1408 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1409 goto cleanup; 1410 close(args.ctl[0]); 1411 1412 if (!err) 1413 break; 1414 if (err != EADDRINUSE) 1415 goto cleanup; 1416 1417 waitpid(pid, NULL, 0); 1418 } 1419 1420 if (err == EADDRINUSE) { 1421 ret = KSFT_SKIP; 1422 goto cleanup; 1423 } 1424 1425 if (tcp_client(memcg, port) != KSFT_PASS) 1426 goto cleanup; 1427 1428 waitpid(pid, &err, 0); 1429 if (WEXITSTATUS(err)) 1430 goto cleanup; 1431 1432 if (cg_read_long(memcg, "memory.current") < 0) 1433 goto cleanup; 1434 1435 if (cg_read_key_long(memcg, "memory.stat", "sock ")) 1436 goto cleanup; 1437 1438 ret = KSFT_PASS; 1439 1440 cleanup: 1441 cg_destroy(memcg); 1442 free(memcg); 1443 1444 return ret; 1445 } 1446 1447 /* 1448 * This test disables swapping and tries to allocate anonymous memory 1449 * up to OOM with memory.group.oom set. Then it checks that all 1450 * processes in the leaf were killed. It also checks that oom_events 1451 * were propagated to the parent level. 1452 */ 1453 static int test_memcg_oom_group_leaf_events(const char *root) 1454 { 1455 int ret = KSFT_FAIL; 1456 char *parent, *child; 1457 long parent_oom_events; 1458 1459 parent = cg_name(root, "memcg_test_0"); 1460 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1461 1462 if (!parent || !child) 1463 goto cleanup; 1464 1465 if (cg_create(parent)) 1466 goto cleanup; 1467 1468 if (cg_create(child)) 1469 goto cleanup; 1470 1471 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1472 goto cleanup; 1473 1474 if (cg_write(child, "memory.max", "50M")) 1475 goto cleanup; 1476 1477 if (cg_write(child, "memory.swap.max", "0")) 1478 goto cleanup; 1479 1480 if (cg_write(child, "memory.oom.group", "1")) 1481 goto cleanup; 1482 1483 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1484 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1485 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1486 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1487 goto cleanup; 1488 1489 if (cg_test_proc_killed(child)) 1490 goto cleanup; 1491 1492 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1493 goto cleanup; 1494 1495 parent_oom_events = cg_read_key_long( 1496 parent, "memory.events", "oom_kill "); 1497 /* 1498 * If memory_localevents is not enabled (the default), the parent should 1499 * count OOM events in its children groups. Otherwise, it should not 1500 * have observed any events. 1501 */ 1502 if (has_localevents && parent_oom_events != 0) 1503 goto cleanup; 1504 else if (!has_localevents && parent_oom_events <= 0) 1505 goto cleanup; 1506 1507 ret = KSFT_PASS; 1508 1509 cleanup: 1510 if (child) 1511 cg_destroy(child); 1512 if (parent) 1513 cg_destroy(parent); 1514 free(child); 1515 free(parent); 1516 1517 return ret; 1518 } 1519 1520 /* 1521 * This test disables swapping and tries to allocate anonymous memory 1522 * up to OOM with memory.group.oom set. Then it checks that all 1523 * processes in the parent and leaf were killed. 1524 */ 1525 static int test_memcg_oom_group_parent_events(const char *root) 1526 { 1527 int ret = KSFT_FAIL; 1528 char *parent, *child; 1529 1530 parent = cg_name(root, "memcg_test_0"); 1531 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1532 1533 if (!parent || !child) 1534 goto cleanup; 1535 1536 if (cg_create(parent)) 1537 goto cleanup; 1538 1539 if (cg_create(child)) 1540 goto cleanup; 1541 1542 if (cg_write(parent, "memory.max", "80M")) 1543 goto cleanup; 1544 1545 if (cg_write(parent, "memory.swap.max", "0")) 1546 goto cleanup; 1547 1548 if (cg_write(parent, "memory.oom.group", "1")) 1549 goto cleanup; 1550 1551 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1552 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1553 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1554 1555 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1556 goto cleanup; 1557 1558 if (cg_test_proc_killed(child)) 1559 goto cleanup; 1560 if (cg_test_proc_killed(parent)) 1561 goto cleanup; 1562 1563 ret = KSFT_PASS; 1564 1565 cleanup: 1566 if (child) 1567 cg_destroy(child); 1568 if (parent) 1569 cg_destroy(parent); 1570 free(child); 1571 free(parent); 1572 1573 return ret; 1574 } 1575 1576 /* 1577 * This test disables swapping and tries to allocate anonymous memory 1578 * up to OOM with memory.group.oom set. Then it checks that all 1579 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1580 */ 1581 static int test_memcg_oom_group_score_events(const char *root) 1582 { 1583 int ret = KSFT_FAIL; 1584 char *memcg; 1585 int safe_pid; 1586 1587 memcg = cg_name(root, "memcg_test_0"); 1588 1589 if (!memcg) 1590 goto cleanup; 1591 1592 if (cg_create(memcg)) 1593 goto cleanup; 1594 1595 if (cg_write(memcg, "memory.max", "50M")) 1596 goto cleanup; 1597 1598 if (cg_write(memcg, "memory.swap.max", "0")) 1599 goto cleanup; 1600 1601 if (cg_write(memcg, "memory.oom.group", "1")) 1602 goto cleanup; 1603 1604 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1605 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1606 goto cleanup; 1607 1608 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1609 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1610 goto cleanup; 1611 1612 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1613 goto cleanup; 1614 1615 if (kill(safe_pid, SIGKILL)) 1616 goto cleanup; 1617 1618 ret = KSFT_PASS; 1619 1620 cleanup: 1621 if (memcg) 1622 cg_destroy(memcg); 1623 free(memcg); 1624 1625 return ret; 1626 } 1627 1628 #define T(x) { x, #x } 1629 struct memcg_test { 1630 int (*fn)(const char *root); 1631 const char *name; 1632 } tests[] = { 1633 T(test_memcg_subtree_control), 1634 T(test_memcg_current_peak), 1635 T(test_memcg_min), 1636 T(test_memcg_low), 1637 T(test_memcg_high), 1638 T(test_memcg_high_sync), 1639 T(test_memcg_max), 1640 T(test_memcg_reclaim), 1641 T(test_memcg_oom_events), 1642 T(test_memcg_swap_max_peak), 1643 T(test_memcg_sock), 1644 T(test_memcg_oom_group_leaf_events), 1645 T(test_memcg_oom_group_parent_events), 1646 T(test_memcg_oom_group_score_events), 1647 }; 1648 #undef T 1649 1650 int main(int argc, char **argv) 1651 { 1652 char root[PATH_MAX]; 1653 int i, proc_status, ret = EXIT_SUCCESS; 1654 1655 if (cg_find_unified_root(root, sizeof(root), NULL)) 1656 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1657 1658 /* 1659 * Check that memory controller is available: 1660 * memory is listed in cgroup.controllers 1661 */ 1662 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1663 ksft_exit_skip("memory controller isn't available\n"); 1664 1665 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1666 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1667 ksft_exit_skip("Failed to set memory controller\n"); 1668 1669 proc_status = proc_mount_contains("memory_recursiveprot"); 1670 if (proc_status < 0) 1671 ksft_exit_skip("Failed to query cgroup mount option\n"); 1672 has_recursiveprot = proc_status; 1673 1674 proc_status = proc_mount_contains("memory_localevents"); 1675 if (proc_status < 0) 1676 ksft_exit_skip("Failed to query cgroup mount option\n"); 1677 has_localevents = proc_status; 1678 1679 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1680 switch (tests[i].fn(root)) { 1681 case KSFT_PASS: 1682 ksft_test_result_pass("%s\n", tests[i].name); 1683 break; 1684 case KSFT_SKIP: 1685 ksft_test_result_skip("%s\n", tests[i].name); 1686 break; 1687 default: 1688 ret = EXIT_FAILURE; 1689 ksft_test_result_fail("%s\n", tests[i].name); 1690 break; 1691 } 1692 } 1693 1694 return ret; 1695 } 1696