1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/inotify.h> 14 #include <sys/socket.h> 15 #include <sys/wait.h> 16 #include <arpa/inet.h> 17 #include <netinet/in.h> 18 #include <netdb.h> 19 #include <errno.h> 20 #include <sys/mman.h> 21 22 #include "kselftest.h" 23 #include "cgroup_util.h" 24 25 #define MEMCG_SOCKSTAT_WAIT_RETRIES 30 26 27 static bool has_localevents; 28 static bool has_recursiveprot; 29 30 int get_temp_fd(void) 31 { 32 return open(".", O_TMPFILE | O_RDWR | O_EXCL); 33 } 34 35 int alloc_pagecache(int fd, size_t size) 36 { 37 char buf[PAGE_SIZE]; 38 struct stat st; 39 int i; 40 41 if (fstat(fd, &st)) 42 goto cleanup; 43 44 size += st.st_size; 45 46 if (ftruncate(fd, size)) 47 goto cleanup; 48 49 for (i = 0; i < size; i += sizeof(buf)) 50 read(fd, buf, sizeof(buf)); 51 52 return 0; 53 54 cleanup: 55 return -1; 56 } 57 58 int alloc_anon(const char *cgroup, void *arg) 59 { 60 size_t size = (unsigned long)arg; 61 char *buf, *ptr; 62 63 buf = malloc(size); 64 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 65 *ptr = 0; 66 67 free(buf); 68 return 0; 69 } 70 71 int is_swap_enabled(void) 72 { 73 char buf[PAGE_SIZE]; 74 const char delim[] = "\n"; 75 int cnt = 0; 76 char *line; 77 78 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) 79 return -1; 80 81 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 82 cnt++; 83 84 return cnt > 1; 85 } 86 87 int set_oom_adj_score(int pid, int score) 88 { 89 char path[PATH_MAX]; 90 int fd, len; 91 92 sprintf(path, "/proc/%d/oom_score_adj", pid); 93 94 fd = open(path, O_WRONLY | O_APPEND); 95 if (fd < 0) 96 return fd; 97 98 len = dprintf(fd, "%d", score); 99 if (len < 0) { 100 close(fd); 101 return len; 102 } 103 104 close(fd); 105 return 0; 106 } 107 108 /* 109 
* This test creates two nested cgroups with and without enabling 110 * the memory controller. 111 */ 112 static int test_memcg_subtree_control(const char *root) 113 { 114 char *parent, *child, *parent2 = NULL, *child2 = NULL; 115 int ret = KSFT_FAIL; 116 char buf[PAGE_SIZE]; 117 118 /* Create two nested cgroups with the memory controller enabled */ 119 parent = cg_name(root, "memcg_test_0"); 120 child = cg_name(root, "memcg_test_0/memcg_test_1"); 121 if (!parent || !child) 122 goto cleanup_free; 123 124 if (cg_create(parent)) 125 goto cleanup_free; 126 127 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 128 goto cleanup_parent; 129 130 if (cg_create(child)) 131 goto cleanup_parent; 132 133 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 134 goto cleanup_child; 135 136 /* Create two nested cgroups without enabling memory controller */ 137 parent2 = cg_name(root, "memcg_test_1"); 138 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 139 if (!parent2 || !child2) 140 goto cleanup_free2; 141 142 if (cg_create(parent2)) 143 goto cleanup_free2; 144 145 if (cg_create(child2)) 146 goto cleanup_parent2; 147 148 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 149 goto cleanup_all; 150 151 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 152 goto cleanup_all; 153 154 ret = KSFT_PASS; 155 156 cleanup_all: 157 cg_destroy(child2); 158 cleanup_parent2: 159 cg_destroy(parent2); 160 cleanup_free2: 161 free(parent2); 162 free(child2); 163 cleanup_child: 164 cg_destroy(child); 165 cleanup_parent: 166 cg_destroy(parent); 167 cleanup_free: 168 free(parent); 169 free(child); 170 171 return ret; 172 } 173 174 static int alloc_anon_50M_check(const char *cgroup, void *arg) 175 { 176 size_t size = MB(50); 177 char *buf, *ptr; 178 long anon, current; 179 int ret = -1; 180 181 buf = malloc(size); 182 if (buf == NULL) { 183 fprintf(stderr, "malloc() failed\n"); 184 return -1; 185 } 186 187 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 188 
*ptr = 0; 189 190 current = cg_read_long(cgroup, "memory.current"); 191 if (current < size) 192 goto cleanup; 193 194 if (!values_close(size, current, 3)) 195 goto cleanup; 196 197 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 198 if (anon < 0) 199 goto cleanup; 200 201 if (!values_close(anon, current, 3)) 202 goto cleanup; 203 204 ret = 0; 205 cleanup: 206 free(buf); 207 return ret; 208 } 209 210 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 211 { 212 size_t size = MB(50); 213 int ret = -1; 214 long current, file; 215 int fd; 216 217 fd = get_temp_fd(); 218 if (fd < 0) 219 return -1; 220 221 if (alloc_pagecache(fd, size)) 222 goto cleanup; 223 224 current = cg_read_long(cgroup, "memory.current"); 225 if (current < size) 226 goto cleanup; 227 228 file = cg_read_key_long(cgroup, "memory.stat", "file "); 229 if (file < 0) 230 goto cleanup; 231 232 if (!values_close(file, current, 10)) 233 goto cleanup; 234 235 ret = 0; 236 237 cleanup: 238 close(fd); 239 return ret; 240 } 241 242 /* 243 * This test create a memory cgroup, allocates 244 * some anonymous memory and some pagecache 245 * and checks memory.current, memory.peak, and some memory.stat values. 
 */
static int test_memcg_current_peak(const char *root)
{
	int ret = KSFT_FAIL;
	long current, peak, peak_reset;
	char *memcg;
	bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
	int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;
	struct stat ss;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* A freshly created cgroup must start with zero usage and zero peak */
	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/*
	 * We'll open a few FDs for the same memory.peak file to exercise the free-path
	 * We need at least three to be closed in a different order than writes occurred to test
	 * the linked-list handling.
	 */
	peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd == -1) {
		/* memory.peak absent entirely: feature not in this kernel */
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.peak's fd, try to figure out whether
	 * this kernel supports writing to that file in the first place. (by
	 * checking the writable bit on the file's st_mode)
	 */
	if (fstat(peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd2 == -1)
		goto cleanup;

	peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd3 == -1)
		goto cleanup;

	/* any non-empty string resets, but make it clear */
	static const char reset_string[] = "reset\n";

	peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* Make sure a completely independent read isn't affected by our FD-local reset above*/
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/* Close out of write order (fd2 before fd3) to stress list removal */
	fd2_closed = true;
	if (close(peak_fd2))
		goto cleanup;

	peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd4 == -1)
		goto cleanup;

	peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* peak_fd was reset above, so its FD-local peak must still be small */
	peak = cg_read_long_fd(peak_fd);
	if (peak > MB(30) || peak < 0)
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/* Make sure everything is back to normal */
	peak = cg_read_long_fd(peak_fd);
	if (peak < MB(50))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd4);
	if (peak < MB(50))
		goto cleanup;

	fd3_closed = true;
	if (close(peak_fd3))
		goto cleanup;

	fd4_closed = true;
	if (close(peak_fd4))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	close(peak_fd);
	if (!fd2_closed)
		close(peak_fd2);
	if (!fd3_closed)
		close(peak_fd3);
	if (!fd4_closed)
		close(peak_fd4);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Allocate 50M of pagecache through @arg (a temp fd), then linger until
 * the parent test process exits so the usage stays charged.
 */
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	/* getppid() changes when the original parent dies (reparenting) */
	while (getppid() == ppid)
		sleep(1);

	return 0;
}

/*
 * Allocate and touch (unsigned long)@arg bytes of anonymous memory, then
 * linger until the parent test process exits.
 */
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	while (getppid() == ppid)
		sleep(1);

	free(buf);
	return 0;
}

/*
 * Wait until processes are killed asynchronously by the OOM killer
 * If we exceed a timeout, fail.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int limit;

	/* Poll cgroup.procs for up to ~1s (10 * 100ms) */
	for (limit = 10; limit > 0; limit--) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}

static bool reclaim_until(const char *memcg, long goal);

/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 0,    memory.max = 200M
 * A/B     memory.min = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * (or memory.low if we test soft protection)
 *
 * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates a significant
 * memory pressure in A.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 29M [memory.events:low > 0]
 * A/B/D  memory.current ~= 21M [memory.events:low > 0]
 * A/B/E  memory.current ~= 0   [memory.events:low == 0 if !memory_recursiveprot,
 *                               undefined otherwise]
 * A/B/F  memory.current  = 0   [memory.events:low == 0]
 * (for origin of the numbers, see model in memcg_protection.m.)
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that:
 * a) memory.min protects pagecache even in this case,
 * b) memory.low allows reclaiming page cache with low events.
 *
 * Then we try to reclaim from A/B/C using memory.reclaim until its
 * usage reaches 10M.
 * This makes sure that:
 * (a) We ignore the protection of the reclaim target memcg.
 * (b) The previously calculated emin value (~29M) should be dismissed.
 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	/* same test body covers hard (min) and soft (low) protection */
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	long current;
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* the last child (F in the diagram) stays empty on purpose */
		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[1], attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	/* wait (bounded) for the background allocations to reach ~150M */
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	/* create memory pressure in A via a sibling of A/B */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 15))
		goto cleanup;

	if (!values_close(c[1], MB(21), 20))
		goto cleanup;

	if (c[3] != 0)
		goto cleanup;

	/* over-allocate: must fail under min, succeed (via reclaim) under low */
	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	current = min ? MB(50) : MB(30);
	if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
		goto cleanup;

	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	/*
	 * Child 2 has memory.low=0, but some low protection may still be
	 * distributed down from its parent with memory.low=50M if cgroup2
	 * memory_recursiveprot mount option is enabled. Ignore the low
	 * event count in this case.
	 */
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int ignore_low_events_index = has_recursiveprot ? 2 : -1;
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i == ignore_low_events_index)
			continue;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;

	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

/* Hard-protection variant of the protection test (memory.min) */
static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}

/* Soft-protection variant of the protection test (memory.low) */
static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}

/*
 * Try to allocate 50M of pagecache in a cgroup limited (via memory.high or
 * memory.max) to 30M, and verify usage is capped close to 30M.
 */
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	/* sanity: the caller must have set one of the two limits to 30M */
	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;

}

/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* default must be "max" before we touch anything */
	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	/* memory.high throttles but doesn't fail a 31M allocation */
	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

	/* the 50M pagecache check must fail: usage is squeezed below 50M */
	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	/* crossing memory.high must have been recorded in memory.events */
	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Allocate (size_t)@arg bytes of anonymous memory and mlock() it, faulting
 * the whole range in within a single kernel entry.
 */
static int alloc_anon_mlock(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	void *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   0, 0);
	if (buf == MAP_FAILED)
		return -1;

	/* best-effort: a failed mlock just means less pressure, not a bug */
	mlock(buf, size);
	munmap(buf, size);
	return 0;
}

/*
 * This test checks that memory.high is able to throttle big single shot
 * allocation i.e. large allocation within one kernel entry.
 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	/* "high" events must have fired; "max" must not (throttled, not OOMed) */
	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* default must be "max" before we touch anything */
	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	/* the hard limit must have been hit at least once */
	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Reclaim from @memcg until usage reaches @goal by writing to
 * memory.reclaim.
 *
 * This function will return false if the usage is already below the
 * goal.
 *
 * This function assumes that writing to memory.reclaim is the only
 * source of change in memory.current (no concurrent allocations or
 * reclaim).
 *
 * This function makes sure memory.reclaim is sane. It will return
 * false if memory.reclaim's error codes do not make sense, even if
 * the usage goal was satisfied.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	bool reclaimed = false;

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");

		if (current < goal || values_close(current, goal, 3))
			break;
		/* Did memory.reclaim return 0 incorrectly? */
		else if (reclaimed)
			return false;

		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		else if (err != -EAGAIN)
			return false;
	}
	return reclaimed;
}

/*
 * This test checks that memory.reclaim reclaims the given
 * amount of memory (from both anon and file, if possible).
 */
static int test_memcg_reclaim(const char *root)
{
	int ret = KSFT_FAIL;
	int fd = -1;
	int retries;
	char *memcg;
	long current, expected_usage;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else
		expected_usage = MB(50);

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			     expected_usage, 10)) {
		if (retries--) {
			sleep(1);
			continue;
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M, this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	if (!reclaim_until(memcg, MB(30)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(memcg);
	free(memcg);
	close(fd);

	return ret;
}

/*
 * Allocate 50M of anonymous memory in a cgroup whose memory.max is
 * (long)@arg, and verify that the overflow went to swap:
 * memory.current ~= mem_max and current + swap ~= 50M.
 */
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out. Additionally, it verifies that
 * memory.swap.peak reflects the high watermark and can be reset.
 */
static int test_memcg_swap_max_peak(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max, peak;
	struct stat ss;
	int swap_peak_fd = -1, mem_peak_fd = -1;

	/* any non-empty string resets */
	static const char reset_string[] = "foobarbaz";

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* pre-existing swap usage would invalidate the peak expectations */
	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	swap_peak_fd = cg_open(memcg, "memory.swap.peak",
			       O_RDWR | O_APPEND | O_CLOEXEC);

	if (swap_peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.swap.peak's fd, try to figure out
	 * whether this kernel supports writing to that file in the first
	 * place. (by checking the writable bit on the file's st_mode)
	 */
	if (fstat(swap_peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (mem_peak_fd == -1)
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak"))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	/* switch the swap and mem fds into local-peak tracking mode*/
	int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));

	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak"))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	/* both the global and the FD-local peaks must have seen ~30M */
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	/*
	 * open, reset and close the peak swap on another FD to make sure
	 * multiple extant fds don't corrupt the linked-list
	 */
	peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	/* actually reset on the fds */
	peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak > MB(10))
		goto cleanup;

	/*
	 * The cgroup is now empty, but there may be a page or two associated
	 * with the open FD accounted to it.
	 */
	peak = cg_read_long_fd(mem_peak_fd);
	if (peak > MB(1))
		goto cleanup;

	/* the global (non-FD) watermarks must be unaffected by FD resets */
	if (cg_read_long(memcg, "memory.peak") < MB(29))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(19))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	/* close() failure on a peak fd indicates a kernel-side problem */
	if (mem_peak_fd != -1 && close(mem_peak_fd))
		ret = KSFT_FAIL;
	if (swap_peak_fd != -1 && close(swap_peak_fd))
		ret = KSFT_FAIL;
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
1224 */ 1225 static int test_memcg_oom_events(const char *root) 1226 { 1227 int ret = KSFT_FAIL; 1228 char *memcg; 1229 1230 memcg = cg_name(root, "memcg_test"); 1231 if (!memcg) 1232 goto cleanup; 1233 1234 if (cg_create(memcg)) 1235 goto cleanup; 1236 1237 if (cg_write(memcg, "memory.max", "30M")) 1238 goto cleanup; 1239 1240 if (cg_write(memcg, "memory.swap.max", "0")) 1241 goto cleanup; 1242 1243 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1244 goto cleanup; 1245 1246 if (cg_read_strcmp(memcg, "cgroup.procs", "")) 1247 goto cleanup; 1248 1249 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 1250 goto cleanup; 1251 1252 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 1253 goto cleanup; 1254 1255 ret = KSFT_PASS; 1256 1257 cleanup: 1258 cg_destroy(memcg); 1259 free(memcg); 1260 1261 return ret; 1262 } 1263 1264 struct tcp_server_args { 1265 unsigned short port; 1266 int ctl[2]; 1267 }; 1268 1269 static int tcp_server(const char *cgroup, void *arg) 1270 { 1271 struct tcp_server_args *srv_args = arg; 1272 struct sockaddr_in6 saddr = { 0 }; 1273 socklen_t slen = sizeof(saddr); 1274 int sk, client_sk, ctl_fd, yes = 1, ret = -1; 1275 1276 close(srv_args->ctl[0]); 1277 ctl_fd = srv_args->ctl[1]; 1278 1279 saddr.sin6_family = AF_INET6; 1280 saddr.sin6_addr = in6addr_any; 1281 saddr.sin6_port = htons(srv_args->port); 1282 1283 sk = socket(AF_INET6, SOCK_STREAM, 0); 1284 if (sk < 0) { 1285 /* Pass back errno to the ctl_fd */ 1286 write(ctl_fd, &errno, sizeof(errno)); 1287 return ret; 1288 } 1289 1290 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) 1291 goto cleanup; 1292 1293 if (bind(sk, (struct sockaddr *)&saddr, slen)) { 1294 write(ctl_fd, &errno, sizeof(errno)); 1295 goto cleanup; 1296 } 1297 1298 if (listen(sk, 1)) 1299 goto cleanup; 1300 1301 ret = 0; 1302 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { 1303 ret = -1; 1304 goto cleanup; 1305 } 1306 1307 client_sk = accept(sk, NULL, NULL); 1308 if (client_sk 
< 0) 1309 goto cleanup; 1310 1311 ret = -1; 1312 for (;;) { 1313 uint8_t buf[0x100000]; 1314 1315 if (write(client_sk, buf, sizeof(buf)) <= 0) { 1316 if (errno == ECONNRESET) 1317 ret = 0; 1318 break; 1319 } 1320 } 1321 1322 close(client_sk); 1323 1324 cleanup: 1325 close(sk); 1326 return ret; 1327 } 1328 1329 static int tcp_client(const char *cgroup, unsigned short port) 1330 { 1331 const char server[] = "localhost"; 1332 struct addrinfo *ai; 1333 char servport[6]; 1334 int retries = 0x10; /* nice round number */ 1335 int sk, ret; 1336 long allocated; 1337 1338 allocated = cg_read_long(cgroup, "memory.current"); 1339 snprintf(servport, sizeof(servport), "%hd", port); 1340 ret = getaddrinfo(server, servport, NULL, &ai); 1341 if (ret) 1342 return ret; 1343 1344 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 1345 if (sk < 0) 1346 goto free_ainfo; 1347 1348 ret = connect(sk, ai->ai_addr, ai->ai_addrlen); 1349 if (ret < 0) 1350 goto close_sk; 1351 1352 ret = KSFT_FAIL; 1353 while (retries--) { 1354 uint8_t buf[0x100000]; 1355 long current, sock; 1356 1357 if (read(sk, buf, sizeof(buf)) <= 0) 1358 goto close_sk; 1359 1360 current = cg_read_long(cgroup, "memory.current"); 1361 sock = cg_read_key_long(cgroup, "memory.stat", "sock "); 1362 1363 if (current < 0 || sock < 0) 1364 goto close_sk; 1365 1366 /* exclude the memory not related to socket connection */ 1367 if (values_close(current - allocated, sock, 10)) { 1368 ret = KSFT_PASS; 1369 break; 1370 } 1371 } 1372 1373 close_sk: 1374 close(sk); 1375 free_ainfo: 1376 freeaddrinfo(ai); 1377 return ret; 1378 } 1379 1380 /* 1381 * This test checks socket memory accounting. 1382 * The test forks a TCP server listens on a random port between 1000 1383 * and 61000. Once it gets a client connection, it starts writing to 1384 * its socket. 1385 * The TCP client interleaves reads from the socket with check whether 1386 * memory.current and memory.stat.sock are similar. 
1387 */ 1388 static int test_memcg_sock(const char *root) 1389 { 1390 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1391 unsigned short port; 1392 char *memcg; 1393 long sock_post = -1; 1394 1395 memcg = cg_name(root, "memcg_test"); 1396 if (!memcg) 1397 goto cleanup; 1398 1399 if (cg_create(memcg)) 1400 goto cleanup; 1401 1402 while (bind_retries--) { 1403 struct tcp_server_args args; 1404 1405 if (pipe(args.ctl)) 1406 goto cleanup; 1407 1408 port = args.port = 1000 + rand() % 60000; 1409 1410 pid = cg_run_nowait(memcg, tcp_server, &args); 1411 if (pid < 0) 1412 goto cleanup; 1413 1414 close(args.ctl[1]); 1415 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1416 goto cleanup; 1417 close(args.ctl[0]); 1418 1419 /* Skip if address family not supported by protocol */ 1420 if (err == EAFNOSUPPORT) { 1421 ret = KSFT_SKIP; 1422 goto cleanup; 1423 } 1424 1425 if (!err) 1426 break; 1427 if (err != EADDRINUSE) 1428 goto cleanup; 1429 1430 waitpid(pid, NULL, 0); 1431 } 1432 1433 if (err == EADDRINUSE) { 1434 ret = KSFT_SKIP; 1435 goto cleanup; 1436 } 1437 1438 if (tcp_client(memcg, port) != KSFT_PASS) 1439 goto cleanup; 1440 1441 waitpid(pid, &err, 0); 1442 if (WEXITSTATUS(err)) 1443 goto cleanup; 1444 1445 if (cg_read_long(memcg, "memory.current") < 0) 1446 goto cleanup; 1447 1448 /* 1449 * memory.stat is updated asynchronously via the memcg rstat 1450 * flushing worker, which runs periodically (every 2 seconds, 1451 * see FLUSH_TIME). On a busy system, the "sock " counter may 1452 * stay non-zero for a short period of time after the TCP 1453 * connection is closed and all socket memory has been 1454 * uncharged. 1455 * 1456 * Poll memory.stat for up to 3 seconds (~FLUSH_TIME plus some 1457 * scheduling slack) and require that the "sock " counter 1458 * eventually drops to zero. 
1459 */ 1460 sock_post = cg_read_key_long_poll(memcg, "memory.stat", "sock ", 0, 1461 MEMCG_SOCKSTAT_WAIT_RETRIES, 1462 DEFAULT_WAIT_INTERVAL_US); 1463 if (sock_post) 1464 goto cleanup; 1465 1466 ret = KSFT_PASS; 1467 1468 cleanup: 1469 cg_destroy(memcg); 1470 free(memcg); 1471 1472 return ret; 1473 } 1474 1475 /* 1476 * This test disables swapping and tries to allocate anonymous memory 1477 * up to OOM with memory.group.oom set. Then it checks that all 1478 * processes in the leaf were killed. It also checks that oom_events 1479 * were propagated to the parent level. 1480 */ 1481 static int test_memcg_oom_group_leaf_events(const char *root) 1482 { 1483 int ret = KSFT_FAIL; 1484 char *parent, *child; 1485 long parent_oom_events; 1486 1487 parent = cg_name(root, "memcg_test_0"); 1488 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1489 1490 if (!parent || !child) 1491 goto cleanup; 1492 1493 if (cg_create(parent)) 1494 goto cleanup; 1495 1496 if (cg_create(child)) 1497 goto cleanup; 1498 1499 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1500 goto cleanup; 1501 1502 if (cg_write(child, "memory.max", "50M")) 1503 goto cleanup; 1504 1505 if (cg_write(child, "memory.swap.max", "0")) 1506 goto cleanup; 1507 1508 if (cg_write(child, "memory.oom.group", "1")) 1509 goto cleanup; 1510 1511 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1512 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1513 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1514 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1515 goto cleanup; 1516 1517 if (cg_test_proc_killed(child)) 1518 goto cleanup; 1519 1520 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1521 goto cleanup; 1522 1523 parent_oom_events = cg_read_key_long( 1524 parent, "memory.events", "oom_kill "); 1525 /* 1526 * If memory_localevents is not enabled (the default), the parent should 1527 * count OOM events in its children groups. 
Otherwise, it should not 1528 * have observed any events. 1529 */ 1530 if (has_localevents && parent_oom_events != 0) 1531 goto cleanup; 1532 else if (!has_localevents && parent_oom_events <= 0) 1533 goto cleanup; 1534 1535 ret = KSFT_PASS; 1536 1537 cleanup: 1538 if (child) 1539 cg_destroy(child); 1540 if (parent) 1541 cg_destroy(parent); 1542 free(child); 1543 free(parent); 1544 1545 return ret; 1546 } 1547 1548 /* 1549 * This test disables swapping and tries to allocate anonymous memory 1550 * up to OOM with memory.group.oom set. Then it checks that all 1551 * processes in the parent and leaf were killed. 1552 */ 1553 static int test_memcg_oom_group_parent_events(const char *root) 1554 { 1555 int ret = KSFT_FAIL; 1556 char *parent, *child; 1557 1558 parent = cg_name(root, "memcg_test_0"); 1559 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1560 1561 if (!parent || !child) 1562 goto cleanup; 1563 1564 if (cg_create(parent)) 1565 goto cleanup; 1566 1567 if (cg_create(child)) 1568 goto cleanup; 1569 1570 if (cg_write(parent, "memory.max", "80M")) 1571 goto cleanup; 1572 1573 if (cg_write(parent, "memory.swap.max", "0")) 1574 goto cleanup; 1575 1576 if (cg_write(parent, "memory.oom.group", "1")) 1577 goto cleanup; 1578 1579 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1580 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1581 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1582 1583 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1584 goto cleanup; 1585 1586 if (cg_test_proc_killed(child)) 1587 goto cleanup; 1588 if (cg_test_proc_killed(parent)) 1589 goto cleanup; 1590 1591 ret = KSFT_PASS; 1592 1593 cleanup: 1594 if (child) 1595 cg_destroy(child); 1596 if (parent) 1597 cg_destroy(parent); 1598 free(child); 1599 free(parent); 1600 1601 return ret; 1602 } 1603 1604 /* 1605 * This test disables swapping and tries to allocate anonymous memory 1606 * up to OOM with memory.group.oom set. 
Then it checks that all 1607 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1608 */ 1609 static int test_memcg_oom_group_score_events(const char *root) 1610 { 1611 int ret = KSFT_FAIL; 1612 char *memcg; 1613 int safe_pid; 1614 1615 memcg = cg_name(root, "memcg_test_0"); 1616 1617 if (!memcg) 1618 goto cleanup; 1619 1620 if (cg_create(memcg)) 1621 goto cleanup; 1622 1623 if (cg_write(memcg, "memory.max", "50M")) 1624 goto cleanup; 1625 1626 if (cg_write(memcg, "memory.swap.max", "0")) 1627 goto cleanup; 1628 1629 if (cg_write(memcg, "memory.oom.group", "1")) 1630 goto cleanup; 1631 1632 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1633 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1634 goto cleanup; 1635 1636 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1637 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1638 goto cleanup; 1639 1640 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1641 goto cleanup; 1642 1643 if (kill(safe_pid, SIGKILL)) 1644 goto cleanup; 1645 1646 ret = KSFT_PASS; 1647 1648 cleanup: 1649 if (memcg) 1650 cg_destroy(memcg); 1651 free(memcg); 1652 1653 return ret; 1654 } 1655 1656 static int read_event(int inotify_fd, int expected_event, int expected_wd) 1657 { 1658 struct inotify_event event; 1659 ssize_t len = 0; 1660 1661 len = read(inotify_fd, &event, sizeof(event)); 1662 if (len < (ssize_t)sizeof(event)) 1663 return -1; 1664 1665 if (event.mask != expected_event || event.wd != expected_wd) { 1666 fprintf(stderr, 1667 "event does not match expected values: mask %d (expected %d) wd %d (expected %d)\n", 1668 event.mask, expected_event, event.wd, expected_wd); 1669 return -1; 1670 } 1671 1672 return 0; 1673 } 1674 1675 static int test_memcg_inotify_delete_file(const char *root) 1676 { 1677 int ret = KSFT_FAIL; 1678 char *memcg = NULL; 1679 int fd, wd; 1680 1681 memcg = cg_name(root, "memcg_test_0"); 1682 1683 if (!memcg) 1684 goto cleanup; 1685 1686 if (cg_create(memcg)) 1687 
goto cleanup; 1688 1689 fd = inotify_init1(0); 1690 if (fd == -1) 1691 goto cleanup; 1692 1693 wd = inotify_add_watch(fd, cg_control(memcg, "memory.events"), IN_DELETE_SELF); 1694 if (wd == -1) 1695 goto cleanup; 1696 1697 if (cg_destroy(memcg)) 1698 goto cleanup; 1699 free(memcg); 1700 memcg = NULL; 1701 1702 if (read_event(fd, IN_DELETE_SELF, wd)) 1703 goto cleanup; 1704 1705 if (read_event(fd, IN_IGNORED, wd)) 1706 goto cleanup; 1707 1708 ret = KSFT_PASS; 1709 1710 cleanup: 1711 if (fd >= 0) 1712 close(fd); 1713 if (memcg) 1714 cg_destroy(memcg); 1715 free(memcg); 1716 1717 return ret; 1718 } 1719 1720 static int test_memcg_inotify_delete_dir(const char *root) 1721 { 1722 int ret = KSFT_FAIL; 1723 char *memcg = NULL; 1724 int fd, wd; 1725 1726 memcg = cg_name(root, "memcg_test_0"); 1727 1728 if (!memcg) 1729 goto cleanup; 1730 1731 if (cg_create(memcg)) 1732 goto cleanup; 1733 1734 fd = inotify_init1(0); 1735 if (fd == -1) 1736 goto cleanup; 1737 1738 wd = inotify_add_watch(fd, memcg, IN_DELETE_SELF); 1739 if (wd == -1) 1740 goto cleanup; 1741 1742 if (cg_destroy(memcg)) 1743 goto cleanup; 1744 free(memcg); 1745 memcg = NULL; 1746 1747 if (read_event(fd, IN_DELETE_SELF, wd)) 1748 goto cleanup; 1749 1750 if (read_event(fd, IN_IGNORED, wd)) 1751 goto cleanup; 1752 1753 ret = KSFT_PASS; 1754 1755 cleanup: 1756 if (fd >= 0) 1757 close(fd); 1758 if (memcg) 1759 cg_destroy(memcg); 1760 free(memcg); 1761 1762 return ret; 1763 } 1764 1765 #define T(x) { x, #x } 1766 struct memcg_test { 1767 int (*fn)(const char *root); 1768 const char *name; 1769 } tests[] = { 1770 T(test_memcg_subtree_control), 1771 T(test_memcg_current_peak), 1772 T(test_memcg_min), 1773 T(test_memcg_low), 1774 T(test_memcg_high), 1775 T(test_memcg_high_sync), 1776 T(test_memcg_max), 1777 T(test_memcg_reclaim), 1778 T(test_memcg_oom_events), 1779 T(test_memcg_swap_max_peak), 1780 T(test_memcg_sock), 1781 T(test_memcg_oom_group_leaf_events), 1782 T(test_memcg_oom_group_parent_events), 1783 
T(test_memcg_oom_group_score_events), 1784 T(test_memcg_inotify_delete_file), 1785 T(test_memcg_inotify_delete_dir), 1786 }; 1787 #undef T 1788 1789 int main(int argc, char **argv) 1790 { 1791 char root[PATH_MAX]; 1792 int i, proc_status; 1793 1794 ksft_print_header(); 1795 ksft_set_plan(ARRAY_SIZE(tests)); 1796 if (cg_find_unified_root(root, sizeof(root), NULL)) 1797 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1798 1799 /* 1800 * Check that memory controller is available: 1801 * memory is listed in cgroup.controllers 1802 */ 1803 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1804 ksft_exit_skip("memory controller isn't available\n"); 1805 1806 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1807 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1808 ksft_exit_skip("Failed to set memory controller\n"); 1809 1810 proc_status = proc_mount_contains("memory_recursiveprot"); 1811 if (proc_status < 0) 1812 ksft_exit_skip("Failed to query cgroup mount option\n"); 1813 has_recursiveprot = proc_status; 1814 1815 proc_status = proc_mount_contains("memory_localevents"); 1816 if (proc_status < 0) 1817 ksft_exit_skip("Failed to query cgroup mount option\n"); 1818 has_localevents = proc_status; 1819 1820 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1821 switch (tests[i].fn(root)) { 1822 case KSFT_PASS: 1823 ksft_test_result_pass("%s\n", tests[i].name); 1824 break; 1825 case KSFT_SKIP: 1826 ksft_test_result_skip("%s\n", tests[i].name); 1827 break; 1828 default: 1829 ksft_test_result_fail("%s\n", tests[i].name); 1830 break; 1831 } 1832 } 1833 1834 ksft_finished(); 1835 } 1836