1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/inotify.h> 14 #include <sys/socket.h> 15 #include <sys/wait.h> 16 #include <arpa/inet.h> 17 #include <netinet/in.h> 18 #include <netdb.h> 19 #include <errno.h> 20 #include <sys/mman.h> 21 22 #include "kselftest.h" 23 #include "cgroup_util.h" 24 25 #define MEMCG_SOCKSTAT_WAIT_RETRIES 30 26 27 static bool has_localevents; 28 static bool has_recursiveprot; 29 static int page_size; 30 31 int get_temp_fd(void) 32 { 33 return open(".", O_TMPFILE | O_RDWR | O_EXCL); 34 } 35 36 int alloc_pagecache(int fd, size_t size) 37 { 38 char buf[BUF_SIZE]; 39 struct stat st; 40 int i; 41 42 if (fstat(fd, &st)) 43 goto cleanup; 44 45 size += st.st_size; 46 47 if (ftruncate(fd, size)) 48 goto cleanup; 49 50 for (i = 0; i < size; i += sizeof(buf)) 51 read(fd, buf, sizeof(buf)); 52 53 return 0; 54 55 cleanup: 56 return -1; 57 } 58 59 static char *alloc_and_populate_anon(size_t size) 60 { 61 char *buf, *ptr; 62 63 buf = malloc(size); 64 if (buf == NULL) { 65 fprintf(stderr, "malloc() failed\n"); 66 return NULL; 67 } 68 69 for (ptr = buf; ptr < buf + size; ptr += page_size) 70 *ptr = 0; 71 72 return buf; 73 } 74 75 int alloc_anon(const char *cgroup, void *arg) 76 { 77 size_t size = (unsigned long)arg; 78 char *buf; 79 80 buf = alloc_and_populate_anon(size); 81 if (!buf) 82 return -1; 83 84 free(buf); 85 return 0; 86 } 87 88 int is_swap_enabled(void) 89 { 90 char buf[BUF_SIZE]; 91 const char delim[] = "\n"; 92 int cnt = 0; 93 char *line; 94 95 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) 96 return -1; 97 98 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 99 cnt++; 100 101 return cnt > 1; 102 } 103 104 int set_oom_adj_score(int pid, int score) 105 { 106 char path[PATH_MAX]; 
107 int fd, len; 108 109 sprintf(path, "/proc/%d/oom_score_adj", pid); 110 111 fd = open(path, O_WRONLY | O_APPEND); 112 if (fd < 0) 113 return fd; 114 115 len = dprintf(fd, "%d", score); 116 if (len < 0) { 117 close(fd); 118 return len; 119 } 120 121 close(fd); 122 return 0; 123 } 124 125 /* 126 * This test creates two nested cgroups with and without enabling 127 * the memory controller. 128 */ 129 static int test_memcg_subtree_control(const char *root) 130 { 131 char *parent, *child, *parent2 = NULL, *child2 = NULL; 132 int ret = KSFT_FAIL; 133 char buf[BUF_SIZE]; 134 135 /* Create two nested cgroups with the memory controller enabled */ 136 parent = cg_name(root, "memcg_test_0"); 137 child = cg_name(root, "memcg_test_0/memcg_test_1"); 138 if (!parent || !child) 139 goto cleanup_free; 140 141 if (cg_create(parent)) 142 goto cleanup_free; 143 144 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 145 goto cleanup_parent; 146 147 if (cg_create(child)) 148 goto cleanup_parent; 149 150 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 151 goto cleanup_child; 152 153 /* Create two nested cgroups without enabling memory controller */ 154 parent2 = cg_name(root, "memcg_test_1"); 155 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 156 if (!parent2 || !child2) 157 goto cleanup_free2; 158 159 if (cg_create(parent2)) 160 goto cleanup_free2; 161 162 if (cg_create(child2)) 163 goto cleanup_parent2; 164 165 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 166 goto cleanup_all; 167 168 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 169 goto cleanup_all; 170 171 ret = KSFT_PASS; 172 173 cleanup_all: 174 cg_destroy(child2); 175 cleanup_parent2: 176 cg_destroy(parent2); 177 cleanup_free2: 178 free(parent2); 179 free(child2); 180 cleanup_child: 181 cg_destroy(child); 182 cleanup_parent: 183 cg_destroy(parent); 184 cleanup_free: 185 free(parent); 186 free(child); 187 188 return ret; 189 } 190 191 static int alloc_anon_50M_check(const 
char *cgroup, void *arg) 192 { 193 size_t size = MB(50); 194 char *buf; 195 long anon, current; 196 int ret = -1; 197 198 buf = alloc_and_populate_anon(size); 199 if (!buf) 200 return -1; 201 202 current = cg_read_long(cgroup, "memory.current"); 203 if (current < size) 204 goto cleanup; 205 206 if (!values_close(size, current, 3)) 207 goto cleanup; 208 209 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 210 if (anon < 0) 211 goto cleanup; 212 213 if (!values_close(anon, current, 3)) 214 goto cleanup; 215 216 ret = 0; 217 cleanup: 218 free(buf); 219 return ret; 220 } 221 222 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 223 { 224 size_t size = MB(50); 225 int ret = -1; 226 long current, file; 227 int fd; 228 229 fd = get_temp_fd(); 230 if (fd < 0) 231 return -1; 232 233 if (alloc_pagecache(fd, size)) 234 goto cleanup; 235 236 current = cg_read_long(cgroup, "memory.current"); 237 if (current < size) 238 goto cleanup; 239 240 file = cg_read_key_long(cgroup, "memory.stat", "file "); 241 if (file < 0) 242 goto cleanup; 243 244 if (!values_close(file, current, 10)) 245 goto cleanup; 246 247 ret = 0; 248 249 cleanup: 250 close(fd); 251 return ret; 252 } 253 254 /* 255 * This test create a memory cgroup, allocates 256 * some anonymous memory and some pagecache 257 * and checks memory.current, memory.peak, and some memory.stat values. 
258 */ 259 static int test_memcg_current_peak(const char *root) 260 { 261 int ret = KSFT_FAIL; 262 long current, peak, peak_reset; 263 char *memcg; 264 bool fd2_closed = false, fd3_closed = false, fd4_closed = false; 265 int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1; 266 struct stat ss; 267 268 memcg = cg_name(root, "memcg_test"); 269 if (!memcg) 270 goto cleanup; 271 272 if (cg_create(memcg)) 273 goto cleanup; 274 275 current = cg_read_long(memcg, "memory.current"); 276 if (current != 0) 277 goto cleanup; 278 279 peak = cg_read_long(memcg, "memory.peak"); 280 if (peak != 0) 281 goto cleanup; 282 283 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 284 goto cleanup; 285 286 peak = cg_read_long(memcg, "memory.peak"); 287 if (peak < MB(50)) 288 goto cleanup; 289 290 /* 291 * We'll open a few FDs for the same memory.peak file to exercise the free-path 292 * We need at least three to be closed in a different order than writes occurred to test 293 * the linked-list handling. 294 */ 295 peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 296 297 if (peak_fd == -1) { 298 if (errno == ENOENT) 299 ret = KSFT_SKIP; 300 goto cleanup; 301 } 302 303 /* 304 * Before we try to use memory.peak's fd, try to figure out whether 305 * this kernel supports writing to that file in the first place. 
(by 306 * checking the writable bit on the file's st_mode) 307 */ 308 if (fstat(peak_fd, &ss)) 309 goto cleanup; 310 311 if ((ss.st_mode & S_IWUSR) == 0) { 312 ret = KSFT_SKIP; 313 goto cleanup; 314 } 315 316 peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 317 318 if (peak_fd2 == -1) 319 goto cleanup; 320 321 peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 322 323 if (peak_fd3 == -1) 324 goto cleanup; 325 326 /* any non-empty string resets, but make it clear */ 327 static const char reset_string[] = "reset\n"; 328 329 peak_reset = write(peak_fd, reset_string, sizeof(reset_string)); 330 if (peak_reset != sizeof(reset_string)) 331 goto cleanup; 332 333 peak_reset = write(peak_fd2, reset_string, sizeof(reset_string)); 334 if (peak_reset != sizeof(reset_string)) 335 goto cleanup; 336 337 peak_reset = write(peak_fd3, reset_string, sizeof(reset_string)); 338 if (peak_reset != sizeof(reset_string)) 339 goto cleanup; 340 341 /* Make sure a completely independent read isn't affected by our FD-local reset above*/ 342 peak = cg_read_long(memcg, "memory.peak"); 343 if (peak < MB(50)) 344 goto cleanup; 345 346 fd2_closed = true; 347 if (close(peak_fd2)) 348 goto cleanup; 349 350 peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 351 352 if (peak_fd4 == -1) 353 goto cleanup; 354 355 peak_reset = write(peak_fd4, reset_string, sizeof(reset_string)); 356 if (peak_reset != sizeof(reset_string)) 357 goto cleanup; 358 359 peak = cg_read_long_fd(peak_fd); 360 if (peak > MB(30) || peak < 0) 361 goto cleanup; 362 363 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 364 goto cleanup; 365 366 peak = cg_read_long(memcg, "memory.peak"); 367 if (peak < MB(50)) 368 goto cleanup; 369 370 /* Make sure everything is back to normal */ 371 peak = cg_read_long_fd(peak_fd); 372 if (peak < MB(50)) 373 goto cleanup; 374 375 peak = cg_read_long_fd(peak_fd4); 376 if (peak < MB(50)) 377 goto cleanup; 378 379 fd3_closed = true; 380 
if (close(peak_fd3)) 381 goto cleanup; 382 383 fd4_closed = true; 384 if (close(peak_fd4)) 385 goto cleanup; 386 387 ret = KSFT_PASS; 388 389 cleanup: 390 close(peak_fd); 391 if (!fd2_closed) 392 close(peak_fd2); 393 if (!fd3_closed) 394 close(peak_fd3); 395 if (!fd4_closed) 396 close(peak_fd4); 397 cg_destroy(memcg); 398 free(memcg); 399 400 return ret; 401 } 402 403 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 404 { 405 int fd = (long)arg; 406 int ppid = getppid(); 407 408 if (alloc_pagecache(fd, MB(50))) 409 return -1; 410 411 while (getppid() == ppid) 412 sleep(1); 413 414 return 0; 415 } 416 417 static int alloc_anon_noexit(const char *cgroup, void *arg) 418 { 419 int ppid = getppid(); 420 size_t size = (unsigned long)arg; 421 char *buf; 422 423 buf = alloc_and_populate_anon(size); 424 if (!buf) 425 return -1; 426 427 while (getppid() == ppid) 428 sleep(1); 429 430 free(buf); 431 return 0; 432 } 433 434 /* 435 * Wait until processes are killed asynchronously by the OOM killer 436 * If we exceed a timeout, fail. 437 */ 438 static int cg_test_proc_killed(const char *cgroup) 439 { 440 int limit; 441 442 for (limit = 10; limit > 0; limit--) { 443 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 444 return 0; 445 446 usleep(100000); 447 } 448 return -1; 449 } 450 451 static bool reclaim_until(const char *memcg, long goal); 452 453 /* 454 * First, this test creates the following hierarchy: 455 * A memory.min = 0, memory.max = 200M 456 * A/B memory.min = 50M 457 * A/B/C memory.min = 75M, memory.current = 50M 458 * A/B/D memory.min = 25M, memory.current = 50M 459 * A/B/E memory.min = 0, memory.current = 50M 460 * A/B/F memory.min = 500M, memory.current = 0 461 * 462 * (or memory.low if we test soft protection) 463 * 464 * Usages are pagecache and the test keeps a running 465 * process in every leaf cgroup. 466 * Then it creates A/G and creates a significant 467 * memory pressure in A. 
468 * 469 * Then it checks actual memory usages and expects that: 470 * A/B memory.current ~= 50M 471 * A/B/C memory.current ~= 29M [memory.events:low > 0] 472 * A/B/D memory.current ~= 21M [memory.events:low > 0] 473 * A/B/E memory.current ~= 0 [memory.events:low == 0 if !memory_recursiveprot, 474 * undefined otherwise] 475 * A/B/F memory.current = 0 [memory.events:low == 0] 476 * (for origin of the numbers, see model in memcg_protection.m.) 477 * 478 * After that it tries to allocate more than there is 479 * unprotected memory in A available, and checks that: 480 * a) memory.min protects pagecache even in this case, 481 * b) memory.low allows reclaiming page cache with low events. 482 * 483 * Then we try to reclaim from A/B/C using memory.reclaim until its 484 * usage reaches 10M. 485 * This makes sure that: 486 * (a) We ignore the protection of the reclaim target memcg. 487 * (b) The previously calculated emin value (~29M) should be dismissed. 488 */ 489 static int test_memcg_protection(const char *root, bool min) 490 { 491 int ret = KSFT_FAIL, rc; 492 char *parent[3] = {NULL}; 493 char *children[4] = {NULL}; 494 const char *attribute = min ? 
"memory.min" : "memory.low"; 495 long c[4]; 496 long current; 497 int i, attempts; 498 int fd; 499 500 fd = get_temp_fd(); 501 if (fd < 0) 502 goto cleanup; 503 504 parent[0] = cg_name(root, "memcg_test_0"); 505 if (!parent[0]) 506 goto cleanup; 507 508 parent[1] = cg_name(parent[0], "memcg_test_1"); 509 if (!parent[1]) 510 goto cleanup; 511 512 parent[2] = cg_name(parent[0], "memcg_test_2"); 513 if (!parent[2]) 514 goto cleanup; 515 516 if (cg_create(parent[0])) 517 goto cleanup; 518 519 if (cg_read_long(parent[0], attribute)) { 520 /* No memory.min on older kernels is fine */ 521 if (min) 522 ret = KSFT_SKIP; 523 goto cleanup; 524 } 525 526 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 527 goto cleanup; 528 529 if (cg_write(parent[0], "memory.max", "200M")) 530 goto cleanup; 531 532 if (cg_write(parent[0], "memory.swap.max", "0")) 533 goto cleanup; 534 535 if (cg_create(parent[1])) 536 goto cleanup; 537 538 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 539 goto cleanup; 540 541 if (cg_create(parent[2])) 542 goto cleanup; 543 544 for (i = 0; i < ARRAY_SIZE(children); i++) { 545 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 546 if (!children[i]) 547 goto cleanup; 548 549 if (cg_create(children[i])) 550 goto cleanup; 551 552 if (i > 2) 553 continue; 554 555 cg_run_nowait(children[i], alloc_pagecache_50M_noexit, 556 (void *)(long)fd); 557 } 558 559 if (cg_write(parent[1], attribute, "50M")) 560 goto cleanup; 561 if (cg_write(children[0], attribute, "75M")) 562 goto cleanup; 563 if (cg_write(children[1], attribute, "25M")) 564 goto cleanup; 565 if (cg_write(children[2], attribute, "0")) 566 goto cleanup; 567 if (cg_write(children[3], attribute, "500M")) 568 goto cleanup; 569 570 attempts = 0; 571 while (!values_close(cg_read_long(parent[1], "memory.current"), 572 MB(150), 3)) { 573 if (attempts++ > 5) 574 break; 575 sleep(1); 576 } 577 578 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 579 goto cleanup; 580 581 if 
(!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 582 goto cleanup; 583 584 for (i = 0; i < ARRAY_SIZE(children); i++) 585 c[i] = cg_read_long(children[i], "memory.current"); 586 587 if (!values_close(c[0], MB(29), 15)) 588 goto cleanup; 589 590 if (!values_close(c[1], MB(21), 20)) 591 goto cleanup; 592 593 if (c[3] != 0) 594 goto cleanup; 595 596 rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); 597 if (min && !rc) 598 goto cleanup; 599 else if (!min && rc) { 600 fprintf(stderr, 601 "memory.low prevents from allocating anon memory\n"); 602 goto cleanup; 603 } 604 605 current = min ? MB(50) : MB(30); 606 if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3)) 607 goto cleanup; 608 609 if (!reclaim_until(children[0], MB(10))) 610 goto cleanup; 611 612 if (min) { 613 ret = KSFT_PASS; 614 goto cleanup; 615 } 616 617 /* 618 * Child 2 has memory.low=0, but some low protection may still be 619 * distributed down from its parent with memory.low=50M if cgroup2 620 * memory_recursiveprot mount option is enabled. Ignore the low 621 * event count in this case. 622 */ 623 for (i = 0; i < ARRAY_SIZE(children); i++) { 624 int ignore_low_events_index = has_recursiveprot ? 
2 : -1; 625 int no_low_events_index = 1; 626 long low, oom; 627 628 oom = cg_read_key_long(children[i], "memory.events", "oom "); 629 low = cg_read_key_long(children[i], "memory.events", "low "); 630 631 if (oom) 632 goto cleanup; 633 if (i == ignore_low_events_index) 634 continue; 635 if (i <= no_low_events_index && low <= 0) 636 goto cleanup; 637 if (i > no_low_events_index && low) 638 goto cleanup; 639 640 } 641 642 ret = KSFT_PASS; 643 644 cleanup: 645 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 646 if (!children[i]) 647 continue; 648 649 cg_destroy(children[i]); 650 free(children[i]); 651 } 652 653 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 654 if (!parent[i]) 655 continue; 656 657 cg_destroy(parent[i]); 658 free(parent[i]); 659 } 660 close(fd); 661 return ret; 662 } 663 664 static int test_memcg_min(const char *root) 665 { 666 return test_memcg_protection(root, true); 667 } 668 669 static int test_memcg_low(const char *root) 670 { 671 return test_memcg_protection(root, false); 672 } 673 674 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 675 { 676 size_t size = MB(50); 677 int ret = -1; 678 long current, high, max; 679 int fd; 680 681 high = cg_read_long(cgroup, "memory.high"); 682 max = cg_read_long(cgroup, "memory.max"); 683 if (high != MB(30) && max != MB(30)) 684 return -1; 685 686 fd = get_temp_fd(); 687 if (fd < 0) 688 return -1; 689 690 if (alloc_pagecache(fd, size)) 691 goto cleanup; 692 693 current = cg_read_long(cgroup, "memory.current"); 694 if (!values_close(current, MB(30), 5)) 695 goto cleanup; 696 697 ret = 0; 698 699 cleanup: 700 close(fd); 701 return ret; 702 703 } 704 705 /* 706 * This test checks that memory.high limits the amount of 707 * memory which can be consumed by either anonymous memory 708 * or pagecache. 
709 */ 710 static int test_memcg_high(const char *root) 711 { 712 int ret = KSFT_FAIL; 713 char *memcg; 714 long high; 715 716 memcg = cg_name(root, "memcg_test"); 717 if (!memcg) 718 goto cleanup; 719 720 if (cg_create(memcg)) 721 goto cleanup; 722 723 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 724 goto cleanup; 725 726 if (cg_write(memcg, "memory.swap.max", "0")) 727 goto cleanup; 728 729 if (cg_write(memcg, "memory.high", "30M")) 730 goto cleanup; 731 732 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 733 goto cleanup; 734 735 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 736 goto cleanup; 737 738 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 739 goto cleanup; 740 741 high = cg_read_key_long(memcg, "memory.events", "high "); 742 if (high <= 0) 743 goto cleanup; 744 745 ret = KSFT_PASS; 746 747 cleanup: 748 cg_destroy(memcg); 749 free(memcg); 750 751 return ret; 752 } 753 754 static int alloc_anon_mlock(const char *cgroup, void *arg) 755 { 756 size_t size = (size_t)arg; 757 void *buf; 758 759 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 760 0, 0); 761 if (buf == MAP_FAILED) 762 return -1; 763 764 mlock(buf, size); 765 munmap(buf, size); 766 return 0; 767 } 768 769 /* 770 * This test checks that memory.high is able to throttle big single shot 771 * allocation i.e. large allocation within one kernel entry. 
772 */ 773 static int test_memcg_high_sync(const char *root) 774 { 775 int ret = KSFT_FAIL, pid, fd = -1; 776 char *memcg; 777 long pre_high, pre_max; 778 long post_high, post_max; 779 780 memcg = cg_name(root, "memcg_test"); 781 if (!memcg) 782 goto cleanup; 783 784 if (cg_create(memcg)) 785 goto cleanup; 786 787 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 788 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 789 if (pre_high < 0 || pre_max < 0) 790 goto cleanup; 791 792 if (cg_write(memcg, "memory.swap.max", "0")) 793 goto cleanup; 794 795 if (cg_write(memcg, "memory.high", "30M")) 796 goto cleanup; 797 798 if (cg_write(memcg, "memory.max", "140M")) 799 goto cleanup; 800 801 fd = memcg_prepare_for_wait(memcg); 802 if (fd < 0) 803 goto cleanup; 804 805 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 806 if (pid < 0) 807 goto cleanup; 808 809 cg_wait_for(fd); 810 811 post_high = cg_read_key_long(memcg, "memory.events", "high "); 812 post_max = cg_read_key_long(memcg, "memory.events", "max "); 813 if (post_high < 0 || post_max < 0) 814 goto cleanup; 815 816 if (pre_high == post_high || pre_max != post_max) 817 goto cleanup; 818 819 ret = KSFT_PASS; 820 821 cleanup: 822 if (fd >= 0) 823 close(fd); 824 cg_destroy(memcg); 825 free(memcg); 826 827 return ret; 828 } 829 830 /* 831 * This test checks that memory.max limits the amount of 832 * memory which can be consumed by either anonymous memory 833 * or pagecache. 
834 */ 835 static int test_memcg_max(const char *root) 836 { 837 int ret = KSFT_FAIL; 838 char *memcg; 839 long current, max; 840 841 memcg = cg_name(root, "memcg_test"); 842 if (!memcg) 843 goto cleanup; 844 845 if (cg_create(memcg)) 846 goto cleanup; 847 848 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 849 goto cleanup; 850 851 if (cg_write(memcg, "memory.swap.max", "0")) 852 goto cleanup; 853 854 if (cg_write(memcg, "memory.max", "30M")) 855 goto cleanup; 856 857 /* Should be killed by OOM killer */ 858 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 859 goto cleanup; 860 861 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 862 goto cleanup; 863 864 current = cg_read_long(memcg, "memory.current"); 865 if (current > MB(30) || !current) 866 goto cleanup; 867 868 max = cg_read_key_long(memcg, "memory.events", "max "); 869 if (max <= 0) 870 goto cleanup; 871 872 ret = KSFT_PASS; 873 874 cleanup: 875 cg_destroy(memcg); 876 free(memcg); 877 878 return ret; 879 } 880 881 /* 882 * Reclaim from @memcg until usage reaches @goal by writing to 883 * memory.reclaim. 884 * 885 * This function will return false if the usage is already below the 886 * goal. 887 * 888 * This function assumes that writing to memory.reclaim is the only 889 * source of change in memory.current (no concurrent allocations or 890 * reclaim). 891 * 892 * This function makes sure memory.reclaim is sane. It will return 893 * false if memory.reclaim's error codes do not make sense, even if 894 * the usage goal was satisfied. 895 */ 896 static bool reclaim_until(const char *memcg, long goal) 897 { 898 char buf[64]; 899 int retries, err; 900 long current, to_reclaim; 901 bool reclaimed = false; 902 903 for (retries = 5; retries > 0; retries--) { 904 current = cg_read_long(memcg, "memory.current"); 905 906 if (current < goal || values_close(current, goal, 3)) 907 break; 908 /* Did memory.reclaim return 0 incorrectly? 
*/ 909 else if (reclaimed) 910 return false; 911 912 to_reclaim = current - goal; 913 snprintf(buf, sizeof(buf), "%ld", to_reclaim); 914 err = cg_write(memcg, "memory.reclaim", buf); 915 if (!err) 916 reclaimed = true; 917 else if (err != -EAGAIN) 918 return false; 919 } 920 return reclaimed; 921 } 922 923 /* 924 * This test checks that memory.reclaim reclaims the given 925 * amount of memory (from both anon and file, if possible). 926 */ 927 static int test_memcg_reclaim(const char *root) 928 { 929 int ret = KSFT_FAIL; 930 int fd = -1; 931 int retries; 932 char *memcg; 933 long current, expected_usage; 934 935 memcg = cg_name(root, "memcg_test"); 936 if (!memcg) 937 goto cleanup; 938 939 if (cg_create(memcg)) 940 goto cleanup; 941 942 current = cg_read_long(memcg, "memory.current"); 943 if (current != 0) 944 goto cleanup; 945 946 fd = get_temp_fd(); 947 if (fd < 0) 948 goto cleanup; 949 950 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); 951 952 /* 953 * If swap is enabled, try to reclaim from both anon and file, else try 954 * to reclaim from file only. 955 */ 956 if (is_swap_enabled()) { 957 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); 958 expected_usage = MB(100); 959 } else 960 expected_usage = MB(50); 961 962 /* 963 * Wait until current usage reaches the expected usage (or we run out of 964 * retries). 965 */ 966 retries = 5; 967 while (!values_close(cg_read_long(memcg, "memory.current"), 968 expected_usage, 10)) { 969 if (retries--) { 970 sleep(1); 971 continue; 972 } else { 973 fprintf(stderr, 974 "failed to allocate %ld for memcg reclaim test\n", 975 expected_usage); 976 goto cleanup; 977 } 978 } 979 980 /* 981 * Reclaim until current reaches 30M, this makes sure we hit both anon 982 * and file if swap is enabled. 
983 */ 984 if (!reclaim_until(memcg, MB(30))) 985 goto cleanup; 986 987 ret = KSFT_PASS; 988 cleanup: 989 cg_destroy(memcg); 990 free(memcg); 991 close(fd); 992 993 return ret; 994 } 995 996 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 997 { 998 long mem_max = (long)arg; 999 size_t size = MB(50); 1000 char *buf; 1001 long mem_current, swap_current; 1002 int ret = -1; 1003 1004 buf = alloc_and_populate_anon(size); 1005 if (!buf) 1006 return -1; 1007 1008 mem_current = cg_read_long(cgroup, "memory.current"); 1009 if (!mem_current || !values_close(mem_current, mem_max, 3)) 1010 goto cleanup; 1011 1012 swap_current = cg_read_long(cgroup, "memory.swap.current"); 1013 if (!swap_current || 1014 !values_close(mem_current + swap_current, size, 3)) 1015 goto cleanup; 1016 1017 ret = 0; 1018 cleanup: 1019 free(buf); 1020 return ret; 1021 } 1022 1023 /* 1024 * This test checks that memory.swap.max limits the amount of 1025 * anonymous memory which can be swapped out. Additionally, it verifies that 1026 * memory.swap.peak reflects the high watermark and can be reset. 
1027 */ 1028 static int test_memcg_swap_max_peak(const char *root) 1029 { 1030 int ret = KSFT_FAIL; 1031 char *memcg; 1032 long max, peak; 1033 struct stat ss; 1034 int swap_peak_fd = -1, mem_peak_fd = -1; 1035 1036 /* any non-empty string resets */ 1037 static const char reset_string[] = "foobarbaz"; 1038 1039 if (!is_swap_enabled()) 1040 return KSFT_SKIP; 1041 1042 memcg = cg_name(root, "memcg_test"); 1043 if (!memcg) 1044 goto cleanup; 1045 1046 if (cg_create(memcg)) 1047 goto cleanup; 1048 1049 if (cg_read_long(memcg, "memory.swap.current")) { 1050 ret = KSFT_SKIP; 1051 goto cleanup; 1052 } 1053 1054 swap_peak_fd = cg_open(memcg, "memory.swap.peak", 1055 O_RDWR | O_APPEND | O_CLOEXEC); 1056 1057 if (swap_peak_fd == -1) { 1058 if (errno == ENOENT) 1059 ret = KSFT_SKIP; 1060 goto cleanup; 1061 } 1062 1063 /* 1064 * Before we try to use memory.swap.peak's fd, try to figure out 1065 * whether this kernel supports writing to that file in the first 1066 * place. (by checking the writable bit on the file's st_mode) 1067 */ 1068 if (fstat(swap_peak_fd, &ss)) 1069 goto cleanup; 1070 1071 if ((ss.st_mode & S_IWUSR) == 0) { 1072 ret = KSFT_SKIP; 1073 goto cleanup; 1074 } 1075 1076 mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 1077 1078 if (mem_peak_fd == -1) 1079 goto cleanup; 1080 1081 if (cg_read_long(memcg, "memory.swap.peak")) 1082 goto cleanup; 1083 1084 if (cg_read_long_fd(swap_peak_fd)) 1085 goto cleanup; 1086 1087 /* switch the swap and mem fds into local-peak tracking mode*/ 1088 int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); 1089 1090 if (peak_reset != sizeof(reset_string)) 1091 goto cleanup; 1092 1093 if (cg_read_long_fd(swap_peak_fd)) 1094 goto cleanup; 1095 1096 if (cg_read_long(memcg, "memory.peak")) 1097 goto cleanup; 1098 1099 if (cg_read_long_fd(mem_peak_fd)) 1100 goto cleanup; 1101 1102 peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); 1103 if (peak_reset != sizeof(reset_string)) 
1104 goto cleanup; 1105 1106 if (cg_read_long_fd(mem_peak_fd)) 1107 goto cleanup; 1108 1109 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 1110 goto cleanup; 1111 1112 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 1113 goto cleanup; 1114 1115 if (cg_write(memcg, "memory.swap.max", "30M")) 1116 goto cleanup; 1117 1118 if (cg_write(memcg, "memory.max", "30M")) 1119 goto cleanup; 1120 1121 /* Should be killed by OOM killer */ 1122 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1123 goto cleanup; 1124 1125 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 1126 goto cleanup; 1127 1128 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 1129 goto cleanup; 1130 1131 peak = cg_read_long(memcg, "memory.peak"); 1132 if (peak < MB(29)) 1133 goto cleanup; 1134 1135 peak = cg_read_long(memcg, "memory.swap.peak"); 1136 if (peak < MB(29)) 1137 goto cleanup; 1138 1139 peak = cg_read_long_fd(mem_peak_fd); 1140 if (peak < MB(29)) 1141 goto cleanup; 1142 1143 peak = cg_read_long_fd(swap_peak_fd); 1144 if (peak < MB(29)) 1145 goto cleanup; 1146 1147 /* 1148 * open, reset and close the peak swap on another FD to make sure 1149 * multiple extant fds don't corrupt the linked-list 1150 */ 1151 peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string); 1152 if (peak_reset) 1153 goto cleanup; 1154 1155 peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string); 1156 if (peak_reset) 1157 goto cleanup; 1158 1159 /* actually reset on the fds */ 1160 peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); 1161 if (peak_reset != sizeof(reset_string)) 1162 goto cleanup; 1163 1164 peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); 1165 if (peak_reset != sizeof(reset_string)) 1166 goto cleanup; 1167 1168 peak = cg_read_long_fd(swap_peak_fd); 1169 if (peak > MB(10)) 1170 goto cleanup; 1171 1172 /* 1173 * The cgroup is now empty, but there may be a page or two associated 1174 * with the open FD accounted to it. 
1175 */ 1176 peak = cg_read_long_fd(mem_peak_fd); 1177 if (peak > MB(1)) 1178 goto cleanup; 1179 1180 if (cg_read_long(memcg, "memory.peak") < MB(29)) 1181 goto cleanup; 1182 1183 if (cg_read_long(memcg, "memory.swap.peak") < MB(29)) 1184 goto cleanup; 1185 1186 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 1187 goto cleanup; 1188 1189 max = cg_read_key_long(memcg, "memory.events", "max "); 1190 if (max <= 0) 1191 goto cleanup; 1192 1193 peak = cg_read_long(memcg, "memory.peak"); 1194 if (peak < MB(29)) 1195 goto cleanup; 1196 1197 peak = cg_read_long(memcg, "memory.swap.peak"); 1198 if (peak < MB(29)) 1199 goto cleanup; 1200 1201 peak = cg_read_long_fd(mem_peak_fd); 1202 if (peak < MB(29)) 1203 goto cleanup; 1204 1205 peak = cg_read_long_fd(swap_peak_fd); 1206 if (peak < MB(19)) 1207 goto cleanup; 1208 1209 ret = KSFT_PASS; 1210 1211 cleanup: 1212 if (mem_peak_fd != -1 && close(mem_peak_fd)) 1213 ret = KSFT_FAIL; 1214 if (swap_peak_fd != -1 && close(swap_peak_fd)) 1215 ret = KSFT_FAIL; 1216 cg_destroy(memcg); 1217 free(memcg); 1218 1219 return ret; 1220 } 1221 1222 /* 1223 * This test disables swapping and tries to allocate anonymous memory 1224 * up to OOM. Then it checks for oom and oom_kill events in 1225 * memory.events. 
*/
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	/* The 100M allocation is expected to fail under the 30M limit. */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* No process may be left in the cgroup afterwards. */
	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Arguments handed to tcp_server().
 *
 * @port: TCP port to listen on, in host byte order.
 * @ctl:  pipe pair; the server closes ctl[0] and writes a status int
 *        (0 or an errno value) to ctl[1].
 */
struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

/*
 * Listen on the IPv6 wildcard address at the requested port, report the
 * setup result through the control pipe, accept one client and keep writing
 * to it until the write fails.
 *
 * The status written to the pipe is 0 once the socket is listening, or the
 * errno of a failed socket()/bind(). Nothing is written when setsockopt()
 * or listen() fails.
 *
 * Returns 0 if the write loop ended with ECONNRESET, -1 otherwise.
 */
static int tcp_server(const char *cgroup, void *arg)
{
	/*
	 * Zero-filled payload. Static so that no uninitialised stack memory
	 * is sent and 1M of stack is not needed.
	 */
	static const uint8_t buf[0x100000];
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0) {
		/* Pass back errno to the ctl_fd */
		write(ctl_fd, &errno, sizeof(errno));
		return ret;
	}

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	/* Tell the parent that the server is ready. */
	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	/*
	 * From here on only a connection reset by the client counts as
	 * success; in particular a failing accept() must not return 0.
	 */
	ret = -1;

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	for (;;) {
		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

/*
 * Connect to "localhost" at @port and read from the socket up to 16 times.
 * After each read, compare the growth of memory.current since entry with
 * the "sock" entry of memory.stat of @cgroup.
 *
 * Returns KSFT_PASS as soon as the two values are close, the getaddrinfo()
 * error code if name resolution fails, and another non-KSFT_PASS value on
 * any other failure.
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;
	long allocated;

	allocated = cg_read_long(cgroup, "memory.current");
	/*
	 * %hu, not %hd: the port is unsigned, and a value above 32767 would
	 * otherwise be printed as a negative number (and be truncated by the
	 * 6-byte buffer).
	 */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	/*
	 * ret is 0 (== KSFT_PASS) at this point; reset it so that a failing
	 * socket() is not reported as a pass.
	 */
	ret = KSFT_FAIL;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* exclude the memory not related to socket connection */
		if (values_close(current - allocated, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}

/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between 1000
 * and 61000. Once it gets a client connection, it starts writing to
 * its socket.
 * The TCP client interleaves reads from the socket with checks whether
 * memory.current and memory.stat.sock are similar.
1389 */ 1390 static int test_memcg_sock(const char *root) 1391 { 1392 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1393 unsigned short port; 1394 char *memcg; 1395 long sock_post = -1; 1396 1397 memcg = cg_name(root, "memcg_test"); 1398 if (!memcg) 1399 goto cleanup; 1400 1401 if (cg_create(memcg)) 1402 goto cleanup; 1403 1404 while (bind_retries--) { 1405 struct tcp_server_args args; 1406 1407 if (pipe(args.ctl)) 1408 goto cleanup; 1409 1410 port = args.port = 1000 + rand() % 60000; 1411 1412 pid = cg_run_nowait(memcg, tcp_server, &args); 1413 if (pid < 0) 1414 goto cleanup; 1415 1416 close(args.ctl[1]); 1417 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1418 goto cleanup; 1419 close(args.ctl[0]); 1420 1421 /* Skip if address family not supported by protocol */ 1422 if (err == EAFNOSUPPORT) { 1423 ret = KSFT_SKIP; 1424 goto cleanup; 1425 } 1426 1427 if (!err) 1428 break; 1429 if (err != EADDRINUSE) 1430 goto cleanup; 1431 1432 waitpid(pid, NULL, 0); 1433 } 1434 1435 if (err == EADDRINUSE) { 1436 ret = KSFT_SKIP; 1437 goto cleanup; 1438 } 1439 1440 if (tcp_client(memcg, port) != KSFT_PASS) 1441 goto cleanup; 1442 1443 waitpid(pid, &err, 0); 1444 if (WEXITSTATUS(err)) 1445 goto cleanup; 1446 1447 if (cg_read_long(memcg, "memory.current") < 0) 1448 goto cleanup; 1449 1450 /* 1451 * memory.stat is updated asynchronously via the memcg rstat 1452 * flushing worker, which runs periodically (every 2 seconds, 1453 * see FLUSH_TIME). On a busy system, the "sock " counter may 1454 * stay non-zero for a short period of time after the TCP 1455 * connection is closed and all socket memory has been 1456 * uncharged. 1457 * 1458 * Poll memory.stat for up to 3 seconds (~FLUSH_TIME plus some 1459 * scheduling slack) and require that the "sock " counter 1460 * eventually drops to zero. 
1461 */ 1462 sock_post = cg_read_key_long_poll(memcg, "memory.stat", "sock ", 0, 1463 MEMCG_SOCKSTAT_WAIT_RETRIES, 1464 DEFAULT_WAIT_INTERVAL_US); 1465 if (sock_post) 1466 goto cleanup; 1467 1468 ret = KSFT_PASS; 1469 1470 cleanup: 1471 cg_destroy(memcg); 1472 free(memcg); 1473 1474 return ret; 1475 } 1476 1477 /* 1478 * This test disables swapping and tries to allocate anonymous memory 1479 * up to OOM with memory.group.oom set. Then it checks that all 1480 * processes in the leaf were killed. It also checks that oom_events 1481 * were propagated to the parent level. 1482 */ 1483 static int test_memcg_oom_group_leaf_events(const char *root) 1484 { 1485 int ret = KSFT_FAIL; 1486 char *parent, *child; 1487 long parent_oom_events; 1488 1489 parent = cg_name(root, "memcg_test_0"); 1490 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1491 1492 if (!parent || !child) 1493 goto cleanup; 1494 1495 if (cg_create(parent)) 1496 goto cleanup; 1497 1498 if (cg_create(child)) 1499 goto cleanup; 1500 1501 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1502 goto cleanup; 1503 1504 if (cg_write(child, "memory.max", "50M")) 1505 goto cleanup; 1506 1507 if (cg_write(child, "memory.swap.max", "0")) 1508 goto cleanup; 1509 1510 if (cg_write(child, "memory.oom.group", "1")) 1511 goto cleanup; 1512 1513 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1514 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1515 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1516 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1517 goto cleanup; 1518 1519 if (cg_test_proc_killed(child)) 1520 goto cleanup; 1521 1522 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1523 goto cleanup; 1524 1525 parent_oom_events = cg_read_key_long( 1526 parent, "memory.events", "oom_kill "); 1527 /* 1528 * If memory_localevents is not enabled (the default), the parent should 1529 * count OOM events in its children groups. 
Otherwise, it should not 1530 * have observed any events. 1531 */ 1532 if (has_localevents && parent_oom_events != 0) 1533 goto cleanup; 1534 else if (!has_localevents && parent_oom_events <= 0) 1535 goto cleanup; 1536 1537 ret = KSFT_PASS; 1538 1539 cleanup: 1540 if (child) 1541 cg_destroy(child); 1542 if (parent) 1543 cg_destroy(parent); 1544 free(child); 1545 free(parent); 1546 1547 return ret; 1548 } 1549 1550 /* 1551 * This test disables swapping and tries to allocate anonymous memory 1552 * up to OOM with memory.group.oom set. Then it checks that all 1553 * processes in the parent and leaf were killed. 1554 */ 1555 static int test_memcg_oom_group_parent_events(const char *root) 1556 { 1557 int ret = KSFT_FAIL; 1558 char *parent, *child; 1559 1560 parent = cg_name(root, "memcg_test_0"); 1561 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1562 1563 if (!parent || !child) 1564 goto cleanup; 1565 1566 if (cg_create(parent)) 1567 goto cleanup; 1568 1569 if (cg_create(child)) 1570 goto cleanup; 1571 1572 if (cg_write(parent, "memory.max", "80M")) 1573 goto cleanup; 1574 1575 if (cg_write(parent, "memory.swap.max", "0")) 1576 goto cleanup; 1577 1578 if (cg_write(parent, "memory.oom.group", "1")) 1579 goto cleanup; 1580 1581 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1582 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1583 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1584 1585 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1586 goto cleanup; 1587 1588 if (cg_test_proc_killed(child)) 1589 goto cleanup; 1590 if (cg_test_proc_killed(parent)) 1591 goto cleanup; 1592 1593 ret = KSFT_PASS; 1594 1595 cleanup: 1596 if (child) 1597 cg_destroy(child); 1598 if (parent) 1599 cg_destroy(parent); 1600 free(child); 1601 free(parent); 1602 1603 return ret; 1604 } 1605 1606 /* 1607 * This test disables swapping and tries to allocate anonymous memory 1608 * up to OOM with memory.group.oom set. 
Then it checks that all 1609 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1610 */ 1611 static int test_memcg_oom_group_score_events(const char *root) 1612 { 1613 int ret = KSFT_FAIL; 1614 char *memcg; 1615 int safe_pid; 1616 1617 memcg = cg_name(root, "memcg_test_0"); 1618 1619 if (!memcg) 1620 goto cleanup; 1621 1622 if (cg_create(memcg)) 1623 goto cleanup; 1624 1625 if (cg_write(memcg, "memory.max", "50M")) 1626 goto cleanup; 1627 1628 if (cg_write(memcg, "memory.swap.max", "0")) 1629 goto cleanup; 1630 1631 if (cg_write(memcg, "memory.oom.group", "1")) 1632 goto cleanup; 1633 1634 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1635 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1636 goto cleanup; 1637 1638 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1639 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1640 goto cleanup; 1641 1642 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1643 goto cleanup; 1644 1645 if (kill(safe_pid, SIGKILL)) 1646 goto cleanup; 1647 1648 ret = KSFT_PASS; 1649 1650 cleanup: 1651 if (memcg) 1652 cg_destroy(memcg); 1653 free(memcg); 1654 1655 return ret; 1656 } 1657 1658 static int read_event(int inotify_fd, int expected_event, int expected_wd) 1659 { 1660 struct inotify_event event; 1661 ssize_t len = 0; 1662 1663 len = read(inotify_fd, &event, sizeof(event)); 1664 if (len < (ssize_t)sizeof(event)) 1665 return -1; 1666 1667 if (event.mask != expected_event || event.wd != expected_wd) { 1668 fprintf(stderr, 1669 "event does not match expected values: mask %d (expected %d) wd %d (expected %d)\n", 1670 event.mask, expected_event, event.wd, expected_wd); 1671 return -1; 1672 } 1673 1674 return 0; 1675 } 1676 1677 static int test_memcg_inotify_delete_file(const char *root) 1678 { 1679 int ret = KSFT_FAIL; 1680 char *memcg = NULL; 1681 int fd, wd; 1682 1683 memcg = cg_name(root, "memcg_test_0"); 1684 1685 if (!memcg) 1686 goto cleanup; 1687 1688 if (cg_create(memcg)) 1689 
goto cleanup; 1690 1691 fd = inotify_init1(0); 1692 if (fd == -1) 1693 goto cleanup; 1694 1695 wd = inotify_add_watch(fd, cg_control(memcg, "memory.events"), IN_DELETE_SELF); 1696 if (wd == -1) 1697 goto cleanup; 1698 1699 if (cg_destroy(memcg)) 1700 goto cleanup; 1701 free(memcg); 1702 memcg = NULL; 1703 1704 if (read_event(fd, IN_DELETE_SELF, wd)) 1705 goto cleanup; 1706 1707 if (read_event(fd, IN_IGNORED, wd)) 1708 goto cleanup; 1709 1710 ret = KSFT_PASS; 1711 1712 cleanup: 1713 if (fd >= 0) 1714 close(fd); 1715 if (memcg) 1716 cg_destroy(memcg); 1717 free(memcg); 1718 1719 return ret; 1720 } 1721 1722 static int test_memcg_inotify_delete_dir(const char *root) 1723 { 1724 int ret = KSFT_FAIL; 1725 char *memcg = NULL; 1726 int fd, wd; 1727 1728 memcg = cg_name(root, "memcg_test_0"); 1729 1730 if (!memcg) 1731 goto cleanup; 1732 1733 if (cg_create(memcg)) 1734 goto cleanup; 1735 1736 fd = inotify_init1(0); 1737 if (fd == -1) 1738 goto cleanup; 1739 1740 wd = inotify_add_watch(fd, memcg, IN_DELETE_SELF); 1741 if (wd == -1) 1742 goto cleanup; 1743 1744 if (cg_destroy(memcg)) 1745 goto cleanup; 1746 free(memcg); 1747 memcg = NULL; 1748 1749 if (read_event(fd, IN_DELETE_SELF, wd)) 1750 goto cleanup; 1751 1752 if (read_event(fd, IN_IGNORED, wd)) 1753 goto cleanup; 1754 1755 ret = KSFT_PASS; 1756 1757 cleanup: 1758 if (fd >= 0) 1759 close(fd); 1760 if (memcg) 1761 cg_destroy(memcg); 1762 free(memcg); 1763 1764 return ret; 1765 } 1766 1767 #define T(x) { x, #x } 1768 struct memcg_test { 1769 int (*fn)(const char *root); 1770 const char *name; 1771 } tests[] = { 1772 T(test_memcg_subtree_control), 1773 T(test_memcg_current_peak), 1774 T(test_memcg_min), 1775 T(test_memcg_low), 1776 T(test_memcg_high), 1777 T(test_memcg_high_sync), 1778 T(test_memcg_max), 1779 T(test_memcg_reclaim), 1780 T(test_memcg_oom_events), 1781 T(test_memcg_swap_max_peak), 1782 T(test_memcg_sock), 1783 T(test_memcg_oom_group_leaf_events), 1784 T(test_memcg_oom_group_parent_events), 1785 
T(test_memcg_oom_group_score_events), 1786 T(test_memcg_inotify_delete_file), 1787 T(test_memcg_inotify_delete_dir), 1788 }; 1789 #undef T 1790 1791 int main(int argc, char **argv) 1792 { 1793 char root[PATH_MAX]; 1794 int i, proc_status; 1795 1796 page_size = sysconf(_SC_PAGE_SIZE); 1797 if (page_size <= 0) 1798 page_size = BUF_SIZE; 1799 1800 ksft_print_header(); 1801 ksft_set_plan(ARRAY_SIZE(tests)); 1802 if (cg_find_unified_root(root, sizeof(root), NULL)) 1803 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1804 1805 /* 1806 * Check that memory controller is available: 1807 * memory is listed in cgroup.controllers 1808 */ 1809 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1810 ksft_exit_skip("memory controller isn't available\n"); 1811 1812 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1813 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1814 ksft_exit_skip("Failed to set memory controller\n"); 1815 1816 proc_status = proc_mount_contains("memory_recursiveprot"); 1817 if (proc_status < 0) 1818 ksft_exit_skip("Failed to query cgroup mount option\n"); 1819 has_recursiveprot = proc_status; 1820 1821 proc_status = proc_mount_contains("memory_localevents"); 1822 if (proc_status < 0) 1823 ksft_exit_skip("Failed to query cgroup mount option\n"); 1824 has_localevents = proc_status; 1825 1826 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1827 switch (tests[i].fn(root)) { 1828 case KSFT_PASS: 1829 ksft_test_result_pass("%s\n", tests[i].name); 1830 break; 1831 case KSFT_SKIP: 1832 ksft_test_result_skip("%s\n", tests[i].name); 1833 break; 1834 default: 1835 ksft_test_result_fail("%s\n", tests[i].name); 1836 break; 1837 } 1838 } 1839 1840 ksft_finished(); 1841 } 1842