1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/inotify.h> 14 #include <sys/socket.h> 15 #include <sys/wait.h> 16 #include <arpa/inet.h> 17 #include <netinet/in.h> 18 #include <netdb.h> 19 #include <errno.h> 20 #include <sys/mman.h> 21 22 #include "kselftest.h" 23 #include "cgroup_util.h" 24 25 #define MEMCG_SOCKSTAT_WAIT_RETRIES 30 26 27 static bool has_localevents; 28 static bool has_recursiveprot; 29 30 int get_temp_fd(void) 31 { 32 return open(".", O_TMPFILE | O_RDWR | O_EXCL); 33 } 34 35 int alloc_pagecache(int fd, size_t size) 36 { 37 char buf[PAGE_SIZE]; 38 struct stat st; 39 int i; 40 41 if (fstat(fd, &st)) 42 goto cleanup; 43 44 size += st.st_size; 45 46 if (ftruncate(fd, size)) 47 goto cleanup; 48 49 for (i = 0; i < size; i += sizeof(buf)) 50 read(fd, buf, sizeof(buf)); 51 52 return 0; 53 54 cleanup: 55 return -1; 56 } 57 58 int alloc_anon(const char *cgroup, void *arg) 59 { 60 size_t size = (unsigned long)arg; 61 char *buf, *ptr; 62 63 buf = malloc(size); 64 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 65 *ptr = 0; 66 67 free(buf); 68 return 0; 69 } 70 71 int is_swap_enabled(void) 72 { 73 char buf[PAGE_SIZE]; 74 const char delim[] = "\n"; 75 int cnt = 0; 76 char *line; 77 78 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) 79 return -1; 80 81 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 82 cnt++; 83 84 return cnt > 1; 85 } 86 87 int set_oom_adj_score(int pid, int score) 88 { 89 char path[PATH_MAX]; 90 int fd, len; 91 92 sprintf(path, "/proc/%d/oom_score_adj", pid); 93 94 fd = open(path, O_WRONLY | O_APPEND); 95 if (fd < 0) 96 return fd; 97 98 len = dprintf(fd, "%d", score); 99 if (len < 0) { 100 close(fd); 101 return len; 102 } 103 104 close(fd); 105 return 0; 106 } 107 108 /* 109 
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	/* With +memory delegated, the child must list "memory" */
	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	/* Without delegation, "memory" must NOT appear in the child */
	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

	/*
	 * Cleanup labels deliberately fall through: each entry point tears
	 * down everything created after it, ending with the first pair.
	 */
cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}

/*
 * cg_run() helper: allocate and touch 50M of anonymous memory, then check
 * that memory.current and the "anon" key of memory.stat both reflect it
 * (within a few percent).
 */
static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	char *buf, *ptr;
	long anon, current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	/* Fault in every page so it is actually charged */
	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	/*
	 * NOTE(review): current is long, size is size_t, so a negative
	 * (error) value from cg_read_long() converts to a huge unsigned
	 * here and skips this check; the values_close() below still
	 * catches it — TODO confirm this is intentional.
	 */
	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	if (!values_close(size, current, 3))
		goto cleanup;

	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
	if (anon < 0)
		goto cleanup;

	if (!values_close(anon, current, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * cg_run() helper: populate 50M of pagecache, then check that
 * memory.current and the "file" key of memory.stat both reflect it.
 */
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	/* Pagecache accounting is fuzzier than anon: allow 10% slack */
	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test creates a memory cgroup, allocates
 * some anonymous memory and some pagecache
 * and checks memory.current, memory.peak, and some memory.stat values.
 */
static int test_memcg_current_peak(const char *root)
{
	int ret = KSFT_FAIL;
	long current, peak, peak_reset;
	char *memcg;
	bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
	int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;
	struct stat ss;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* A fresh, empty cgroup reports zero usage and zero peak */
	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	/* The 50M anon allocation must have raised the global peak */
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/*
	 * We'll open a few FDs for the same memory.peak file to exercise the free-path
	 * We need at least three to be closed in a different order than writes occurred to test
	 * the linked-list handling.
	 */
	peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.peak's fd, try to figure out whether
	 * this kernel supports writing to that file in the first place. (by
	 * checking the writable bit on the file's st_mode)
	 */
	if (fstat(peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd2 == -1)
		goto cleanup;

	peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd3 == -1)
		goto cleanup;

	/* any non-empty string resets, but make it clear */
	static const char reset_string[] = "reset\n";

	/* Switch each fd into local-peak tracking mode by writing to it */
	peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* Make sure a completely independent read isn't affected by our FD-local reset above*/
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/* Close fd2 out of write order to exercise list unlinking */
	fd2_closed = true;
	if (close(peak_fd2))
		goto cleanup;

	peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (peak_fd4 == -1)
		goto cleanup;

	peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	/* The local peak on fd1 was reset, so it must be small again */
	peak = cg_read_long_fd(peak_fd);
	if (peak > MB(30) || peak < 0)
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(50))
		goto cleanup;

	/* Make sure everything is back to normal */
	peak = cg_read_long_fd(peak_fd);
	if (peak < MB(50))
		goto cleanup;

	peak = cg_read_long_fd(peak_fd4);
	if (peak < MB(50))
		goto cleanup;

	/* Close the remaining fds in non-write order as well */
	fd3_closed = true;
	if (close(peak_fd3))
		goto cleanup;

	fd4_closed = true;
	if (close(peak_fd4))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	close(peak_fd);
	if (!fd2_closed)
		close(peak_fd2);
	if (!fd3_closed)
		close(peak_fd3);
	if (!fd4_closed)
		close(peak_fd4);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * cg_run_nowait() helper: populate 50M of pagecache on the shared @arg fd,
 * then idle until the parent (test) process exits.
 */
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	/* Keep the memory charged until the parent goes away */
	while (getppid() == ppid)
		sleep(1);

	return 0;
}

/*
 * cg_run_nowait() helper: allocate and touch @arg bytes of anonymous
 * memory, then idle until the parent (test) process exits.
 */
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();
	size_t size = (unsigned long)arg;
	char *buf, *ptr;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	/* Keep the memory charged until the parent goes away */
	while (getppid() == ppid)
		sleep(1);

	free(buf);
	return 0;
}

/*
 * Wait until processes are killed asynchronously by the OOM killer
 * If we exceed a timeout, fail.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int limit;

	/* Poll cgroup.procs for up to ~1s (10 * 100ms) */
	for (limit = 10; limit > 0; limit--) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}

static bool reclaim_until(const char *memcg, long goal);

/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 0,    memory.max = 200M
 * A/B     memory.min = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * (or memory.low if we test soft protection)
 *
 * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates a significant
 * memory pressure in A.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 29M [memory.events:low > 0]
 * A/B/D  memory.current ~= 21M [memory.events:low > 0]
 * A/B/E  memory.current ~= 0   [memory.events:low == 0 if !memory_recursiveprot,
 *			       undefined otherwise]
 * A/B/F  memory.current  = 0   [memory.events:low == 0]
 * (for origin of the numbers, see model in memcg_protection.m.)
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that:
 * a) memory.min protects pagecache even in this case,
 * b) memory.low allows reclaiming page cache with low events.
 *
 * Then we try to reclaim from A/B/C using memory.reclaim until its
 * usage reaches 10M.
 * This makes sure that:
 * (a) We ignore the protection of the reclaim target memcg.
 * (b) The previously calculated emin value (~29M) should be dismissed.
 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	/* Same test logic exercises both hard (min) and soft (low) protection */
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	long current;
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	/* No swap, so reclaim pressure acts on pagecache only */
	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* The last child (F) stays empty: no allocation in it */
		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[1], attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	/* Wait (up to ~6s) for the three 50M children to settle at ~150M */
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	/* Generate memory pressure from the sibling A/G (parent[2]) */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(29), 15))
		goto cleanup;

	if (!values_close(c[1], MB(21), 20))
		goto cleanup;

	/* The empty over-protected child must not be charged at all */
	if (c[3] != 0)
		goto cleanup;

	/*
	 * Allocate more than the unprotected memory in A. With memory.min
	 * this must fail (OOM); with memory.low it must succeed because
	 * low-protected pagecache may still be reclaimed.
	 */
	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	current = min ? MB(50) : MB(30);
	if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
		goto cleanup;

	/* Reclaiming from the target itself must ignore its own protection */
	if (!reclaim_until(children[0], MB(10)))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	/*
	 * Child 2 has memory.low=0, but some low protection may still be
	 * distributed down from its parent with memory.low=50M if cgroup2
	 * memory_recursiveprot mount option is enabled. Ignore the low
	 * event count in this case.
	 */
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int ignore_low_events_index = has_recursiveprot ? 2 : -1;
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i == ignore_low_events_index)
			continue;
		/* Children 0 and 1 were reclaimed below low: must see events */
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		/* Children 2 and 3 must not have seen any low events */
		if (i > no_low_events_index && low)
			goto cleanup;

	}

	ret = KSFT_PASS;

cleanup:
	/* Destroy leaves before parents; skip names that were never built */
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

/* Hard-protection variant: see test_memcg_protection() */
static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}

/* Soft-protection variant: see test_memcg_protection() */
static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}

/*
 * cg_run() helper: try to populate 50M of pagecache while memory.high or
 * memory.max is set to 30M, then verify usage was capped around 30M.
 */
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	/* Sanity-check that the caller actually set a 30M boundary */
	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;

}

/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
702 */ 703 static int test_memcg_high(const char *root) 704 { 705 int ret = KSFT_FAIL; 706 char *memcg; 707 long high; 708 709 memcg = cg_name(root, "memcg_test"); 710 if (!memcg) 711 goto cleanup; 712 713 if (cg_create(memcg)) 714 goto cleanup; 715 716 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 717 goto cleanup; 718 719 if (cg_write(memcg, "memory.swap.max", "0")) 720 goto cleanup; 721 722 if (cg_write(memcg, "memory.high", "30M")) 723 goto cleanup; 724 725 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 726 goto cleanup; 727 728 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 729 goto cleanup; 730 731 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 732 goto cleanup; 733 734 high = cg_read_key_long(memcg, "memory.events", "high "); 735 if (high <= 0) 736 goto cleanup; 737 738 ret = KSFT_PASS; 739 740 cleanup: 741 cg_destroy(memcg); 742 free(memcg); 743 744 return ret; 745 } 746 747 static int alloc_anon_mlock(const char *cgroup, void *arg) 748 { 749 size_t size = (size_t)arg; 750 void *buf; 751 752 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 753 0, 0); 754 if (buf == MAP_FAILED) 755 return -1; 756 757 mlock(buf, size); 758 munmap(buf, size); 759 return 0; 760 } 761 762 /* 763 * This test checks that memory.high is able to throttle big single shot 764 * allocation i.e. large allocation within one kernel entry. 
765 */ 766 static int test_memcg_high_sync(const char *root) 767 { 768 int ret = KSFT_FAIL, pid, fd = -1; 769 char *memcg; 770 long pre_high, pre_max; 771 long post_high, post_max; 772 773 memcg = cg_name(root, "memcg_test"); 774 if (!memcg) 775 goto cleanup; 776 777 if (cg_create(memcg)) 778 goto cleanup; 779 780 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 781 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 782 if (pre_high < 0 || pre_max < 0) 783 goto cleanup; 784 785 if (cg_write(memcg, "memory.swap.max", "0")) 786 goto cleanup; 787 788 if (cg_write(memcg, "memory.high", "30M")) 789 goto cleanup; 790 791 if (cg_write(memcg, "memory.max", "140M")) 792 goto cleanup; 793 794 fd = memcg_prepare_for_wait(memcg); 795 if (fd < 0) 796 goto cleanup; 797 798 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 799 if (pid < 0) 800 goto cleanup; 801 802 cg_wait_for(fd); 803 804 post_high = cg_read_key_long(memcg, "memory.events", "high "); 805 post_max = cg_read_key_long(memcg, "memory.events", "max "); 806 if (post_high < 0 || post_max < 0) 807 goto cleanup; 808 809 if (pre_high == post_high || pre_max != post_max) 810 goto cleanup; 811 812 ret = KSFT_PASS; 813 814 cleanup: 815 if (fd >= 0) 816 close(fd); 817 cg_destroy(memcg); 818 free(memcg); 819 820 return ret; 821 } 822 823 /* 824 * This test checks that memory.max limits the amount of 825 * memory which can be consumed by either anonymous memory 826 * or pagecache. 
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* A fresh cgroup must report an unlimited hard limit */
	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	/* Disable swap so memory.max cannot be escaped into swap */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	/* Hitting the hard limit must have produced "max" events */
	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Reclaim from @memcg until usage reaches @goal by writing to
 * memory.reclaim.
 *
 * This function will return false if the usage is already below the
 * goal.
 *
 * This function assumes that writing to memory.reclaim is the only
 * source of change in memory.current (no concurrent allocations or
 * reclaim).
 *
 * This function makes sure memory.reclaim is sane. It will return
 * false if memory.reclaim's error codes do not make sense, even if
 * the usage goal was satisfied.
 */
static bool reclaim_until(const char *memcg, long goal)
{
	char buf[64];
	int retries, err;
	long current, to_reclaim;
	bool reclaimed = false;

	for (retries = 5; retries > 0; retries--) {
		current = cg_read_long(memcg, "memory.current");

		if (current < goal || values_close(current, goal, 3))
			break;
		/* Did memory.reclaim return 0 incorrectly? */
		else if (reclaimed)
			return false;

		to_reclaim = current - goal;
		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err)
			reclaimed = true;
		/* -EAGAIN (partial reclaim) is the only tolerated error */
		else if (err != -EAGAIN)
			return false;
	}
	return reclaimed;
}

/*
 * This test checks that memory.reclaim reclaims the given
 * amount of memory (from both anon and file, if possible).
 */
static int test_memcg_reclaim(const char *root)
{
	int ret = KSFT_FAIL;
	int fd = -1;
	int retries;
	char *memcg;
	long current, expected_usage;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else
		expected_usage = MB(50);

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			     expected_usage, 10)) {
		if (retries--) {
			sleep(1);
			continue;
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M, this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	if (!reclaim_until(memcg, MB(30)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(memcg);
	free(memcg);
	close(fd);

	return ret;
}

/*
 * cg_run() helper: allocate and touch 50M of anon memory with
 * memory.max = @arg, then verify that usage sits at the limit and the
 * overflow (50M minus @arg) went to swap.
 */
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	/* mem + swap together must account for the whole 50M */
	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out. Additionally, it verifies that
 * memory.swap.peak reflects the high watermark and can be reset.
 */
static int test_memcg_swap_max_peak(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max, peak;
	struct stat ss;
	int swap_peak_fd = -1, mem_peak_fd = -1;

	/* any non-empty string resets */
	static const char reset_string[] = "foobarbaz";

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Pre-existing swap usage would invalidate the peak checks below */
	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	swap_peak_fd = cg_open(memcg, "memory.swap.peak",
			       O_RDWR | O_APPEND | O_CLOEXEC);

	if (swap_peak_fd == -1) {
		if (errno == ENOENT)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	/*
	 * Before we try to use memory.swap.peak's fd, try to figure out
	 * whether this kernel supports writing to that file in the first
	 * place. (by checking the writable bit on the file's st_mode)
	 */
	if (fstat(swap_peak_fd, &ss))
		goto cleanup;

	if ((ss.st_mode & S_IWUSR) == 0) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);

	if (mem_peak_fd == -1)
		goto cleanup;

	/* Nothing allocated yet: both global peaks must read zero */
	if (cg_read_long(memcg, "memory.swap.peak"))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	/* switch the swap and mem fds into local-peak tracking mode*/
	int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));

	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(swap_peak_fd))
		goto cleanup;

	if (cg_read_long(memcg, "memory.peak"))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	if (cg_read_long_fd(mem_peak_fd))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	/* Both global and fd-local peaks must have reached ~30M */
	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	/*
	 * open, reset and close the peak swap on another FD to make sure
	 * multiple extant fds don't corrupt the linked-list
	 */
	peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
	if (peak_reset)
		goto cleanup;

	/* actually reset on the fds */
	peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
	if (peak_reset != sizeof(reset_string))
		goto cleanup;

	peak = cg_read_long_fd(swap_peak_fd);
	if (peak > MB(10))
		goto cleanup;

	/*
	 * The cgroup is now empty, but there may be a page or two associated
	 * with the open FD accounted to it.
	 */
	peak = cg_read_long_fd(mem_peak_fd);
	if (peak > MB(1))
		goto cleanup;

	/* The fd-local resets must not have touched the global peaks */
	if (cg_read_long(memcg, "memory.peak") < MB(29))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	peak = cg_read_long(memcg, "memory.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long(memcg, "memory.swap.peak");
	if (peak < MB(29))
		goto cleanup;

	peak = cg_read_long_fd(mem_peak_fd);
	if (peak < MB(29))
		goto cleanup;

	/* ~20M went to swap (50M anon - 30M memory.max) */
	peak = cg_read_long_fd(swap_peak_fd);
	if (peak < MB(19))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	/* A failing close() here indicates a broken fd-peak free path */
	if (mem_peak_fd != -1 && close(mem_peak_fd))
		ret = KSFT_FAIL;
	if (swap_peak_fd != -1 && close(swap_peak_fd))
		ret = KSFT_FAIL;
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
1224 */ 1225 static int test_memcg_oom_events(const char *root) 1226 { 1227 int ret = KSFT_FAIL; 1228 char *memcg; 1229 1230 memcg = cg_name(root, "memcg_test"); 1231 if (!memcg) 1232 goto cleanup; 1233 1234 if (cg_create(memcg)) 1235 goto cleanup; 1236 1237 if (cg_write(memcg, "memory.max", "30M")) 1238 goto cleanup; 1239 1240 if (cg_write(memcg, "memory.swap.max", "0")) 1241 goto cleanup; 1242 1243 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1244 goto cleanup; 1245 1246 if (cg_read_strcmp(memcg, "cgroup.procs", "")) 1247 goto cleanup; 1248 1249 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 1250 goto cleanup; 1251 1252 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 1253 goto cleanup; 1254 1255 ret = KSFT_PASS; 1256 1257 cleanup: 1258 cg_destroy(memcg); 1259 free(memcg); 1260 1261 return ret; 1262 } 1263 1264 struct tcp_server_args { 1265 unsigned short port; 1266 int ctl[2]; 1267 }; 1268 1269 static int tcp_server(const char *cgroup, void *arg) 1270 { 1271 struct tcp_server_args *srv_args = arg; 1272 struct sockaddr_in6 saddr = { 0 }; 1273 socklen_t slen = sizeof(saddr); 1274 int sk, client_sk, ctl_fd, yes = 1, ret = -1; 1275 1276 close(srv_args->ctl[0]); 1277 ctl_fd = srv_args->ctl[1]; 1278 1279 saddr.sin6_family = AF_INET6; 1280 saddr.sin6_addr = in6addr_any; 1281 saddr.sin6_port = htons(srv_args->port); 1282 1283 sk = socket(AF_INET6, SOCK_STREAM, 0); 1284 if (sk < 0) 1285 return ret; 1286 1287 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) 1288 goto cleanup; 1289 1290 if (bind(sk, (struct sockaddr *)&saddr, slen)) { 1291 write(ctl_fd, &errno, sizeof(errno)); 1292 goto cleanup; 1293 } 1294 1295 if (listen(sk, 1)) 1296 goto cleanup; 1297 1298 ret = 0; 1299 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { 1300 ret = -1; 1301 goto cleanup; 1302 } 1303 1304 client_sk = accept(sk, NULL, NULL); 1305 if (client_sk < 0) 1306 goto cleanup; 1307 1308 ret = -1; 1309 for (;;) { 1310 uint8_t buf[0x100000]; 1311 
1312 if (write(client_sk, buf, sizeof(buf)) <= 0) { 1313 if (errno == ECONNRESET) 1314 ret = 0; 1315 break; 1316 } 1317 } 1318 1319 close(client_sk); 1320 1321 cleanup: 1322 close(sk); 1323 return ret; 1324 } 1325 1326 static int tcp_client(const char *cgroup, unsigned short port) 1327 { 1328 const char server[] = "localhost"; 1329 struct addrinfo *ai; 1330 char servport[6]; 1331 int retries = 0x10; /* nice round number */ 1332 int sk, ret; 1333 long allocated; 1334 1335 allocated = cg_read_long(cgroup, "memory.current"); 1336 snprintf(servport, sizeof(servport), "%hd", port); 1337 ret = getaddrinfo(server, servport, NULL, &ai); 1338 if (ret) 1339 return ret; 1340 1341 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 1342 if (sk < 0) 1343 goto free_ainfo; 1344 1345 ret = connect(sk, ai->ai_addr, ai->ai_addrlen); 1346 if (ret < 0) 1347 goto close_sk; 1348 1349 ret = KSFT_FAIL; 1350 while (retries--) { 1351 uint8_t buf[0x100000]; 1352 long current, sock; 1353 1354 if (read(sk, buf, sizeof(buf)) <= 0) 1355 goto close_sk; 1356 1357 current = cg_read_long(cgroup, "memory.current"); 1358 sock = cg_read_key_long(cgroup, "memory.stat", "sock "); 1359 1360 if (current < 0 || sock < 0) 1361 goto close_sk; 1362 1363 /* exclude the memory not related to socket connection */ 1364 if (values_close(current - allocated, sock, 10)) { 1365 ret = KSFT_PASS; 1366 break; 1367 } 1368 } 1369 1370 close_sk: 1371 close(sk); 1372 free_ainfo: 1373 freeaddrinfo(ai); 1374 return ret; 1375 } 1376 1377 /* 1378 * This test checks socket memory accounting. 1379 * The test forks a TCP server listens on a random port between 1000 1380 * and 61000. Once it gets a client connection, it starts writing to 1381 * its socket. 1382 * The TCP client interleaves reads from the socket with check whether 1383 * memory.current and memory.stat.sock are similar. 
1384 */ 1385 static int test_memcg_sock(const char *root) 1386 { 1387 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1388 unsigned short port; 1389 char *memcg; 1390 long sock_post = -1; 1391 1392 memcg = cg_name(root, "memcg_test"); 1393 if (!memcg) 1394 goto cleanup; 1395 1396 if (cg_create(memcg)) 1397 goto cleanup; 1398 1399 while (bind_retries--) { 1400 struct tcp_server_args args; 1401 1402 if (pipe(args.ctl)) 1403 goto cleanup; 1404 1405 port = args.port = 1000 + rand() % 60000; 1406 1407 pid = cg_run_nowait(memcg, tcp_server, &args); 1408 if (pid < 0) 1409 goto cleanup; 1410 1411 close(args.ctl[1]); 1412 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1413 goto cleanup; 1414 close(args.ctl[0]); 1415 1416 if (!err) 1417 break; 1418 if (err != EADDRINUSE) 1419 goto cleanup; 1420 1421 waitpid(pid, NULL, 0); 1422 } 1423 1424 if (err == EADDRINUSE) { 1425 ret = KSFT_SKIP; 1426 goto cleanup; 1427 } 1428 1429 if (tcp_client(memcg, port) != KSFT_PASS) 1430 goto cleanup; 1431 1432 waitpid(pid, &err, 0); 1433 if (WEXITSTATUS(err)) 1434 goto cleanup; 1435 1436 if (cg_read_long(memcg, "memory.current") < 0) 1437 goto cleanup; 1438 1439 /* 1440 * memory.stat is updated asynchronously via the memcg rstat 1441 * flushing worker, which runs periodically (every 2 seconds, 1442 * see FLUSH_TIME). On a busy system, the "sock " counter may 1443 * stay non-zero for a short period of time after the TCP 1444 * connection is closed and all socket memory has been 1445 * uncharged. 1446 * 1447 * Poll memory.stat for up to 3 seconds (~FLUSH_TIME plus some 1448 * scheduling slack) and require that the "sock " counter 1449 * eventually drops to zero. 
1450 */ 1451 sock_post = cg_read_key_long_poll(memcg, "memory.stat", "sock ", 0, 1452 MEMCG_SOCKSTAT_WAIT_RETRIES, 1453 DEFAULT_WAIT_INTERVAL_US); 1454 if (sock_post) 1455 goto cleanup; 1456 1457 ret = KSFT_PASS; 1458 1459 cleanup: 1460 cg_destroy(memcg); 1461 free(memcg); 1462 1463 return ret; 1464 } 1465 1466 /* 1467 * This test disables swapping and tries to allocate anonymous memory 1468 * up to OOM with memory.group.oom set. Then it checks that all 1469 * processes in the leaf were killed. It also checks that oom_events 1470 * were propagated to the parent level. 1471 */ 1472 static int test_memcg_oom_group_leaf_events(const char *root) 1473 { 1474 int ret = KSFT_FAIL; 1475 char *parent, *child; 1476 long parent_oom_events; 1477 1478 parent = cg_name(root, "memcg_test_0"); 1479 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1480 1481 if (!parent || !child) 1482 goto cleanup; 1483 1484 if (cg_create(parent)) 1485 goto cleanup; 1486 1487 if (cg_create(child)) 1488 goto cleanup; 1489 1490 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1491 goto cleanup; 1492 1493 if (cg_write(child, "memory.max", "50M")) 1494 goto cleanup; 1495 1496 if (cg_write(child, "memory.swap.max", "0")) 1497 goto cleanup; 1498 1499 if (cg_write(child, "memory.oom.group", "1")) 1500 goto cleanup; 1501 1502 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1503 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1504 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1505 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1506 goto cleanup; 1507 1508 if (cg_test_proc_killed(child)) 1509 goto cleanup; 1510 1511 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1512 goto cleanup; 1513 1514 parent_oom_events = cg_read_key_long( 1515 parent, "memory.events", "oom_kill "); 1516 /* 1517 * If memory_localevents is not enabled (the default), the parent should 1518 * count OOM events in its children groups. 
Otherwise, it should not 1519 * have observed any events. 1520 */ 1521 if (has_localevents && parent_oom_events != 0) 1522 goto cleanup; 1523 else if (!has_localevents && parent_oom_events <= 0) 1524 goto cleanup; 1525 1526 ret = KSFT_PASS; 1527 1528 cleanup: 1529 if (child) 1530 cg_destroy(child); 1531 if (parent) 1532 cg_destroy(parent); 1533 free(child); 1534 free(parent); 1535 1536 return ret; 1537 } 1538 1539 /* 1540 * This test disables swapping and tries to allocate anonymous memory 1541 * up to OOM with memory.group.oom set. Then it checks that all 1542 * processes in the parent and leaf were killed. 1543 */ 1544 static int test_memcg_oom_group_parent_events(const char *root) 1545 { 1546 int ret = KSFT_FAIL; 1547 char *parent, *child; 1548 1549 parent = cg_name(root, "memcg_test_0"); 1550 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1551 1552 if (!parent || !child) 1553 goto cleanup; 1554 1555 if (cg_create(parent)) 1556 goto cleanup; 1557 1558 if (cg_create(child)) 1559 goto cleanup; 1560 1561 if (cg_write(parent, "memory.max", "80M")) 1562 goto cleanup; 1563 1564 if (cg_write(parent, "memory.swap.max", "0")) 1565 goto cleanup; 1566 1567 if (cg_write(parent, "memory.oom.group", "1")) 1568 goto cleanup; 1569 1570 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1571 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1572 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1573 1574 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1575 goto cleanup; 1576 1577 if (cg_test_proc_killed(child)) 1578 goto cleanup; 1579 if (cg_test_proc_killed(parent)) 1580 goto cleanup; 1581 1582 ret = KSFT_PASS; 1583 1584 cleanup: 1585 if (child) 1586 cg_destroy(child); 1587 if (parent) 1588 cg_destroy(parent); 1589 free(child); 1590 free(parent); 1591 1592 return ret; 1593 } 1594 1595 /* 1596 * This test disables swapping and tries to allocate anonymous memory 1597 * up to OOM with memory.group.oom set. 
Then it checks that all 1598 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1599 */ 1600 static int test_memcg_oom_group_score_events(const char *root) 1601 { 1602 int ret = KSFT_FAIL; 1603 char *memcg; 1604 int safe_pid; 1605 1606 memcg = cg_name(root, "memcg_test_0"); 1607 1608 if (!memcg) 1609 goto cleanup; 1610 1611 if (cg_create(memcg)) 1612 goto cleanup; 1613 1614 if (cg_write(memcg, "memory.max", "50M")) 1615 goto cleanup; 1616 1617 if (cg_write(memcg, "memory.swap.max", "0")) 1618 goto cleanup; 1619 1620 if (cg_write(memcg, "memory.oom.group", "1")) 1621 goto cleanup; 1622 1623 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1624 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1625 goto cleanup; 1626 1627 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1628 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1629 goto cleanup; 1630 1631 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1632 goto cleanup; 1633 1634 if (kill(safe_pid, SIGKILL)) 1635 goto cleanup; 1636 1637 ret = KSFT_PASS; 1638 1639 cleanup: 1640 if (memcg) 1641 cg_destroy(memcg); 1642 free(memcg); 1643 1644 return ret; 1645 } 1646 1647 static int read_event(int inotify_fd, int expected_event, int expected_wd) 1648 { 1649 struct inotify_event event; 1650 ssize_t len = 0; 1651 1652 len = read(inotify_fd, &event, sizeof(event)); 1653 if (len < (ssize_t)sizeof(event)) 1654 return -1; 1655 1656 if (event.mask != expected_event || event.wd != expected_wd) { 1657 fprintf(stderr, 1658 "event does not match expected values: mask %d (expected %d) wd %d (expected %d)\n", 1659 event.mask, expected_event, event.wd, expected_wd); 1660 return -1; 1661 } 1662 1663 return 0; 1664 } 1665 1666 static int test_memcg_inotify_delete_file(const char *root) 1667 { 1668 int ret = KSFT_FAIL; 1669 char *memcg = NULL; 1670 int fd, wd; 1671 1672 memcg = cg_name(root, "memcg_test_0"); 1673 1674 if (!memcg) 1675 goto cleanup; 1676 1677 if (cg_create(memcg)) 1678 
goto cleanup; 1679 1680 fd = inotify_init1(0); 1681 if (fd == -1) 1682 goto cleanup; 1683 1684 wd = inotify_add_watch(fd, cg_control(memcg, "memory.events"), IN_DELETE_SELF); 1685 if (wd == -1) 1686 goto cleanup; 1687 1688 if (cg_destroy(memcg)) 1689 goto cleanup; 1690 free(memcg); 1691 memcg = NULL; 1692 1693 if (read_event(fd, IN_DELETE_SELF, wd)) 1694 goto cleanup; 1695 1696 if (read_event(fd, IN_IGNORED, wd)) 1697 goto cleanup; 1698 1699 ret = KSFT_PASS; 1700 1701 cleanup: 1702 if (fd >= 0) 1703 close(fd); 1704 if (memcg) 1705 cg_destroy(memcg); 1706 free(memcg); 1707 1708 return ret; 1709 } 1710 1711 static int test_memcg_inotify_delete_dir(const char *root) 1712 { 1713 int ret = KSFT_FAIL; 1714 char *memcg = NULL; 1715 int fd, wd; 1716 1717 memcg = cg_name(root, "memcg_test_0"); 1718 1719 if (!memcg) 1720 goto cleanup; 1721 1722 if (cg_create(memcg)) 1723 goto cleanup; 1724 1725 fd = inotify_init1(0); 1726 if (fd == -1) 1727 goto cleanup; 1728 1729 wd = inotify_add_watch(fd, memcg, IN_DELETE_SELF); 1730 if (wd == -1) 1731 goto cleanup; 1732 1733 if (cg_destroy(memcg)) 1734 goto cleanup; 1735 free(memcg); 1736 memcg = NULL; 1737 1738 if (read_event(fd, IN_DELETE_SELF, wd)) 1739 goto cleanup; 1740 1741 if (read_event(fd, IN_IGNORED, wd)) 1742 goto cleanup; 1743 1744 ret = KSFT_PASS; 1745 1746 cleanup: 1747 if (fd >= 0) 1748 close(fd); 1749 if (memcg) 1750 cg_destroy(memcg); 1751 free(memcg); 1752 1753 return ret; 1754 } 1755 1756 #define T(x) { x, #x } 1757 struct memcg_test { 1758 int (*fn)(const char *root); 1759 const char *name; 1760 } tests[] = { 1761 T(test_memcg_subtree_control), 1762 T(test_memcg_current_peak), 1763 T(test_memcg_min), 1764 T(test_memcg_low), 1765 T(test_memcg_high), 1766 T(test_memcg_high_sync), 1767 T(test_memcg_max), 1768 T(test_memcg_reclaim), 1769 T(test_memcg_oom_events), 1770 T(test_memcg_swap_max_peak), 1771 T(test_memcg_sock), 1772 T(test_memcg_oom_group_leaf_events), 1773 T(test_memcg_oom_group_parent_events), 1774 
T(test_memcg_oom_group_score_events), 1775 T(test_memcg_inotify_delete_file), 1776 T(test_memcg_inotify_delete_dir), 1777 }; 1778 #undef T 1779 1780 int main(int argc, char **argv) 1781 { 1782 char root[PATH_MAX]; 1783 int i, proc_status; 1784 1785 ksft_print_header(); 1786 ksft_set_plan(ARRAY_SIZE(tests)); 1787 if (cg_find_unified_root(root, sizeof(root), NULL)) 1788 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1789 1790 /* 1791 * Check that memory controller is available: 1792 * memory is listed in cgroup.controllers 1793 */ 1794 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1795 ksft_exit_skip("memory controller isn't available\n"); 1796 1797 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1798 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1799 ksft_exit_skip("Failed to set memory controller\n"); 1800 1801 proc_status = proc_mount_contains("memory_recursiveprot"); 1802 if (proc_status < 0) 1803 ksft_exit_skip("Failed to query cgroup mount option\n"); 1804 has_recursiveprot = proc_status; 1805 1806 proc_status = proc_mount_contains("memory_localevents"); 1807 if (proc_status < 0) 1808 ksft_exit_skip("Failed to query cgroup mount option\n"); 1809 has_localevents = proc_status; 1810 1811 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1812 switch (tests[i].fn(root)) { 1813 case KSFT_PASS: 1814 ksft_test_result_pass("%s\n", tests[i].name); 1815 break; 1816 case KSFT_SKIP: 1817 ksft_test_result_skip("%s\n", tests[i].name); 1818 break; 1819 default: 1820 ksft_test_result_fail("%s\n", tests[i].name); 1821 break; 1822 } 1823 } 1824 1825 ksft_finished(); 1826 } 1827