1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/inotify.h> 14 #include <sys/socket.h> 15 #include <sys/wait.h> 16 #include <arpa/inet.h> 17 #include <netinet/in.h> 18 #include <netdb.h> 19 #include <errno.h> 20 #include <sys/mman.h> 21 22 #include "kselftest.h" 23 #include "cgroup_util.h" 24 25 #define MEMCG_SOCKSTAT_WAIT_RETRIES 30 26 27 static bool has_localevents; 28 static bool has_recursiveprot; 29 static int page_size; 30 31 int get_temp_fd(void) 32 { 33 return open(".", O_TMPFILE | O_RDWR | O_EXCL); 34 } 35 36 int alloc_pagecache(int fd, size_t size) 37 { 38 char buf[BUF_SIZE]; 39 struct stat st; 40 int i; 41 42 if (fstat(fd, &st)) 43 goto cleanup; 44 45 size += st.st_size; 46 47 if (ftruncate(fd, size)) 48 goto cleanup; 49 50 for (i = 0; i < size; i += sizeof(buf)) 51 read(fd, buf, sizeof(buf)); 52 53 return 0; 54 55 cleanup: 56 return -1; 57 } 58 59 int alloc_anon(const char *cgroup, void *arg) 60 { 61 size_t size = (unsigned long)arg; 62 char *buf, *ptr; 63 64 buf = malloc(size); 65 for (ptr = buf; ptr < buf + size; ptr += page_size) 66 *ptr = 0; 67 68 free(buf); 69 return 0; 70 } 71 72 int is_swap_enabled(void) 73 { 74 char buf[BUF_SIZE]; 75 const char delim[] = "\n"; 76 int cnt = 0; 77 char *line; 78 79 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) 80 return -1; 81 82 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 83 cnt++; 84 85 return cnt > 1; 86 } 87 88 int set_oom_adj_score(int pid, int score) 89 { 90 char path[PATH_MAX]; 91 int fd, len; 92 93 sprintf(path, "/proc/%d/oom_score_adj", pid); 94 95 fd = open(path, O_WRONLY | O_APPEND); 96 if (fd < 0) 97 return fd; 98 99 len = dprintf(fd, "%d", score); 100 if (len < 0) { 101 close(fd); 102 return len; 103 } 104 105 close(fd); 106 return 0; 107 } 108 109 /* 110 * This test creates two nested cgroups with and without enabling 111 * the memory controller. 112 */ 113 static int test_memcg_subtree_control(const char *root) 114 { 115 char *parent, *child, *parent2 = NULL, *child2 = NULL; 116 int ret = KSFT_FAIL; 117 char buf[BUF_SIZE]; 118 119 /* Create two nested cgroups with the memory controller enabled */ 120 parent = cg_name(root, "memcg_test_0"); 121 child = cg_name(root, "memcg_test_0/memcg_test_1"); 122 if (!parent || !child) 123 goto cleanup_free; 124 125 if (cg_create(parent)) 126 goto cleanup_free; 127 128 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 129 goto cleanup_parent; 130 131 if (cg_create(child)) 132 goto cleanup_parent; 133 134 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 135 goto cleanup_child; 136 137 /* Create two nested cgroups without enabling memory controller */ 138 parent2 = cg_name(root, "memcg_test_1"); 139 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 140 if (!parent2 || !child2) 141 goto cleanup_free2; 142 143 if (cg_create(parent2)) 144 goto cleanup_free2; 145 146 if (cg_create(child2)) 147 goto cleanup_parent2; 148 149 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 150 goto cleanup_all; 151 152 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 153 goto cleanup_all; 154 155 ret = KSFT_PASS; 156 157 cleanup_all: 158 cg_destroy(child2); 159 cleanup_parent2: 160 cg_destroy(parent2); 161 cleanup_free2: 162 free(parent2); 163 free(child2); 164 cleanup_child: 165 cg_destroy(child); 166 cleanup_parent: 167 cg_destroy(parent); 168 cleanup_free: 169 free(parent); 170 free(child); 171 172 return ret; 173 } 174 175 static int alloc_anon_50M_check(const char *cgroup, void *arg) 176 { 177 size_t size = MB(50); 178 char *buf, *ptr; 179 long anon, current; 180 int ret = -1; 181 182 buf = malloc(size); 183 if (buf == NULL) { 184 fprintf(stderr, "malloc() failed\n"); 185 return -1; 186 } 187 188 for (ptr = buf; ptr < buf + size; ptr += page_size) 189 *ptr = 0; 190 191 current = cg_read_long(cgroup, "memory.current"); 192 if (current < size) 193 goto cleanup; 194 195 if (!values_close(size, current, 3)) 196 goto cleanup; 197 198 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 199 if (anon < 0) 200 goto cleanup; 201 202 if (!values_close(anon, current, 3)) 203 goto cleanup; 204 205 ret = 0; 206 cleanup: 207 free(buf); 208 return ret; 209 } 210 211 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 212 { 213 size_t size = MB(50); 214 int ret = -1; 215 long current, file; 216 int fd; 217 218 fd = get_temp_fd(); 219 if (fd < 0) 220 return -1; 221 222 if (alloc_pagecache(fd, size)) 223 goto cleanup; 224 225 current = cg_read_long(cgroup, "memory.current"); 226 if (current < size) 227 goto cleanup; 228 229 file = cg_read_key_long(cgroup, "memory.stat", "file "); 230 if (file < 0) 231 goto cleanup; 232 233 if (!values_close(file, current, 10)) 234 goto cleanup; 235 236 ret = 0; 237 238 cleanup: 239 close(fd); 240 return ret; 241 } 242 243 /* 244 * This test create a memory cgroup, allocates 245 * some anonymous memory and some pagecache 246 * and checks memory.current, memory.peak, and some memory.stat values. 247 */ 248 static int test_memcg_current_peak(const char *root) 249 { 250 int ret = KSFT_FAIL; 251 long current, peak, peak_reset; 252 char *memcg; 253 bool fd2_closed = false, fd3_closed = false, fd4_closed = false; 254 int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1; 255 struct stat ss; 256 257 memcg = cg_name(root, "memcg_test"); 258 if (!memcg) 259 goto cleanup; 260 261 if (cg_create(memcg)) 262 goto cleanup; 263 264 current = cg_read_long(memcg, "memory.current"); 265 if (current != 0) 266 goto cleanup; 267 268 peak = cg_read_long(memcg, "memory.peak"); 269 if (peak != 0) 270 goto cleanup; 271 272 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 273 goto cleanup; 274 275 peak = cg_read_long(memcg, "memory.peak"); 276 if (peak < MB(50)) 277 goto cleanup; 278 279 /* 280 * We'll open a few FDs for the same memory.peak file to exercise the free-path 281 * We need at least three to be closed in a different order than writes occurred to test 282 * the linked-list handling. 283 */ 284 peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 285 286 if (peak_fd == -1) { 287 if (errno == ENOENT) 288 ret = KSFT_SKIP; 289 goto cleanup; 290 } 291 292 /* 293 * Before we try to use memory.peak's fd, try to figure out whether 294 * this kernel supports writing to that file in the first place. (by 295 * checking the writable bit on the file's st_mode) 296 */ 297 if (fstat(peak_fd, &ss)) 298 goto cleanup; 299 300 if ((ss.st_mode & S_IWUSR) == 0) { 301 ret = KSFT_SKIP; 302 goto cleanup; 303 } 304 305 peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 306 307 if (peak_fd2 == -1) 308 goto cleanup; 309 310 peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 311 312 if (peak_fd3 == -1) 313 goto cleanup; 314 315 /* any non-empty string resets, but make it clear */ 316 static const char reset_string[] = "reset\n"; 317 318 peak_reset = write(peak_fd, reset_string, sizeof(reset_string)); 319 if (peak_reset != sizeof(reset_string)) 320 goto cleanup; 321 322 peak_reset = write(peak_fd2, reset_string, sizeof(reset_string)); 323 if (peak_reset != sizeof(reset_string)) 324 goto cleanup; 325 326 peak_reset = write(peak_fd3, reset_string, sizeof(reset_string)); 327 if (peak_reset != sizeof(reset_string)) 328 goto cleanup; 329 330 /* Make sure a completely independent read isn't affected by our FD-local reset above*/ 331 peak = cg_read_long(memcg, "memory.peak"); 332 if (peak < MB(50)) 333 goto cleanup; 334 335 fd2_closed = true; 336 if (close(peak_fd2)) 337 goto cleanup; 338 339 peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 340 341 if (peak_fd4 == -1) 342 goto cleanup; 343 344 peak_reset = write(peak_fd4, reset_string, sizeof(reset_string)); 345 if (peak_reset != sizeof(reset_string)) 346 goto cleanup; 347 348 peak = cg_read_long_fd(peak_fd); 349 if (peak > MB(30) || peak < 0) 350 goto cleanup; 351 352 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 353 goto cleanup; 354 355 peak = cg_read_long(memcg, "memory.peak"); 356 if (peak < MB(50)) 357 goto cleanup; 358 359 /* Make sure everything is back to normal */ 360 peak = cg_read_long_fd(peak_fd); 361 if (peak < MB(50)) 362 goto cleanup; 363 364 peak = cg_read_long_fd(peak_fd4); 365 if (peak < MB(50)) 366 goto cleanup; 367 368 fd3_closed = true; 369 if (close(peak_fd3)) 370 goto cleanup; 371 372 fd4_closed = true; 373 if (close(peak_fd4)) 374 goto cleanup; 375 376 ret = KSFT_PASS; 377 378 cleanup: 379 close(peak_fd); 380 if (!fd2_closed) 381 close(peak_fd2); 382 if (!fd3_closed) 383 close(peak_fd3); 384 if (!fd4_closed) 385 close(peak_fd4); 386 cg_destroy(memcg); 387 free(memcg); 388 389 return ret; 390 } 391 392 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 393 { 394 int fd = (long)arg; 395 int ppid = getppid(); 396 397 if (alloc_pagecache(fd, MB(50))) 398 return -1; 399 400 while (getppid() == ppid) 401 sleep(1); 402 403 return 0; 404 } 405 406 static int alloc_anon_noexit(const char *cgroup, void *arg) 407 { 408 int ppid = getppid(); 409 size_t size = (unsigned long)arg; 410 char *buf, *ptr; 411 412 buf = malloc(size); 413 if (buf == NULL) { 414 fprintf(stderr, "malloc() failed\n"); 415 return -1; 416 } 417 418 for (ptr = buf; ptr < buf + size; ptr += page_size) 419 *ptr = 0; 420 421 while (getppid() == ppid) 422 sleep(1); 423 424 free(buf); 425 return 0; 426 } 427 428 /* 429 * Wait until processes are killed asynchronously by the OOM killer 430 * If we exceed a timeout, fail. 431 */ 432 static int cg_test_proc_killed(const char *cgroup) 433 { 434 int limit; 435 436 for (limit = 10; limit > 0; limit--) { 437 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 438 return 0; 439 440 usleep(100000); 441 } 442 return -1; 443 } 444 445 static bool reclaim_until(const char *memcg, long goal); 446 447 /* 448 * First, this test creates the following hierarchy: 449 * A memory.min = 0, memory.max = 200M 450 * A/B memory.min = 50M 451 * A/B/C memory.min = 75M, memory.current = 50M 452 * A/B/D memory.min = 25M, memory.current = 50M 453 * A/B/E memory.min = 0, memory.current = 50M 454 * A/B/F memory.min = 500M, memory.current = 0 455 * 456 * (or memory.low if we test soft protection) 457 * 458 * Usages are pagecache and the test keeps a running 459 * process in every leaf cgroup. 460 * Then it creates A/G and creates a significant 461 * memory pressure in A. 462 * 463 * Then it checks actual memory usages and expects that: 464 * A/B memory.current ~= 50M 465 * A/B/C memory.current ~= 29M [memory.events:low > 0] 466 * A/B/D memory.current ~= 21M [memory.events:low > 0] 467 * A/B/E memory.current ~= 0 [memory.events:low == 0 if !memory_recursiveprot, 468 * undefined otherwise] 469 * A/B/F memory.current = 0 [memory.events:low == 0] 470 * (for origin of the numbers, see model in memcg_protection.m.) 471 * 472 * After that it tries to allocate more than there is 473 * unprotected memory in A available, and checks that: 474 * a) memory.min protects pagecache even in this case, 475 * b) memory.low allows reclaiming page cache with low events. 476 * 477 * Then we try to reclaim from A/B/C using memory.reclaim until its 478 * usage reaches 10M. 479 * This makes sure that: 480 * (a) We ignore the protection of the reclaim target memcg. 481 * (b) The previously calculated emin value (~29M) should be dismissed. 482 */ 483 static int test_memcg_protection(const char *root, bool min) 484 { 485 int ret = KSFT_FAIL, rc; 486 char *parent[3] = {NULL}; 487 char *children[4] = {NULL}; 488 const char *attribute = min ? "memory.min" : "memory.low"; 489 long c[4]; 490 long current; 491 int i, attempts; 492 int fd; 493 494 fd = get_temp_fd(); 495 if (fd < 0) 496 goto cleanup; 497 498 parent[0] = cg_name(root, "memcg_test_0"); 499 if (!parent[0]) 500 goto cleanup; 501 502 parent[1] = cg_name(parent[0], "memcg_test_1"); 503 if (!parent[1]) 504 goto cleanup; 505 506 parent[2] = cg_name(parent[0], "memcg_test_2"); 507 if (!parent[2]) 508 goto cleanup; 509 510 if (cg_create(parent[0])) 511 goto cleanup; 512 513 if (cg_read_long(parent[0], attribute)) { 514 /* No memory.min on older kernels is fine */ 515 if (min) 516 ret = KSFT_SKIP; 517 goto cleanup; 518 } 519 520 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 521 goto cleanup; 522 523 if (cg_write(parent[0], "memory.max", "200M")) 524 goto cleanup; 525 526 if (cg_write(parent[0], "memory.swap.max", "0")) 527 goto cleanup; 528 529 if (cg_create(parent[1])) 530 goto cleanup; 531 532 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 533 goto cleanup; 534 535 if (cg_create(parent[2])) 536 goto cleanup; 537 538 for (i = 0; i < ARRAY_SIZE(children); i++) { 539 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 540 if (!children[i]) 541 goto cleanup; 542 543 if (cg_create(children[i])) 544 goto cleanup; 545 546 if (i > 2) 547 continue; 548 549 cg_run_nowait(children[i], alloc_pagecache_50M_noexit, 550 (void *)(long)fd); 551 } 552 553 if (cg_write(parent[1], attribute, "50M")) 554 goto cleanup; 555 if (cg_write(children[0], attribute, "75M")) 556 goto cleanup; 557 if (cg_write(children[1], attribute, "25M")) 558 goto cleanup; 559 if (cg_write(children[2], attribute, "0")) 560 goto cleanup; 561 if (cg_write(children[3], attribute, "500M")) 562 goto cleanup; 563 564 attempts = 0; 565 while (!values_close(cg_read_long(parent[1], "memory.current"), 566 MB(150), 3)) { 567 if (attempts++ > 5) 568 break; 569 sleep(1); 570 } 571 572 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 573 goto cleanup; 574 575 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 576 goto cleanup; 577 578 for (i = 0; i < ARRAY_SIZE(children); i++) 579 c[i] = cg_read_long(children[i], "memory.current"); 580 581 if (!values_close(c[0], MB(29), 15)) 582 goto cleanup; 583 584 if (!values_close(c[1], MB(21), 20)) 585 goto cleanup; 586 587 if (c[3] != 0) 588 goto cleanup; 589 590 rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); 591 if (min && !rc) 592 goto cleanup; 593 else if (!min && rc) { 594 fprintf(stderr, 595 "memory.low prevents from allocating anon memory\n"); 596 goto cleanup; 597 } 598 599 current = min ? MB(50) : MB(30); 600 if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3)) 601 goto cleanup; 602 603 if (!reclaim_until(children[0], MB(10))) 604 goto cleanup; 605 606 if (min) { 607 ret = KSFT_PASS; 608 goto cleanup; 609 } 610 611 /* 612 * Child 2 has memory.low=0, but some low protection may still be 613 * distributed down from its parent with memory.low=50M if cgroup2 614 * memory_recursiveprot mount option is enabled. Ignore the low 615 * event count in this case. 616 */ 617 for (i = 0; i < ARRAY_SIZE(children); i++) { 618 int ignore_low_events_index = has_recursiveprot ? 2 : -1; 619 int no_low_events_index = 1; 620 long low, oom; 621 622 oom = cg_read_key_long(children[i], "memory.events", "oom "); 623 low = cg_read_key_long(children[i], "memory.events", "low "); 624 625 if (oom) 626 goto cleanup; 627 if (i == ignore_low_events_index) 628 continue; 629 if (i <= no_low_events_index && low <= 0) 630 goto cleanup; 631 if (i > no_low_events_index && low) 632 goto cleanup; 633 634 } 635 636 ret = KSFT_PASS; 637 638 cleanup: 639 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 640 if (!children[i]) 641 continue; 642 643 cg_destroy(children[i]); 644 free(children[i]); 645 } 646 647 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 648 if (!parent[i]) 649 continue; 650 651 cg_destroy(parent[i]); 652 free(parent[i]); 653 } 654 close(fd); 655 return ret; 656 } 657 658 static int test_memcg_min(const char *root) 659 { 660 return test_memcg_protection(root, true); 661 } 662 663 static int test_memcg_low(const char *root) 664 { 665 return test_memcg_protection(root, false); 666 } 667 668 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 669 { 670 size_t size = MB(50); 671 int ret = -1; 672 long current, high, max; 673 int fd; 674 675 high = cg_read_long(cgroup, "memory.high"); 676 max = cg_read_long(cgroup, "memory.max"); 677 if (high != MB(30) && max != MB(30)) 678 return -1; 679 680 fd = get_temp_fd(); 681 if (fd < 0) 682 return -1; 683 684 if (alloc_pagecache(fd, size)) 685 goto cleanup; 686 687 current = cg_read_long(cgroup, "memory.current"); 688 if (!values_close(current, MB(30), 5)) 689 goto cleanup; 690 691 ret = 0; 692 693 cleanup: 694 close(fd); 695 return ret; 696 697 } 698 699 /* 700 * This test checks that memory.high limits the amount of 701 * memory which can be consumed by either anonymous memory 702 * or pagecache. 703 */ 704 static int test_memcg_high(const char *root) 705 { 706 int ret = KSFT_FAIL; 707 char *memcg; 708 long high; 709 710 memcg = cg_name(root, "memcg_test"); 711 if (!memcg) 712 goto cleanup; 713 714 if (cg_create(memcg)) 715 goto cleanup; 716 717 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 718 goto cleanup; 719 720 if (cg_write(memcg, "memory.swap.max", "0")) 721 goto cleanup; 722 723 if (cg_write(memcg, "memory.high", "30M")) 724 goto cleanup; 725 726 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 727 goto cleanup; 728 729 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 730 goto cleanup; 731 732 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 733 goto cleanup; 734 735 high = cg_read_key_long(memcg, "memory.events", "high "); 736 if (high <= 0) 737 goto cleanup; 738 739 ret = KSFT_PASS; 740 741 cleanup: 742 cg_destroy(memcg); 743 free(memcg); 744 745 return ret; 746 } 747 748 static int alloc_anon_mlock(const char *cgroup, void *arg) 749 { 750 size_t size = (size_t)arg; 751 void *buf; 752 753 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 754 0, 0); 755 if (buf == MAP_FAILED) 756 return -1; 757 758 mlock(buf, size); 759 munmap(buf, size); 760 return 0; 761 } 762 763 /* 764 * This test checks that memory.high is able to throttle big single shot 765 * allocation i.e. large allocation within one kernel entry. 766 */ 767 static int test_memcg_high_sync(const char *root) 768 { 769 int ret = KSFT_FAIL, pid, fd = -1; 770 char *memcg; 771 long pre_high, pre_max; 772 long post_high, post_max; 773 774 memcg = cg_name(root, "memcg_test"); 775 if (!memcg) 776 goto cleanup; 777 778 if (cg_create(memcg)) 779 goto cleanup; 780 781 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 782 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 783 if (pre_high < 0 || pre_max < 0) 784 goto cleanup; 785 786 if (cg_write(memcg, "memory.swap.max", "0")) 787 goto cleanup; 788 789 if (cg_write(memcg, "memory.high", "30M")) 790 goto cleanup; 791 792 if (cg_write(memcg, "memory.max", "140M")) 793 goto cleanup; 794 795 fd = memcg_prepare_for_wait(memcg); 796 if (fd < 0) 797 goto cleanup; 798 799 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 800 if (pid < 0) 801 goto cleanup; 802 803 cg_wait_for(fd); 804 805 post_high = cg_read_key_long(memcg, "memory.events", "high "); 806 post_max = cg_read_key_long(memcg, "memory.events", "max "); 807 if (post_high < 0 || post_max < 0) 808 goto cleanup; 809 810 if (pre_high == post_high || pre_max != post_max) 811 goto cleanup; 812 813 ret = KSFT_PASS; 814 815 cleanup: 816 if (fd >= 0) 817 close(fd); 818 cg_destroy(memcg); 819 free(memcg); 820 821 return ret; 822 } 823 824 /* 825 * This test checks that memory.max limits the amount of 826 * memory which can be consumed by either anonymous memory 827 * or pagecache. 828 */ 829 static int test_memcg_max(const char *root) 830 { 831 int ret = KSFT_FAIL; 832 char *memcg; 833 long current, max; 834 835 memcg = cg_name(root, "memcg_test"); 836 if (!memcg) 837 goto cleanup; 838 839 if (cg_create(memcg)) 840 goto cleanup; 841 842 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 843 goto cleanup; 844 845 if (cg_write(memcg, "memory.swap.max", "0")) 846 goto cleanup; 847 848 if (cg_write(memcg, "memory.max", "30M")) 849 goto cleanup; 850 851 /* Should be killed by OOM killer */ 852 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 853 goto cleanup; 854 855 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 856 goto cleanup; 857 858 current = cg_read_long(memcg, "memory.current"); 859 if (current > MB(30) || !current) 860 goto cleanup; 861 862 max = cg_read_key_long(memcg, "memory.events", "max "); 863 if (max <= 0) 864 goto cleanup; 865 866 ret = KSFT_PASS; 867 868 cleanup: 869 cg_destroy(memcg); 870 free(memcg); 871 872 return ret; 873 } 874 875 /* 876 * Reclaim from @memcg until usage reaches @goal by writing to 877 * memory.reclaim. 878 * 879 * This function will return false if the usage is already below the 880 * goal. 881 * 882 * This function assumes that writing to memory.reclaim is the only 883 * source of change in memory.current (no concurrent allocations or 884 * reclaim). 885 * 886 * This function makes sure memory.reclaim is sane. It will return 887 * false if memory.reclaim's error codes do not make sense, even if 888 * the usage goal was satisfied. 889 */ 890 static bool reclaim_until(const char *memcg, long goal) 891 { 892 char buf[64]; 893 int retries, err; 894 long current, to_reclaim; 895 bool reclaimed = false; 896 897 for (retries = 5; retries > 0; retries--) { 898 current = cg_read_long(memcg, "memory.current"); 899 900 if (current < goal || values_close(current, goal, 3)) 901 break; 902 /* Did memory.reclaim return 0 incorrectly? */ 903 else if (reclaimed) 904 return false; 905 906 to_reclaim = current - goal; 907 snprintf(buf, sizeof(buf), "%ld", to_reclaim); 908 err = cg_write(memcg, "memory.reclaim", buf); 909 if (!err) 910 reclaimed = true; 911 else if (err != -EAGAIN) 912 return false; 913 } 914 return reclaimed; 915 } 916 917 /* 918 * This test checks that memory.reclaim reclaims the given 919 * amount of memory (from both anon and file, if possible). 920 */ 921 static int test_memcg_reclaim(const char *root) 922 { 923 int ret = KSFT_FAIL; 924 int fd = -1; 925 int retries; 926 char *memcg; 927 long current, expected_usage; 928 929 memcg = cg_name(root, "memcg_test"); 930 if (!memcg) 931 goto cleanup; 932 933 if (cg_create(memcg)) 934 goto cleanup; 935 936 current = cg_read_long(memcg, "memory.current"); 937 if (current != 0) 938 goto cleanup; 939 940 fd = get_temp_fd(); 941 if (fd < 0) 942 goto cleanup; 943 944 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); 945 946 /* 947 * If swap is enabled, try to reclaim from both anon and file, else try 948 * to reclaim from file only. 949 */ 950 if (is_swap_enabled()) { 951 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); 952 expected_usage = MB(100); 953 } else 954 expected_usage = MB(50); 955 956 /* 957 * Wait until current usage reaches the expected usage (or we run out of 958 * retries). 959 */ 960 retries = 5; 961 while (!values_close(cg_read_long(memcg, "memory.current"), 962 expected_usage, 10)) { 963 if (retries--) { 964 sleep(1); 965 continue; 966 } else { 967 fprintf(stderr, 968 "failed to allocate %ld for memcg reclaim test\n", 969 expected_usage); 970 goto cleanup; 971 } 972 } 973 974 /* 975 * Reclaim until current reaches 30M, this makes sure we hit both anon 976 * and file if swap is enabled. 977 */ 978 if (!reclaim_until(memcg, MB(30))) 979 goto cleanup; 980 981 ret = KSFT_PASS; 982 cleanup: 983 cg_destroy(memcg); 984 free(memcg); 985 close(fd); 986 987 return ret; 988 } 989 990 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 991 { 992 long mem_max = (long)arg; 993 size_t size = MB(50); 994 char *buf, *ptr; 995 long mem_current, swap_current; 996 int ret = -1; 997 998 buf = malloc(size); 999 if (buf == NULL) { 1000 fprintf(stderr, "malloc() failed\n"); 1001 return -1; 1002 } 1003 1004 for (ptr = buf; ptr < buf + size; ptr += page_size) 1005 *ptr = 0; 1006 1007 mem_current = cg_read_long(cgroup, "memory.current"); 1008 if (!mem_current || !values_close(mem_current, mem_max, 3)) 1009 goto cleanup; 1010 1011 swap_current = cg_read_long(cgroup, "memory.swap.current"); 1012 if (!swap_current || 1013 !values_close(mem_current + swap_current, size, 3)) 1014 goto cleanup; 1015 1016 ret = 0; 1017 cleanup: 1018 free(buf); 1019 return ret; 1020 } 1021 1022 /* 1023 * This test checks that memory.swap.max limits the amount of 1024 * anonymous memory which can be swapped out. Additionally, it verifies that 1025 * memory.swap.peak reflects the high watermark and can be reset. 1026 */ 1027 static int test_memcg_swap_max_peak(const char *root) 1028 { 1029 int ret = KSFT_FAIL; 1030 char *memcg; 1031 long max, peak; 1032 struct stat ss; 1033 int swap_peak_fd = -1, mem_peak_fd = -1; 1034 1035 /* any non-empty string resets */ 1036 static const char reset_string[] = "foobarbaz"; 1037 1038 if (!is_swap_enabled()) 1039 return KSFT_SKIP; 1040 1041 memcg = cg_name(root, "memcg_test"); 1042 if (!memcg) 1043 goto cleanup; 1044 1045 if (cg_create(memcg)) 1046 goto cleanup; 1047 1048 if (cg_read_long(memcg, "memory.swap.current")) { 1049 ret = KSFT_SKIP; 1050 goto cleanup; 1051 } 1052 1053 swap_peak_fd = cg_open(memcg, "memory.swap.peak", 1054 O_RDWR | O_APPEND | O_CLOEXEC); 1055 1056 if (swap_peak_fd == -1) { 1057 if (errno == ENOENT) 1058 ret = KSFT_SKIP; 1059 goto cleanup; 1060 } 1061 1062 /* 1063 * Before we try to use memory.swap.peak's fd, try to figure out 1064 * whether this kernel supports writing to that file in the first 1065 * place. (by checking the writable bit on the file's st_mode) 1066 */ 1067 if (fstat(swap_peak_fd, &ss)) 1068 goto cleanup; 1069 1070 if ((ss.st_mode & S_IWUSR) == 0) { 1071 ret = KSFT_SKIP; 1072 goto cleanup; 1073 } 1074 1075 mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); 1076 1077 if (mem_peak_fd == -1) 1078 goto cleanup; 1079 1080 if (cg_read_long(memcg, "memory.swap.peak")) 1081 goto cleanup; 1082 1083 if (cg_read_long_fd(swap_peak_fd)) 1084 goto cleanup; 1085 1086 /* switch the swap and mem fds into local-peak tracking mode*/ 1087 int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); 1088 1089 if (peak_reset != sizeof(reset_string)) 1090 goto cleanup; 1091 1092 if (cg_read_long_fd(swap_peak_fd)) 1093 goto cleanup; 1094 1095 if (cg_read_long(memcg, "memory.peak")) 1096 goto cleanup; 1097 1098 if (cg_read_long_fd(mem_peak_fd)) 1099 goto cleanup; 1100 1101 peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); 1102 if (peak_reset != sizeof(reset_string)) 1103 goto cleanup; 1104 1105 if (cg_read_long_fd(mem_peak_fd)) 1106 goto cleanup; 1107 1108 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 1109 goto cleanup; 1110 1111 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 1112 goto cleanup; 1113 1114 if (cg_write(memcg, "memory.swap.max", "30M")) 1115 goto cleanup; 1116 1117 if (cg_write(memcg, "memory.max", "30M")) 1118 goto cleanup; 1119 1120 /* Should be killed by OOM killer */ 1121 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1122 goto cleanup; 1123 1124 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 1125 goto cleanup; 1126 1127 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 1128 goto cleanup; 1129 1130 peak = cg_read_long(memcg, "memory.peak"); 1131 if (peak < MB(29)) 1132 goto cleanup; 1133 1134 peak = cg_read_long(memcg, "memory.swap.peak"); 1135 if (peak < MB(29)) 1136 goto cleanup; 1137 1138 peak = cg_read_long_fd(mem_peak_fd); 1139 if (peak < MB(29)) 1140 goto cleanup; 1141 1142 peak = cg_read_long_fd(swap_peak_fd); 1143 if (peak < MB(29)) 1144 goto cleanup; 1145 1146 /* 1147 * open, reset and close the peak swap on another FD to make sure 1148 * multiple extant fds don't corrupt the linked-list 1149 */ 1150 peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string); 1151 if (peak_reset) 1152 goto cleanup; 1153 1154 peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string); 1155 if (peak_reset) 1156 goto cleanup; 1157 1158 /* actually reset on the fds */ 1159 peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); 1160 if (peak_reset != sizeof(reset_string)) 1161 goto cleanup; 1162 1163 peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); 1164 if (peak_reset != sizeof(reset_string)) 1165 goto cleanup; 1166 1167 peak = cg_read_long_fd(swap_peak_fd); 1168 if (peak > MB(10)) 1169 goto cleanup; 1170 1171 /* 1172 * The cgroup is now empty, but there may be a page or two associated 1173 * with the open FD accounted to it. 1174 */ 1175 peak = cg_read_long_fd(mem_peak_fd); 1176 if (peak > MB(1)) 1177 goto cleanup; 1178 1179 if (cg_read_long(memcg, "memory.peak") < MB(29)) 1180 goto cleanup; 1181 1182 if (cg_read_long(memcg, "memory.swap.peak") < MB(29)) 1183 goto cleanup; 1184 1185 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 1186 goto cleanup; 1187 1188 max = cg_read_key_long(memcg, "memory.events", "max "); 1189 if (max <= 0) 1190 goto cleanup; 1191 1192 peak = cg_read_long(memcg, "memory.peak"); 1193 if (peak < MB(29)) 1194 goto cleanup; 1195 1196 peak = cg_read_long(memcg, "memory.swap.peak"); 1197 if (peak < MB(29)) 1198 goto cleanup; 1199 1200 peak = cg_read_long_fd(mem_peak_fd); 1201 if (peak < MB(29)) 1202 goto cleanup; 1203 1204 peak = cg_read_long_fd(swap_peak_fd); 1205 if (peak < MB(19)) 1206 goto cleanup; 1207 1208 ret = KSFT_PASS; 1209 1210 cleanup: 1211 if (mem_peak_fd != -1 && close(mem_peak_fd)) 1212 ret = KSFT_FAIL; 1213 if (swap_peak_fd != -1 && close(swap_peak_fd)) 1214 ret = KSFT_FAIL; 1215 cg_destroy(memcg); 1216 free(memcg); 1217 1218 return ret; 1219 } 1220 1221 /* 1222 * This test disables swapping and tries to allocate anonymous memory 1223 * up to OOM. Then it checks for oom and oom_kill events in 1224 * memory.events. 1225 */ 1226 static int test_memcg_oom_events(const char *root) 1227 { 1228 int ret = KSFT_FAIL; 1229 char *memcg; 1230 1231 memcg = cg_name(root, "memcg_test"); 1232 if (!memcg) 1233 goto cleanup; 1234 1235 if (cg_create(memcg)) 1236 goto cleanup; 1237 1238 if (cg_write(memcg, "memory.max", "30M")) 1239 goto cleanup; 1240 1241 if (cg_write(memcg, "memory.swap.max", "0")) 1242 goto cleanup; 1243 1244 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1245 goto cleanup; 1246 1247 if (cg_read_strcmp(memcg, "cgroup.procs", "")) 1248 goto cleanup; 1249 1250 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 1251 goto cleanup; 1252 1253 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 1254 goto cleanup; 1255 1256 ret = KSFT_PASS; 1257 1258 cleanup: 1259 cg_destroy(memcg); 1260 free(memcg); 1261 1262 return ret; 1263 } 1264 1265 struct tcp_server_args { 1266 unsigned short port; 1267 int ctl[2]; 1268 }; 1269 1270 static int tcp_server(const char *cgroup, void *arg) 1271 { 1272 struct tcp_server_args *srv_args = arg; 1273 struct sockaddr_in6 saddr = { 0 }; 1274 socklen_t slen = sizeof(saddr); 1275 int sk, client_sk, ctl_fd, yes = 1, ret = -1; 1276 1277 close(srv_args->ctl[0]); 1278 ctl_fd = srv_args->ctl[1]; 1279 1280 saddr.sin6_family = AF_INET6; 1281 saddr.sin6_addr = in6addr_any; 1282 saddr.sin6_port = htons(srv_args->port); 1283 1284 sk = socket(AF_INET6, SOCK_STREAM, 0); 1285 if (sk < 0) { 1286 /* Pass back errno to the ctl_fd */ 1287 write(ctl_fd, &errno, sizeof(errno)); 1288 return ret; 1289 } 1290 1291 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) 1292 goto cleanup; 1293 1294 if (bind(sk, (struct sockaddr *)&saddr, slen)) { 1295 write(ctl_fd, &errno, sizeof(errno)); 1296 goto cleanup; 1297 } 1298 1299 if (listen(sk, 1)) 1300 goto cleanup; 1301 1302 ret = 0; 1303 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { 1304 ret = -1; 1305 goto cleanup; 1306 } 1307 1308 client_sk = accept(sk, NULL, NULL); 1309 if (client_sk < 0) 1310 goto cleanup; 1311 1312 ret = -1; 1313 for (;;) { 1314 uint8_t buf[0x100000]; 1315 1316 if (write(client_sk, buf, sizeof(buf)) <= 0) { 1317 if (errno == ECONNRESET) 1318 ret = 0; 1319 break; 1320 } 1321 } 1322 1323 close(client_sk); 1324 1325 cleanup: 1326 close(sk); 1327 return ret; 1328 } 1329 1330 static int tcp_client(const char *cgroup, unsigned short port) 1331 { 1332 const char server[] = "localhost"; 1333 struct addrinfo *ai; 1334 char servport[6]; 1335 int retries = 0x10; /* nice round number */ 1336 int sk, ret; 1337 long allocated; 1338 1339 allocated = cg_read_long(cgroup, "memory.current"); 1340 snprintf(servport, sizeof(servport), "%hd", port); 1341 ret = getaddrinfo(server, servport, NULL, &ai); 1342 if (ret) 1343 return ret; 1344 1345 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 1346 if (sk < 0) 1347 goto free_ainfo; 1348 1349 ret = connect(sk, ai->ai_addr, ai->ai_addrlen); 1350 if (ret < 0) 1351 goto close_sk; 1352 1353 ret = KSFT_FAIL; 1354 while (retries--) { 1355 uint8_t buf[0x100000]; 1356 long current, sock; 1357 1358 if (read(sk, buf, sizeof(buf)) <= 0) 1359 goto close_sk; 1360 1361 current = cg_read_long(cgroup, "memory.current"); 1362 sock = cg_read_key_long(cgroup, "memory.stat", "sock "); 1363 1364 if (current < 0 || sock < 0) 1365 goto close_sk; 1366 1367 /* exclude the memory not related to socket connection */ 1368 if (values_close(current - allocated, sock, 10)) { 1369 ret = KSFT_PASS; 1370 break; 1371 } 1372 } 1373 1374 close_sk: 1375 close(sk); 1376 free_ainfo: 1377 freeaddrinfo(ai); 1378 return ret; 1379 } 1380 1381 /* 1382 * This test checks socket memory accounting. 1383 * The test forks a TCP server listens on a random port between 1000 1384 * and 61000. Once it gets a client connection, it starts writing to 1385 * its socket. 1386 * The TCP client interleaves reads from the socket with check whether 1387 * memory.current and memory.stat.sock are similar. 1388 */ 1389 static int test_memcg_sock(const char *root) 1390 { 1391 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1392 unsigned short port; 1393 char *memcg; 1394 long sock_post = -1; 1395 1396 memcg = cg_name(root, "memcg_test"); 1397 if (!memcg) 1398 goto cleanup; 1399 1400 if (cg_create(memcg)) 1401 goto cleanup; 1402 1403 while (bind_retries--) { 1404 struct tcp_server_args args; 1405 1406 if (pipe(args.ctl)) 1407 goto cleanup; 1408 1409 port = args.port = 1000 + rand() % 60000; 1410 1411 pid = cg_run_nowait(memcg, tcp_server, &args); 1412 if (pid < 0) 1413 goto cleanup; 1414 1415 close(args.ctl[1]); 1416 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1417 goto cleanup; 1418 close(args.ctl[0]); 1419 1420 /* Skip if address family not supported by protocol */ 1421 if (err == EAFNOSUPPORT) { 1422 ret = KSFT_SKIP; 1423 goto cleanup; 1424 } 1425 1426 if (!err) 1427 break; 1428 if (err != EADDRINUSE) 1429 goto cleanup; 1430 1431 waitpid(pid, NULL, 0); 1432 } 1433 1434 if (err == EADDRINUSE) { 1435 ret = KSFT_SKIP; 1436 goto cleanup; 1437 } 1438 1439 if (tcp_client(memcg, port) != KSFT_PASS) 1440 goto cleanup; 1441 1442 waitpid(pid, &err, 0); 1443 if (WEXITSTATUS(err)) 1444 goto cleanup; 1445 1446 if (cg_read_long(memcg, "memory.current") < 0) 1447 goto cleanup; 1448 1449 /* 1450 * memory.stat is updated asynchronously via the memcg rstat 1451 * flushing worker, which runs periodically (every 2 seconds, 1452 * see FLUSH_TIME). On a busy system, the "sock " counter may 1453 * stay non-zero for a short period of time after the TCP 1454 * connection is closed and all socket memory has been 1455 * uncharged. 1456 * 1457 * Poll memory.stat for up to 3 seconds (~FLUSH_TIME plus some 1458 * scheduling slack) and require that the "sock " counter 1459 * eventually drops to zero. 1460 */ 1461 sock_post = cg_read_key_long_poll(memcg, "memory.stat", "sock ", 0, 1462 MEMCG_SOCKSTAT_WAIT_RETRIES, 1463 DEFAULT_WAIT_INTERVAL_US); 1464 if (sock_post) 1465 goto cleanup; 1466 1467 ret = KSFT_PASS; 1468 1469 cleanup: 1470 cg_destroy(memcg); 1471 free(memcg); 1472 1473 return ret; 1474 } 1475 1476 /* 1477 * This test disables swapping and tries to allocate anonymous memory 1478 * up to OOM with memory.group.oom set. Then it checks that all 1479 * processes in the leaf were killed. It also checks that oom_events 1480 * were propagated to the parent level. 1481 */ 1482 static int test_memcg_oom_group_leaf_events(const char *root) 1483 { 1484 int ret = KSFT_FAIL; 1485 char *parent, *child; 1486 long parent_oom_events; 1487 1488 parent = cg_name(root, "memcg_test_0"); 1489 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1490 1491 if (!parent || !child) 1492 goto cleanup; 1493 1494 if (cg_create(parent)) 1495 goto cleanup; 1496 1497 if (cg_create(child)) 1498 goto cleanup; 1499 1500 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1501 goto cleanup; 1502 1503 if (cg_write(child, "memory.max", "50M")) 1504 goto cleanup; 1505 1506 if (cg_write(child, "memory.swap.max", "0")) 1507 goto cleanup; 1508 1509 if (cg_write(child, "memory.oom.group", "1")) 1510 goto cleanup; 1511 1512 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1513 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1514 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1515 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1516 goto cleanup; 1517 1518 if (cg_test_proc_killed(child)) 1519 goto cleanup; 1520 1521 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1522 goto cleanup; 1523 1524 parent_oom_events = cg_read_key_long( 1525 parent, "memory.events", "oom_kill "); 1526 /* 1527 * If memory_localevents is not enabled (the default), the parent should 1528 * count OOM events in its children groups. Otherwise, it should not 1529 * have observed any events. 1530 */ 1531 if (has_localevents && parent_oom_events != 0) 1532 goto cleanup; 1533 else if (!has_localevents && parent_oom_events <= 0) 1534 goto cleanup; 1535 1536 ret = KSFT_PASS; 1537 1538 cleanup: 1539 if (child) 1540 cg_destroy(child); 1541 if (parent) 1542 cg_destroy(parent); 1543 free(child); 1544 free(parent); 1545 1546 return ret; 1547 } 1548 1549 /* 1550 * This test disables swapping and tries to allocate anonymous memory 1551 * up to OOM with memory.group.oom set. Then it checks that all 1552 * processes in the parent and leaf were killed. 1553 */ 1554 static int test_memcg_oom_group_parent_events(const char *root) 1555 { 1556 int ret = KSFT_FAIL; 1557 char *parent, *child; 1558 1559 parent = cg_name(root, "memcg_test_0"); 1560 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1561 1562 if (!parent || !child) 1563 goto cleanup; 1564 1565 if (cg_create(parent)) 1566 goto cleanup; 1567 1568 if (cg_create(child)) 1569 goto cleanup; 1570 1571 if (cg_write(parent, "memory.max", "80M")) 1572 goto cleanup; 1573 1574 if (cg_write(parent, "memory.swap.max", "0")) 1575 goto cleanup; 1576 1577 if (cg_write(parent, "memory.oom.group", "1")) 1578 goto cleanup; 1579 1580 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1581 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1582 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1583 1584 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1585 goto cleanup; 1586 1587 if (cg_test_proc_killed(child)) 1588 goto cleanup; 1589 if (cg_test_proc_killed(parent)) 1590 goto cleanup; 1591 1592 ret = KSFT_PASS; 1593 1594 cleanup: 1595 if (child) 1596 cg_destroy(child); 1597 if (parent) 1598 cg_destroy(parent); 1599 free(child); 1600 free(parent); 1601 1602 return ret; 1603 } 1604 1605 /* 1606 * This test disables swapping and tries to allocate anonymous memory 1607 * up to OOM with memory.group.oom set. Then it checks that all 1608 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1609 */ 1610 static int test_memcg_oom_group_score_events(const char *root) 1611 { 1612 int ret = KSFT_FAIL; 1613 char *memcg; 1614 int safe_pid; 1615 1616 memcg = cg_name(root, "memcg_test_0"); 1617 1618 if (!memcg) 1619 goto cleanup; 1620 1621 if (cg_create(memcg)) 1622 goto cleanup; 1623 1624 if (cg_write(memcg, "memory.max", "50M")) 1625 goto cleanup; 1626 1627 if (cg_write(memcg, "memory.swap.max", "0")) 1628 goto cleanup; 1629 1630 if (cg_write(memcg, "memory.oom.group", "1")) 1631 goto cleanup; 1632 1633 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1634 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1635 goto cleanup; 1636 1637 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1638 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1639 goto cleanup; 1640 1641 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1642 goto cleanup; 1643 1644 if (kill(safe_pid, SIGKILL)) 1645 goto cleanup; 1646 1647 ret = KSFT_PASS; 1648 1649 cleanup: 1650 if (memcg) 1651 cg_destroy(memcg); 1652 free(memcg); 1653 1654 return ret; 1655 } 1656 1657 static int read_event(int inotify_fd, int expected_event, int expected_wd) 1658 { 1659 struct inotify_event event; 1660 ssize_t len = 0; 1661 1662 len = read(inotify_fd, &event, sizeof(event)); 1663 if (len < (ssize_t)sizeof(event)) 1664 return -1; 1665 1666 if (event.mask != expected_event || event.wd != expected_wd) { 1667 fprintf(stderr, 1668 "event does not match expected values: mask %d (expected %d) wd %d (expected %d)\n", 1669 event.mask, expected_event, event.wd, expected_wd); 1670 return -1; 1671 } 1672 1673 return 0; 1674 } 1675 1676 static int test_memcg_inotify_delete_file(const char *root) 1677 { 1678 int ret = KSFT_FAIL; 1679 char *memcg = NULL; 1680 int fd, wd; 1681 1682 memcg = cg_name(root, "memcg_test_0"); 1683 1684 if (!memcg) 1685 goto cleanup; 1686 1687 if (cg_create(memcg)) 1688 goto cleanup; 1689 1690 fd = inotify_init1(0); 1691 if (fd == -1) 1692 goto cleanup; 1693 1694 wd = inotify_add_watch(fd, cg_control(memcg, "memory.events"), IN_DELETE_SELF); 1695 if (wd == -1) 1696 goto cleanup; 1697 1698 if (cg_destroy(memcg)) 1699 goto cleanup; 1700 free(memcg); 1701 memcg = NULL; 1702 1703 if (read_event(fd, IN_DELETE_SELF, wd)) 1704 goto cleanup; 1705 1706 if (read_event(fd, IN_IGNORED, wd)) 1707 goto cleanup; 1708 1709 ret = KSFT_PASS; 1710 1711 cleanup: 1712 if (fd >= 0) 1713 close(fd); 1714 if (memcg) 1715 cg_destroy(memcg); 1716 free(memcg); 1717 1718 return ret; 1719 } 1720 1721 static int test_memcg_inotify_delete_dir(const char *root) 1722 { 1723 int ret = KSFT_FAIL; 1724 char *memcg = NULL; 1725 int fd, wd; 1726 1727 memcg = cg_name(root, "memcg_test_0"); 1728 1729 if (!memcg) 1730 goto cleanup; 1731 1732 if (cg_create(memcg)) 1733 goto cleanup; 1734 1735 fd = inotify_init1(0); 1736 if (fd == -1) 1737 goto cleanup; 1738 1739 wd = inotify_add_watch(fd, memcg, IN_DELETE_SELF); 1740 if (wd == -1) 1741 goto cleanup; 1742 1743 if (cg_destroy(memcg)) 1744 goto cleanup; 1745 free(memcg); 1746 memcg = NULL; 1747 1748 if (read_event(fd, IN_DELETE_SELF, wd)) 1749 goto cleanup; 1750 1751 if (read_event(fd, IN_IGNORED, wd)) 1752 goto cleanup; 1753 1754 ret = KSFT_PASS; 1755 1756 cleanup: 1757 if (fd >= 0) 1758 close(fd); 1759 if (memcg) 1760 cg_destroy(memcg); 1761 free(memcg); 1762 1763 return ret; 1764 } 1765 1766 #define T(x) { x, #x } 1767 struct memcg_test { 1768 int (*fn)(const char *root); 1769 const char *name; 1770 } tests[] = { 1771 T(test_memcg_subtree_control), 1772 T(test_memcg_current_peak), 1773 T(test_memcg_min), 1774 T(test_memcg_low), 1775 T(test_memcg_high), 1776 T(test_memcg_high_sync), 1777 T(test_memcg_max), 1778 T(test_memcg_reclaim), 1779 T(test_memcg_oom_events), 1780 T(test_memcg_swap_max_peak), 1781 T(test_memcg_sock), 1782 T(test_memcg_oom_group_leaf_events), 1783 T(test_memcg_oom_group_parent_events), 1784 T(test_memcg_oom_group_score_events), 1785 T(test_memcg_inotify_delete_file), 1786 T(test_memcg_inotify_delete_dir), 1787 }; 1788 #undef T 1789 1790 int main(int argc, char **argv) 1791 { 1792 char root[PATH_MAX]; 1793 int i, proc_status; 1794 1795 page_size = sysconf(_SC_PAGE_SIZE); 1796 if (page_size <= 0) 1797 page_size = BUF_SIZE; 1798 1799 ksft_print_header(); 1800 ksft_set_plan(ARRAY_SIZE(tests)); 1801 if (cg_find_unified_root(root, sizeof(root), NULL)) 1802 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1803 1804 /* 1805 * Check that memory controller is available: 1806 * memory is listed in cgroup.controllers 1807 */ 1808 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1809 ksft_exit_skip("memory controller isn't available\n"); 1810 1811 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1812 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1813 ksft_exit_skip("Failed to set memory controller\n"); 1814 1815 proc_status = proc_mount_contains("memory_recursiveprot"); 1816 if (proc_status < 0) 1817 ksft_exit_skip("Failed to query cgroup mount option\n"); 1818 has_recursiveprot = proc_status; 1819 1820 proc_status = proc_mount_contains("memory_localevents"); 1821 if (proc_status < 0) 1822 ksft_exit_skip("Failed to query cgroup mount option\n"); 1823 has_localevents = proc_status; 1824 1825 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1826 switch (tests[i].fn(root)) { 1827 case KSFT_PASS: 1828 ksft_test_result_pass("%s\n", tests[i].name); 1829 break; 1830 case KSFT_SKIP: 1831 ksft_test_result_skip("%s\n", tests[i].name); 1832 break; 1833 default: 1834 ksft_test_result_fail("%s\n", tests[i].name); 1835 break; 1836 } 1837 } 1838 1839 ksft_finished(); 1840 } 1841