1 /* SPDX-License-Identifier: GPL-2.0 */ 2 3 #define _GNU_SOURCE 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <linux/limits.h> 8 #include <poll.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/inotify.h> 14 #include <sys/stat.h> 15 #include <sys/types.h> 16 #include <sys/wait.h> 17 #include <unistd.h> 18 19 #include "cgroup_util.h" 20 #include "../../clone3/clone3_selftests.h" 21 22 bool cg_test_v1_named; 23 24 /* Returns read len on success, or -errno on failure. */ 25 ssize_t read_text(const char *path, char *buf, size_t max_len) 26 { 27 ssize_t len; 28 int fd; 29 30 fd = open(path, O_RDONLY); 31 if (fd < 0) 32 return -errno; 33 34 len = read(fd, buf, max_len - 1); 35 36 if (len >= 0) 37 buf[len] = 0; 38 39 close(fd); 40 return len < 0 ? -errno : len; 41 } 42 43 /* Returns written len on success, or -errno on failure. */ 44 ssize_t write_text(const char *path, char *buf, ssize_t len) 45 { 46 int fd; 47 48 fd = open(path, O_WRONLY | O_APPEND); 49 if (fd < 0) 50 return -errno; 51 52 len = write(fd, buf, len); 53 close(fd); 54 return len < 0 ? -errno : len; 55 } 56 57 char *cg_name(const char *root, const char *name) 58 { 59 size_t len = strlen(root) + strlen(name) + 2; 60 char *ret = malloc(len); 61 62 if (ret) 63 snprintf(ret, len, "%s/%s", root, name); 64 65 return ret; 66 } 67 68 char *cg_name_indexed(const char *root, const char *name, int index) 69 { 70 size_t len = strlen(root) + strlen(name) + 10; 71 char *ret = malloc(len); 72 73 if (ret) 74 snprintf(ret, len, "%s/%s_%d", root, name, index); 75 76 return ret; 77 } 78 79 char *cg_control(const char *cgroup, const char *control) 80 { 81 size_t len = strlen(cgroup) + strlen(control) + 2; 82 char *ret = malloc(len); 83 84 if (ret) 85 snprintf(ret, len, "%s/%s", cgroup, control); 86 87 return ret; 88 } 89 90 /* Returns 0 on success, or -errno on failure. */ 91 int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 92 { 93 char path[PATH_MAX]; 94 ssize_t ret; 95 96 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 97 98 ret = read_text(path, buf, len); 99 return ret >= 0 ? 0 : ret; 100 } 101 102 int cg_read_strcmp(const char *cgroup, const char *control, 103 const char *expected) 104 { 105 size_t size; 106 char *buf; 107 int ret; 108 109 /* Handle the case of comparing against empty string */ 110 if (!expected) 111 return -1; 112 113 /* needs size > 1, otherwise cg_read() reads 0 bytes */ 114 size = (expected[0] == '\0') ? 2 : strlen(expected) + 1; 115 116 buf = malloc(size); 117 if (!buf) 118 return -1; 119 120 if (cg_read(cgroup, control, buf, size)) { 121 free(buf); 122 return -1; 123 } 124 125 ret = strcmp(expected, buf); 126 free(buf); 127 return ret; 128 } 129 130 int cg_read_strcmp_wait(const char *cgroup, const char *control, 131 const char *expected) 132 { 133 int i, ret; 134 135 for (i = 0; i < 100; i++) { 136 ret = cg_read_strcmp(cgroup, control, expected); 137 if (!ret) 138 return ret; 139 usleep(10000); 140 } 141 142 return ret; 143 } 144 145 int cg_read_strstr(const char *cgroup, const char *control, const char *needle) 146 { 147 char buf[BUF_SIZE]; 148 149 if (cg_read(cgroup, control, buf, sizeof(buf))) 150 return -1; 151 152 return strstr(buf, needle) ? 0 : -1; 153 } 154 155 long cg_read_long(const char *cgroup, const char *control) 156 { 157 char buf[128]; 158 159 if (cg_read(cgroup, control, buf, sizeof(buf))) 160 return -1; 161 162 return atol(buf); 163 } 164 165 long cg_read_long_fd(int fd) 166 { 167 char buf[128]; 168 169 if (pread(fd, buf, sizeof(buf), 0) <= 0) 170 return -1; 171 172 return atol(buf); 173 } 174 175 long cg_read_key_long(const char *cgroup, const char *control, const char *key) 176 { 177 char buf[BUF_SIZE]; 178 char *ptr; 179 180 if (cg_read(cgroup, control, buf, sizeof(buf))) 181 return -1; 182 183 ptr = strstr(buf, key); 184 if (!ptr) 185 return -1; 186 187 return atol(ptr + strlen(key)); 188 } 189 190 long cg_read_key_long_poll(const char *cgroup, const char *control, 191 const char *key, long expected, int retries, 192 useconds_t wait_interval_us) 193 { 194 long val = -1; 195 int i; 196 197 for (i = 0; i < retries; i++) { 198 val = cg_read_key_long(cgroup, control, key); 199 if (val < 0) 200 return val; 201 202 if (val == expected) 203 break; 204 205 usleep(wait_interval_us); 206 } 207 208 return val; 209 } 210 211 long cg_read_lc(const char *cgroup, const char *control) 212 { 213 char buf[BUF_SIZE]; 214 const char delim[] = "\n"; 215 char *line; 216 long cnt = 0; 217 218 if (cg_read(cgroup, control, buf, sizeof(buf))) 219 return -1; 220 221 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 222 cnt++; 223 224 return cnt; 225 } 226 227 /* Returns 0 on success, or -errno on failure. */ 228 int cg_write(const char *cgroup, const char *control, char *buf) 229 { 230 char path[PATH_MAX]; 231 ssize_t len = strlen(buf), ret; 232 233 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 234 ret = write_text(path, buf, len); 235 return ret == len ? 0 : ret; 236 } 237 238 /* 239 * Returns fd on success, or -1 on failure. 240 * (fd should be closed with close() as usual) 241 */ 242 int cg_open(const char *cgroup, const char *control, int flags) 243 { 244 char path[PATH_MAX]; 245 246 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 247 return open(path, flags); 248 } 249 250 int cg_write_numeric(const char *cgroup, const char *control, long value) 251 { 252 char buf[64]; 253 int ret; 254 255 ret = sprintf(buf, "%lu", value); 256 if (ret < 0) 257 return ret; 258 259 return cg_write(cgroup, control, buf); 260 } 261 262 static int cg_find_root(char *root, size_t len, const char *controller, 263 bool *nsdelegate) 264 { 265 char buf[10 * BUF_SIZE]; 266 char *fs, *mount, *type, *options; 267 const char delim[] = "\n\t "; 268 269 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) 270 return -1; 271 272 /* 273 * Example: 274 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 275 */ 276 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { 277 mount = strtok(NULL, delim); 278 type = strtok(NULL, delim); 279 options = strtok(NULL, delim); 280 strtok(NULL, delim); 281 strtok(NULL, delim); 282 if (strcmp(type, "cgroup") == 0) { 283 if (!controller || !strstr(options, controller)) 284 continue; 285 } else if (strcmp(type, "cgroup2") == 0) { 286 if (controller && 287 cg_read_strstr(mount, "cgroup.controllers", controller)) 288 continue; 289 } else { 290 continue; 291 } 292 strncpy(root, mount, len); 293 294 if (nsdelegate) 295 *nsdelegate = !!strstr(options, "nsdelegate"); 296 return 0; 297 298 } 299 300 return -1; 301 } 302 303 int cg_find_controller_root(char *root, size_t len, const char *controller) 304 { 305 return cg_find_root(root, len, controller, NULL); 306 } 307 308 int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) 309 { 310 return cg_find_root(root, len, NULL, nsdelegate); 311 } 312 313 int cg_create(const char *cgroup) 314 { 315 return mkdir(cgroup, 0755); 316 } 317 318 int cg_wait_for_proc_count(const char *cgroup, int count) 319 { 320 char buf[10 * BUF_SIZE] = {0}; 321 int attempts; 322 char *ptr; 323 324 for (attempts = 10; attempts >= 0; attempts--) { 325 int nr = 0; 326 327 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 328 break; 329 330 for (ptr = buf; *ptr; ptr++) 331 if (*ptr == '\n') 332 nr++; 333 334 if (nr >= count) 335 return 0; 336 337 usleep(100000); 338 } 339 340 return -1; 341 } 342 343 int cg_killall(const char *cgroup) 344 { 345 char buf[BUF_SIZE]; 346 char *ptr = buf; 347 348 /* If cgroup.kill exists use it. */ 349 if (!cg_write(cgroup, "cgroup.kill", "1")) 350 return 0; 351 352 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 353 return -1; 354 355 while (ptr < buf + sizeof(buf)) { 356 int pid = strtol(ptr, &ptr, 10); 357 358 if (pid == 0) 359 break; 360 if (*ptr) 361 ptr++; 362 else 363 break; 364 if (kill(pid, SIGKILL)) 365 return -1; 366 } 367 368 return 0; 369 } 370 371 int cg_destroy(const char *cgroup) 372 { 373 int ret; 374 375 if (!cgroup) 376 return 0; 377 retry: 378 ret = rmdir(cgroup); 379 if (ret && errno == EBUSY) { 380 cg_killall(cgroup); 381 usleep(100); 382 goto retry; 383 } 384 385 if (ret && errno == ENOENT) 386 ret = 0; 387 388 return ret; 389 } 390 391 int cg_enter(const char *cgroup, int pid) 392 { 393 char pidbuf[64]; 394 395 snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 396 return cg_write(cgroup, "cgroup.procs", pidbuf); 397 } 398 399 int cg_enter_current(const char *cgroup) 400 { 401 return cg_write(cgroup, "cgroup.procs", "0"); 402 } 403 404 int cg_enter_current_thread(const char *cgroup) 405 { 406 return cg_write(cgroup, CG_THREADS_FILE, "0"); 407 } 408 409 int cg_run(const char *cgroup, 410 int (*fn)(const char *cgroup, void *arg), 411 void *arg) 412 { 413 int pid, retcode; 414 415 pid = fork(); 416 if (pid < 0) { 417 return pid; 418 } else if (pid == 0) { 419 char buf[64]; 420 421 snprintf(buf, sizeof(buf), "%d", getpid()); 422 if (cg_write(cgroup, "cgroup.procs", buf)) 423 exit(EXIT_FAILURE); 424 exit(fn(cgroup, arg)); 425 } else { 426 waitpid(pid, &retcode, 0); 427 if (WIFEXITED(retcode)) 428 return WEXITSTATUS(retcode); 429 else 430 return -1; 431 } 432 } 433 434 pid_t clone_into_cgroup(int cgroup_fd) 435 { 436 #ifdef CLONE_ARGS_SIZE_VER2 437 pid_t pid; 438 439 struct __clone_args args = { 440 .flags = CLONE_INTO_CGROUP, 441 .exit_signal = SIGCHLD, 442 .cgroup = cgroup_fd, 443 }; 444 445 pid = sys_clone3(&args, sizeof(struct __clone_args)); 446 /* 447 * Verify that this is a genuine test failure: 448 * ENOSYS -> clone3() not available 449 * E2BIG -> CLONE_INTO_CGROUP not available 450 */ 451 if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) 452 goto pretend_enosys; 453 454 return pid; 455 456 pretend_enosys: 457 #endif 458 errno = ENOSYS; 459 return -ENOSYS; 460 } 461 462 int clone_reap(pid_t pid, int options) 463 { 464 int ret; 465 siginfo_t info = { 466 .si_signo = 0, 467 }; 468 469 again: 470 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); 471 if (ret < 0) { 472 if (errno == EINTR) 473 goto again; 474 return -1; 475 } 476 477 if (options & WEXITED) { 478 if (WIFEXITED(info.si_status)) 479 return WEXITSTATUS(info.si_status); 480 } 481 482 if (options & WSTOPPED) { 483 if (WIFSTOPPED(info.si_status)) 484 return WSTOPSIG(info.si_status); 485 } 486 487 if (options & WCONTINUED) { 488 if (WIFCONTINUED(info.si_status)) 489 return 0; 490 } 491 492 return -1; 493 } 494 495 int dirfd_open_opath(const char *dir) 496 { 497 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); 498 } 499 500 #define close_prot_errno(fd) \ 501 if (fd >= 0) { \ 502 int _e_ = errno; \ 503 close(fd); \ 504 errno = _e_; \ 505 } 506 507 static int clone_into_cgroup_run_nowait(const char *cgroup, 508 int (*fn)(const char *cgroup, void *arg), 509 void *arg) 510 { 511 int cgroup_fd; 512 pid_t pid; 513 514 cgroup_fd = dirfd_open_opath(cgroup); 515 if (cgroup_fd < 0) 516 return -1; 517 518 pid = clone_into_cgroup(cgroup_fd); 519 close_prot_errno(cgroup_fd); 520 if (pid == 0) 521 exit(fn(cgroup, arg)); 522 523 return pid; 524 } 525 526 int cg_run_nowait(const char *cgroup, 527 int (*fn)(const char *cgroup, void *arg), 528 void *arg) 529 { 530 int pid; 531 532 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); 533 if (pid > 0) 534 return pid; 535 536 /* Genuine test failure. */ 537 if (pid < 0 && errno != ENOSYS) 538 return -1; 539 540 pid = fork(); 541 if (pid == 0) { 542 char buf[64]; 543 544 snprintf(buf, sizeof(buf), "%d", getpid()); 545 if (cg_write(cgroup, "cgroup.procs", buf)) 546 exit(EXIT_FAILURE); 547 exit(fn(cgroup, arg)); 548 } 549 550 return pid; 551 } 552 553 int proc_mount_contains(const char *option) 554 { 555 char buf[4 * BUF_SIZE]; 556 ssize_t read; 557 558 read = read_text("/proc/mounts", buf, sizeof(buf)); 559 if (read < 0) 560 return read; 561 562 return strstr(buf, option) != NULL; 563 } 564 565 int cgroup_feature(const char *feature) 566 { 567 char buf[BUF_SIZE]; 568 ssize_t read; 569 570 read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf)); 571 if (read < 0) 572 return read; 573 574 return strstr(buf, feature) != NULL; 575 } 576 577 ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) 578 { 579 char path[PATH_MAX]; 580 ssize_t ret; 581 582 if (!pid) 583 snprintf(path, sizeof(path), "/proc/%s/%s", 584 thread ? "thread-self" : "self", item); 585 else 586 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); 587 588 ret = read_text(path, buf, size); 589 return ret < 0 ? -1 : ret; 590 } 591 592 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) 593 { 594 char buf[BUF_SIZE]; 595 596 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) 597 return -1; 598 599 return strstr(buf, needle) ? 0 : -1; 600 } 601 602 int clone_into_cgroup_run_wait(const char *cgroup) 603 { 604 int cgroup_fd; 605 pid_t pid; 606 607 cgroup_fd = dirfd_open_opath(cgroup); 608 if (cgroup_fd < 0) 609 return -1; 610 611 pid = clone_into_cgroup(cgroup_fd); 612 close_prot_errno(cgroup_fd); 613 if (pid < 0) 614 return -1; 615 616 if (pid == 0) 617 exit(EXIT_SUCCESS); 618 619 /* 620 * We don't care whether this fails. We only care whether the initial 621 * clone succeeded. 622 */ 623 (void)clone_reap(pid, WEXITED); 624 return 0; 625 } 626 627 static int __prepare_for_wait(const char *cgroup, const char *filename) 628 { 629 int fd, ret = -1; 630 631 fd = inotify_init1(0); 632 if (fd == -1) 633 return fd; 634 635 ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY); 636 if (ret == -1) { 637 close(fd); 638 fd = -1; 639 } 640 641 return fd; 642 } 643 644 int cg_prepare_for_wait(const char *cgroup) 645 { 646 return __prepare_for_wait(cgroup, "cgroup.events"); 647 } 648 649 int memcg_prepare_for_wait(const char *cgroup) 650 { 651 return __prepare_for_wait(cgroup, "memory.events"); 652 } 653 654 int cg_wait_for(int fd) 655 { 656 int ret = -1; 657 struct pollfd fds = { 658 .fd = fd, 659 .events = POLLIN, 660 }; 661 662 while (true) { 663 ret = poll(&fds, 1, 10000); 664 665 if (ret == -1) { 666 if (errno == EINTR) 667 continue; 668 669 break; 670 } 671 672 if (ret > 0 && fds.revents & POLLIN) { 673 ret = 0; 674 break; 675 } 676 } 677 678 return ret; 679 } 680