1 /* SPDX-License-Identifier: GPL-2.0 */ 2 3 #define _GNU_SOURCE 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <linux/limits.h> 8 #include <poll.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/inotify.h> 14 #include <sys/stat.h> 15 #include <sys/types.h> 16 #include <sys/wait.h> 17 #include <unistd.h> 18 19 #include "cgroup_util.h" 20 #include "../../clone3/clone3_selftests.h" 21 22 bool cg_test_v1_named; 23 24 /* Returns read len on success, or -errno on failure. */ 25 ssize_t read_text(const char *path, char *buf, size_t max_len) 26 { 27 ssize_t len; 28 int fd; 29 30 fd = open(path, O_RDONLY); 31 if (fd < 0) 32 return -errno; 33 34 len = read(fd, buf, max_len - 1); 35 36 if (len >= 0) 37 buf[len] = 0; 38 39 close(fd); 40 return len < 0 ? -errno : len; 41 } 42 43 /* Returns written len on success, or -errno on failure. */ 44 ssize_t write_text(const char *path, char *buf, ssize_t len) 45 { 46 int fd; 47 48 fd = open(path, O_WRONLY | O_APPEND); 49 if (fd < 0) 50 return -errno; 51 52 len = write(fd, buf, len); 53 close(fd); 54 return len < 0 ? -errno : len; 55 } 56 57 char *cg_name(const char *root, const char *name) 58 { 59 size_t len = strlen(root) + strlen(name) + 2; 60 char *ret = malloc(len); 61 62 snprintf(ret, len, "%s/%s", root, name); 63 64 return ret; 65 } 66 67 char *cg_name_indexed(const char *root, const char *name, int index) 68 { 69 size_t len = strlen(root) + strlen(name) + 10; 70 char *ret = malloc(len); 71 72 snprintf(ret, len, "%s/%s_%d", root, name, index); 73 74 return ret; 75 } 76 77 char *cg_control(const char *cgroup, const char *control) 78 { 79 size_t len = strlen(cgroup) + strlen(control) + 2; 80 char *ret = malloc(len); 81 82 snprintf(ret, len, "%s/%s", cgroup, control); 83 84 return ret; 85 } 86 87 /* Returns 0 on success, or -errno on failure. */ 88 int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 89 { 90 char path[PATH_MAX]; 91 ssize_t ret; 92 93 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 94 95 ret = read_text(path, buf, len); 96 return ret >= 0 ? 0 : ret; 97 } 98 99 int cg_read_strcmp(const char *cgroup, const char *control, 100 const char *expected) 101 { 102 size_t size; 103 char *buf; 104 int ret; 105 106 /* Handle the case of comparing against empty string */ 107 if (!expected) 108 return -1; 109 110 /* needs size > 1, otherwise cg_read() reads 0 bytes */ 111 size = (expected[0] == '\0') ? 2 : strlen(expected) + 1; 112 113 buf = malloc(size); 114 if (!buf) 115 return -1; 116 117 if (cg_read(cgroup, control, buf, size)) { 118 free(buf); 119 return -1; 120 } 121 122 ret = strcmp(expected, buf); 123 free(buf); 124 return ret; 125 } 126 127 int cg_read_strcmp_wait(const char *cgroup, const char *control, 128 const char *expected) 129 { 130 int i, ret; 131 132 for (i = 0; i < 100; i++) { 133 ret = cg_read_strcmp(cgroup, control, expected); 134 if (!ret) 135 return ret; 136 usleep(10000); 137 } 138 139 return ret; 140 } 141 142 int cg_read_strstr(const char *cgroup, const char *control, const char *needle) 143 { 144 char buf[PAGE_SIZE]; 145 146 if (cg_read(cgroup, control, buf, sizeof(buf))) 147 return -1; 148 149 return strstr(buf, needle) ? 0 : -1; 150 } 151 152 long cg_read_long(const char *cgroup, const char *control) 153 { 154 char buf[128]; 155 156 if (cg_read(cgroup, control, buf, sizeof(buf))) 157 return -1; 158 159 return atol(buf); 160 } 161 162 long cg_read_long_fd(int fd) 163 { 164 char buf[128]; 165 166 if (pread(fd, buf, sizeof(buf), 0) <= 0) 167 return -1; 168 169 return atol(buf); 170 } 171 172 long cg_read_key_long(const char *cgroup, const char *control, const char *key) 173 { 174 char buf[PAGE_SIZE]; 175 char *ptr; 176 177 if (cg_read(cgroup, control, buf, sizeof(buf))) 178 return -1; 179 180 ptr = strstr(buf, key); 181 if (!ptr) 182 return -1; 183 184 return atol(ptr + strlen(key)); 185 } 186 187 long cg_read_key_long_poll(const char *cgroup, const char *control, 188 const char *key, long expected, int retries, 189 useconds_t wait_interval_us) 190 { 191 long val = -1; 192 int i; 193 194 for (i = 0; i < retries; i++) { 195 val = cg_read_key_long(cgroup, control, key); 196 if (val < 0) 197 return val; 198 199 if (val == expected) 200 break; 201 202 usleep(wait_interval_us); 203 } 204 205 return val; 206 } 207 208 long cg_read_lc(const char *cgroup, const char *control) 209 { 210 char buf[PAGE_SIZE]; 211 const char delim[] = "\n"; 212 char *line; 213 long cnt = 0; 214 215 if (cg_read(cgroup, control, buf, sizeof(buf))) 216 return -1; 217 218 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 219 cnt++; 220 221 return cnt; 222 } 223 224 /* Returns 0 on success, or -errno on failure. */ 225 int cg_write(const char *cgroup, const char *control, char *buf) 226 { 227 char path[PATH_MAX]; 228 ssize_t len = strlen(buf), ret; 229 230 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 231 ret = write_text(path, buf, len); 232 return ret == len ? 0 : ret; 233 } 234 235 /* 236 * Returns fd on success, or -1 on failure. 237 * (fd should be closed with close() as usual) 238 */ 239 int cg_open(const char *cgroup, const char *control, int flags) 240 { 241 char path[PATH_MAX]; 242 243 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 244 return open(path, flags); 245 } 246 247 int cg_write_numeric(const char *cgroup, const char *control, long value) 248 { 249 char buf[64]; 250 int ret; 251 252 ret = sprintf(buf, "%lu", value); 253 if (ret < 0) 254 return ret; 255 256 return cg_write(cgroup, control, buf); 257 } 258 259 static int cg_find_root(char *root, size_t len, const char *controller, 260 bool *nsdelegate) 261 { 262 char buf[10 * PAGE_SIZE]; 263 char *fs, *mount, *type, *options; 264 const char delim[] = "\n\t "; 265 266 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) 267 return -1; 268 269 /* 270 * Example: 271 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 272 */ 273 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { 274 mount = strtok(NULL, delim); 275 type = strtok(NULL, delim); 276 options = strtok(NULL, delim); 277 strtok(NULL, delim); 278 strtok(NULL, delim); 279 if (strcmp(type, "cgroup") == 0) { 280 if (!controller || !strstr(options, controller)) 281 continue; 282 } else if (strcmp(type, "cgroup2") == 0) { 283 if (controller && 284 cg_read_strstr(mount, "cgroup.controllers", controller)) 285 continue; 286 } else { 287 continue; 288 } 289 strncpy(root, mount, len); 290 291 if (nsdelegate) 292 *nsdelegate = !!strstr(options, "nsdelegate"); 293 return 0; 294 295 } 296 297 return -1; 298 } 299 300 int cg_find_controller_root(char *root, size_t len, const char *controller) 301 { 302 return cg_find_root(root, len, controller, NULL); 303 } 304 305 int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) 306 { 307 return cg_find_root(root, len, NULL, nsdelegate); 308 } 309 310 int cg_create(const char *cgroup) 311 { 312 return mkdir(cgroup, 0755); 313 } 314 315 int cg_wait_for_proc_count(const char *cgroup, int count) 316 { 317 char buf[10 * PAGE_SIZE] = {0}; 318 int attempts; 319 char *ptr; 320 321 for (attempts = 10; attempts >= 0; attempts--) { 322 int nr = 0; 323 324 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 325 break; 326 327 for (ptr = buf; *ptr; ptr++) 328 if (*ptr == '\n') 329 nr++; 330 331 if (nr >= count) 332 return 0; 333 334 usleep(100000); 335 } 336 337 return -1; 338 } 339 340 int cg_killall(const char *cgroup) 341 { 342 char buf[PAGE_SIZE]; 343 char *ptr = buf; 344 345 /* If cgroup.kill exists use it. */ 346 if (!cg_write(cgroup, "cgroup.kill", "1")) 347 return 0; 348 349 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 350 return -1; 351 352 while (ptr < buf + sizeof(buf)) { 353 int pid = strtol(ptr, &ptr, 10); 354 355 if (pid == 0) 356 break; 357 if (*ptr) 358 ptr++; 359 else 360 break; 361 if (kill(pid, SIGKILL)) 362 return -1; 363 } 364 365 return 0; 366 } 367 368 int cg_destroy(const char *cgroup) 369 { 370 int ret; 371 372 if (!cgroup) 373 return 0; 374 retry: 375 ret = rmdir(cgroup); 376 if (ret && errno == EBUSY) { 377 cg_killall(cgroup); 378 usleep(100); 379 goto retry; 380 } 381 382 if (ret && errno == ENOENT) 383 ret = 0; 384 385 return ret; 386 } 387 388 int cg_enter(const char *cgroup, int pid) 389 { 390 char pidbuf[64]; 391 392 snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 393 return cg_write(cgroup, "cgroup.procs", pidbuf); 394 } 395 396 int cg_enter_current(const char *cgroup) 397 { 398 return cg_write(cgroup, "cgroup.procs", "0"); 399 } 400 401 int cg_enter_current_thread(const char *cgroup) 402 { 403 return cg_write(cgroup, CG_THREADS_FILE, "0"); 404 } 405 406 int cg_run(const char *cgroup, 407 int (*fn)(const char *cgroup, void *arg), 408 void *arg) 409 { 410 int pid, retcode; 411 412 pid = fork(); 413 if (pid < 0) { 414 return pid; 415 } else if (pid == 0) { 416 char buf[64]; 417 418 snprintf(buf, sizeof(buf), "%d", getpid()); 419 if (cg_write(cgroup, "cgroup.procs", buf)) 420 exit(EXIT_FAILURE); 421 exit(fn(cgroup, arg)); 422 } else { 423 waitpid(pid, &retcode, 0); 424 if (WIFEXITED(retcode)) 425 return WEXITSTATUS(retcode); 426 else 427 return -1; 428 } 429 } 430 431 pid_t clone_into_cgroup(int cgroup_fd) 432 { 433 #ifdef CLONE_ARGS_SIZE_VER2 434 pid_t pid; 435 436 struct __clone_args args = { 437 .flags = CLONE_INTO_CGROUP, 438 .exit_signal = SIGCHLD, 439 .cgroup = cgroup_fd, 440 }; 441 442 pid = sys_clone3(&args, sizeof(struct __clone_args)); 443 /* 444 * Verify that this is a genuine test failure: 445 * ENOSYS -> clone3() not available 446 * E2BIG -> CLONE_INTO_CGROUP not available 447 */ 448 if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) 449 goto pretend_enosys; 450 451 return pid; 452 453 pretend_enosys: 454 #endif 455 errno = ENOSYS; 456 return -ENOSYS; 457 } 458 459 int clone_reap(pid_t pid, int options) 460 { 461 int ret; 462 siginfo_t info = { 463 .si_signo = 0, 464 }; 465 466 again: 467 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); 468 if (ret < 0) { 469 if (errno == EINTR) 470 goto again; 471 return -1; 472 } 473 474 if (options & WEXITED) { 475 if (WIFEXITED(info.si_status)) 476 return WEXITSTATUS(info.si_status); 477 } 478 479 if (options & WSTOPPED) { 480 if (WIFSTOPPED(info.si_status)) 481 return WSTOPSIG(info.si_status); 482 } 483 484 if (options & WCONTINUED) { 485 if (WIFCONTINUED(info.si_status)) 486 return 0; 487 } 488 489 return -1; 490 } 491 492 int dirfd_open_opath(const char *dir) 493 { 494 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); 495 } 496 497 #define close_prot_errno(fd) \ 498 if (fd >= 0) { \ 499 int _e_ = errno; \ 500 close(fd); \ 501 errno = _e_; \ 502 } 503 504 static int clone_into_cgroup_run_nowait(const char *cgroup, 505 int (*fn)(const char *cgroup, void *arg), 506 void *arg) 507 { 508 int cgroup_fd; 509 pid_t pid; 510 511 cgroup_fd = dirfd_open_opath(cgroup); 512 if (cgroup_fd < 0) 513 return -1; 514 515 pid = clone_into_cgroup(cgroup_fd); 516 close_prot_errno(cgroup_fd); 517 if (pid == 0) 518 exit(fn(cgroup, arg)); 519 520 return pid; 521 } 522 523 int cg_run_nowait(const char *cgroup, 524 int (*fn)(const char *cgroup, void *arg), 525 void *arg) 526 { 527 int pid; 528 529 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); 530 if (pid > 0) 531 return pid; 532 533 /* Genuine test failure. */ 534 if (pid < 0 && errno != ENOSYS) 535 return -1; 536 537 pid = fork(); 538 if (pid == 0) { 539 char buf[64]; 540 541 snprintf(buf, sizeof(buf), "%d", getpid()); 542 if (cg_write(cgroup, "cgroup.procs", buf)) 543 exit(EXIT_FAILURE); 544 exit(fn(cgroup, arg)); 545 } 546 547 return pid; 548 } 549 550 int proc_mount_contains(const char *option) 551 { 552 char buf[4 * PAGE_SIZE]; 553 ssize_t read; 554 555 read = read_text("/proc/mounts", buf, sizeof(buf)); 556 if (read < 0) 557 return read; 558 559 return strstr(buf, option) != NULL; 560 } 561 562 int cgroup_feature(const char *feature) 563 { 564 char buf[PAGE_SIZE]; 565 ssize_t read; 566 567 read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf)); 568 if (read < 0) 569 return read; 570 571 return strstr(buf, feature) != NULL; 572 } 573 574 ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) 575 { 576 char path[PATH_MAX]; 577 ssize_t ret; 578 579 if (!pid) 580 snprintf(path, sizeof(path), "/proc/%s/%s", 581 thread ? "thread-self" : "self", item); 582 else 583 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); 584 585 ret = read_text(path, buf, size); 586 return ret < 0 ? -1 : ret; 587 } 588 589 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) 590 { 591 char buf[PAGE_SIZE]; 592 593 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) 594 return -1; 595 596 return strstr(buf, needle) ? 0 : -1; 597 } 598 599 int clone_into_cgroup_run_wait(const char *cgroup) 600 { 601 int cgroup_fd; 602 pid_t pid; 603 604 cgroup_fd = dirfd_open_opath(cgroup); 605 if (cgroup_fd < 0) 606 return -1; 607 608 pid = clone_into_cgroup(cgroup_fd); 609 close_prot_errno(cgroup_fd); 610 if (pid < 0) 611 return -1; 612 613 if (pid == 0) 614 exit(EXIT_SUCCESS); 615 616 /* 617 * We don't care whether this fails. We only care whether the initial 618 * clone succeeded. 619 */ 620 (void)clone_reap(pid, WEXITED); 621 return 0; 622 } 623 624 static int __prepare_for_wait(const char *cgroup, const char *filename) 625 { 626 int fd, ret = -1; 627 628 fd = inotify_init1(0); 629 if (fd == -1) 630 return fd; 631 632 ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY); 633 if (ret == -1) { 634 close(fd); 635 fd = -1; 636 } 637 638 return fd; 639 } 640 641 int cg_prepare_for_wait(const char *cgroup) 642 { 643 return __prepare_for_wait(cgroup, "cgroup.events"); 644 } 645 646 int memcg_prepare_for_wait(const char *cgroup) 647 { 648 return __prepare_for_wait(cgroup, "memory.events"); 649 } 650 651 int cg_wait_for(int fd) 652 { 653 int ret = -1; 654 struct pollfd fds = { 655 .fd = fd, 656 .events = POLLIN, 657 }; 658 659 while (true) { 660 ret = poll(&fds, 1, 10000); 661 662 if (ret == -1) { 663 if (errno == EINTR) 664 continue; 665 666 break; 667 } 668 669 if (ret > 0 && fds.revents & POLLIN) { 670 ret = 0; 671 break; 672 } 673 } 674 675 return ret; 676 } 677