1 /* SPDX-License-Identifier: GPL-2.0 */ 2 3 #define _GNU_SOURCE 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <linux/limits.h> 8 #include <poll.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/inotify.h> 14 #include <sys/stat.h> 15 #include <sys/types.h> 16 #include <sys/wait.h> 17 #include <unistd.h> 18 19 #include "cgroup_util.h" 20 #include "../../clone3/clone3_selftests.h" 21 22 bool cg_test_v1_named; 23 24 /* Returns read len on success, or -errno on failure. */ 25 ssize_t read_text(const char *path, char *buf, size_t max_len) 26 { 27 ssize_t len; 28 int fd; 29 30 fd = open(path, O_RDONLY); 31 if (fd < 0) 32 return -errno; 33 34 len = read(fd, buf, max_len - 1); 35 36 if (len >= 0) 37 buf[len] = 0; 38 39 close(fd); 40 return len < 0 ? -errno : len; 41 } 42 43 /* Returns written len on success, or -errno on failure. */ 44 ssize_t write_text(const char *path, char *buf, ssize_t len) 45 { 46 int fd; 47 48 fd = open(path, O_WRONLY | O_APPEND); 49 if (fd < 0) 50 return -errno; 51 52 len = write(fd, buf, len); 53 close(fd); 54 return len < 0 ? -errno : len; 55 } 56 57 char *cg_name(const char *root, const char *name) 58 { 59 size_t len = strlen(root) + strlen(name) + 2; 60 char *ret = malloc(len); 61 62 snprintf(ret, len, "%s/%s", root, name); 63 64 return ret; 65 } 66 67 char *cg_name_indexed(const char *root, const char *name, int index) 68 { 69 size_t len = strlen(root) + strlen(name) + 10; 70 char *ret = malloc(len); 71 72 snprintf(ret, len, "%s/%s_%d", root, name, index); 73 74 return ret; 75 } 76 77 char *cg_control(const char *cgroup, const char *control) 78 { 79 size_t len = strlen(cgroup) + strlen(control) + 2; 80 char *ret = malloc(len); 81 82 snprintf(ret, len, "%s/%s", cgroup, control); 83 84 return ret; 85 } 86 87 /* Returns 0 on success, or -errno on failure. */ 88 int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 89 { 90 char path[PATH_MAX]; 91 ssize_t ret; 92 93 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 94 95 ret = read_text(path, buf, len); 96 return ret >= 0 ? 0 : ret; 97 } 98 99 int cg_read_strcmp(const char *cgroup, const char *control, 100 const char *expected) 101 { 102 size_t size; 103 char *buf; 104 int ret; 105 106 /* Handle the case of comparing against empty string */ 107 if (!expected) 108 return -1; 109 else 110 size = strlen(expected) + 1; 111 112 buf = malloc(size); 113 if (!buf) 114 return -1; 115 116 if (cg_read(cgroup, control, buf, size)) { 117 free(buf); 118 return -1; 119 } 120 121 ret = strcmp(expected, buf); 122 free(buf); 123 return ret; 124 } 125 126 int cg_read_strcmp_wait(const char *cgroup, const char *control, 127 const char *expected) 128 { 129 int i, ret; 130 131 for (i = 0; i < 100; i++) { 132 ret = cg_read_strcmp(cgroup, control, expected); 133 if (!ret) 134 return ret; 135 usleep(10000); 136 } 137 138 return ret; 139 } 140 141 int cg_read_strstr(const char *cgroup, const char *control, const char *needle) 142 { 143 char buf[PAGE_SIZE]; 144 145 if (cg_read(cgroup, control, buf, sizeof(buf))) 146 return -1; 147 148 return strstr(buf, needle) ? 0 : -1; 149 } 150 151 long cg_read_long(const char *cgroup, const char *control) 152 { 153 char buf[128]; 154 155 if (cg_read(cgroup, control, buf, sizeof(buf))) 156 return -1; 157 158 return atol(buf); 159 } 160 161 long cg_read_long_fd(int fd) 162 { 163 char buf[128]; 164 165 if (pread(fd, buf, sizeof(buf), 0) <= 0) 166 return -1; 167 168 return atol(buf); 169 } 170 171 long cg_read_key_long(const char *cgroup, const char *control, const char *key) 172 { 173 char buf[PAGE_SIZE]; 174 char *ptr; 175 176 if (cg_read(cgroup, control, buf, sizeof(buf))) 177 return -1; 178 179 ptr = strstr(buf, key); 180 if (!ptr) 181 return -1; 182 183 return atol(ptr + strlen(key)); 184 } 185 186 long cg_read_key_long_poll(const char *cgroup, const char *control, 187 const char *key, long expected, int retries, 188 useconds_t wait_interval_us) 189 { 190 long val = -1; 191 int i; 192 193 for (i = 0; i < retries; i++) { 194 val = cg_read_key_long(cgroup, control, key); 195 if (val < 0) 196 return val; 197 198 if (val == expected) 199 break; 200 201 usleep(wait_interval_us); 202 } 203 204 return val; 205 } 206 207 long cg_read_lc(const char *cgroup, const char *control) 208 { 209 char buf[PAGE_SIZE]; 210 const char delim[] = "\n"; 211 char *line; 212 long cnt = 0; 213 214 if (cg_read(cgroup, control, buf, sizeof(buf))) 215 return -1; 216 217 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 218 cnt++; 219 220 return cnt; 221 } 222 223 /* Returns 0 on success, or -errno on failure. */ 224 int cg_write(const char *cgroup, const char *control, char *buf) 225 { 226 char path[PATH_MAX]; 227 ssize_t len = strlen(buf), ret; 228 229 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 230 ret = write_text(path, buf, len); 231 return ret == len ? 0 : ret; 232 } 233 234 /* 235 * Returns fd on success, or -1 on failure. 236 * (fd should be closed with close() as usual) 237 */ 238 int cg_open(const char *cgroup, const char *control, int flags) 239 { 240 char path[PATH_MAX]; 241 242 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 243 return open(path, flags); 244 } 245 246 int cg_write_numeric(const char *cgroup, const char *control, long value) 247 { 248 char buf[64]; 249 int ret; 250 251 ret = sprintf(buf, "%lu", value); 252 if (ret < 0) 253 return ret; 254 255 return cg_write(cgroup, control, buf); 256 } 257 258 static int cg_find_root(char *root, size_t len, const char *controller, 259 bool *nsdelegate) 260 { 261 char buf[10 * PAGE_SIZE]; 262 char *fs, *mount, *type, *options; 263 const char delim[] = "\n\t "; 264 265 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) 266 return -1; 267 268 /* 269 * Example: 270 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 271 */ 272 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { 273 mount = strtok(NULL, delim); 274 type = strtok(NULL, delim); 275 options = strtok(NULL, delim); 276 strtok(NULL, delim); 277 strtok(NULL, delim); 278 if (strcmp(type, "cgroup") == 0) { 279 if (!controller || !strstr(options, controller)) 280 continue; 281 } else if (strcmp(type, "cgroup2") == 0) { 282 if (controller && 283 cg_read_strstr(mount, "cgroup.controllers", controller)) 284 continue; 285 } else { 286 continue; 287 } 288 strncpy(root, mount, len); 289 290 if (nsdelegate) 291 *nsdelegate = !!strstr(options, "nsdelegate"); 292 return 0; 293 294 } 295 296 return -1; 297 } 298 299 int cg_find_controller_root(char *root, size_t len, const char *controller) 300 { 301 return cg_find_root(root, len, controller, NULL); 302 } 303 304 int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) 305 { 306 return cg_find_root(root, len, NULL, nsdelegate); 307 } 308 309 int cg_create(const char *cgroup) 310 { 311 return mkdir(cgroup, 0755); 312 } 313 314 int cg_wait_for_proc_count(const char *cgroup, int count) 315 { 316 char buf[10 * PAGE_SIZE] = {0}; 317 int attempts; 318 char *ptr; 319 320 for (attempts = 10; attempts >= 0; attempts--) { 321 int nr = 0; 322 323 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 324 break; 325 326 for (ptr = buf; *ptr; ptr++) 327 if (*ptr == '\n') 328 nr++; 329 330 if (nr >= count) 331 return 0; 332 333 usleep(100000); 334 } 335 336 return -1; 337 } 338 339 int cg_killall(const char *cgroup) 340 { 341 char buf[PAGE_SIZE]; 342 char *ptr = buf; 343 344 /* If cgroup.kill exists use it. */ 345 if (!cg_write(cgroup, "cgroup.kill", "1")) 346 return 0; 347 348 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 349 return -1; 350 351 while (ptr < buf + sizeof(buf)) { 352 int pid = strtol(ptr, &ptr, 10); 353 354 if (pid == 0) 355 break; 356 if (*ptr) 357 ptr++; 358 else 359 break; 360 if (kill(pid, SIGKILL)) 361 return -1; 362 } 363 364 return 0; 365 } 366 367 int cg_destroy(const char *cgroup) 368 { 369 int ret; 370 371 if (!cgroup) 372 return 0; 373 retry: 374 ret = rmdir(cgroup); 375 if (ret && errno == EBUSY) { 376 cg_killall(cgroup); 377 usleep(100); 378 goto retry; 379 } 380 381 if (ret && errno == ENOENT) 382 ret = 0; 383 384 return ret; 385 } 386 387 int cg_enter(const char *cgroup, int pid) 388 { 389 char pidbuf[64]; 390 391 snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 392 return cg_write(cgroup, "cgroup.procs", pidbuf); 393 } 394 395 int cg_enter_current(const char *cgroup) 396 { 397 return cg_write(cgroup, "cgroup.procs", "0"); 398 } 399 400 int cg_enter_current_thread(const char *cgroup) 401 { 402 return cg_write(cgroup, CG_THREADS_FILE, "0"); 403 } 404 405 int cg_run(const char *cgroup, 406 int (*fn)(const char *cgroup, void *arg), 407 void *arg) 408 { 409 int pid, retcode; 410 411 pid = fork(); 412 if (pid < 0) { 413 return pid; 414 } else if (pid == 0) { 415 char buf[64]; 416 417 snprintf(buf, sizeof(buf), "%d", getpid()); 418 if (cg_write(cgroup, "cgroup.procs", buf)) 419 exit(EXIT_FAILURE); 420 exit(fn(cgroup, arg)); 421 } else { 422 waitpid(pid, &retcode, 0); 423 if (WIFEXITED(retcode)) 424 return WEXITSTATUS(retcode); 425 else 426 return -1; 427 } 428 } 429 430 pid_t clone_into_cgroup(int cgroup_fd) 431 { 432 #ifdef CLONE_ARGS_SIZE_VER2 433 pid_t pid; 434 435 struct __clone_args args = { 436 .flags = CLONE_INTO_CGROUP, 437 .exit_signal = SIGCHLD, 438 .cgroup = cgroup_fd, 439 }; 440 441 pid = sys_clone3(&args, sizeof(struct __clone_args)); 442 /* 443 * Verify that this is a genuine test failure: 444 * ENOSYS -> clone3() not available 445 * E2BIG -> CLONE_INTO_CGROUP not available 446 */ 447 if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) 448 goto pretend_enosys; 449 450 return pid; 451 452 pretend_enosys: 453 #endif 454 errno = ENOSYS; 455 return -ENOSYS; 456 } 457 458 int clone_reap(pid_t pid, int options) 459 { 460 int ret; 461 siginfo_t info = { 462 .si_signo = 0, 463 }; 464 465 again: 466 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); 467 if (ret < 0) { 468 if (errno == EINTR) 469 goto again; 470 return -1; 471 } 472 473 if (options & WEXITED) { 474 if (WIFEXITED(info.si_status)) 475 return WEXITSTATUS(info.si_status); 476 } 477 478 if (options & WSTOPPED) { 479 if (WIFSTOPPED(info.si_status)) 480 return WSTOPSIG(info.si_status); 481 } 482 483 if (options & WCONTINUED) { 484 if (WIFCONTINUED(info.si_status)) 485 return 0; 486 } 487 488 return -1; 489 } 490 491 int dirfd_open_opath(const char *dir) 492 { 493 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); 494 } 495 496 #define close_prot_errno(fd) \ 497 if (fd >= 0) { \ 498 int _e_ = errno; \ 499 close(fd); \ 500 errno = _e_; \ 501 } 502 503 static int clone_into_cgroup_run_nowait(const char *cgroup, 504 int (*fn)(const char *cgroup, void *arg), 505 void *arg) 506 { 507 int cgroup_fd; 508 pid_t pid; 509 510 cgroup_fd = dirfd_open_opath(cgroup); 511 if (cgroup_fd < 0) 512 return -1; 513 514 pid = clone_into_cgroup(cgroup_fd); 515 close_prot_errno(cgroup_fd); 516 if (pid == 0) 517 exit(fn(cgroup, arg)); 518 519 return pid; 520 } 521 522 int cg_run_nowait(const char *cgroup, 523 int (*fn)(const char *cgroup, void *arg), 524 void *arg) 525 { 526 int pid; 527 528 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); 529 if (pid > 0) 530 return pid; 531 532 /* Genuine test failure. */ 533 if (pid < 0 && errno != ENOSYS) 534 return -1; 535 536 pid = fork(); 537 if (pid == 0) { 538 char buf[64]; 539 540 snprintf(buf, sizeof(buf), "%d", getpid()); 541 if (cg_write(cgroup, "cgroup.procs", buf)) 542 exit(EXIT_FAILURE); 543 exit(fn(cgroup, arg)); 544 } 545 546 return pid; 547 } 548 549 int proc_mount_contains(const char *option) 550 { 551 char buf[4 * PAGE_SIZE]; 552 ssize_t read; 553 554 read = read_text("/proc/mounts", buf, sizeof(buf)); 555 if (read < 0) 556 return read; 557 558 return strstr(buf, option) != NULL; 559 } 560 561 int cgroup_feature(const char *feature) 562 { 563 char buf[PAGE_SIZE]; 564 ssize_t read; 565 566 read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf)); 567 if (read < 0) 568 return read; 569 570 return strstr(buf, feature) != NULL; 571 } 572 573 ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) 574 { 575 char path[PATH_MAX]; 576 ssize_t ret; 577 578 if (!pid) 579 snprintf(path, sizeof(path), "/proc/%s/%s", 580 thread ? "thread-self" : "self", item); 581 else 582 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); 583 584 ret = read_text(path, buf, size); 585 return ret < 0 ? -1 : ret; 586 } 587 588 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) 589 { 590 char buf[PAGE_SIZE]; 591 592 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) 593 return -1; 594 595 return strstr(buf, needle) ? 0 : -1; 596 } 597 598 int clone_into_cgroup_run_wait(const char *cgroup) 599 { 600 int cgroup_fd; 601 pid_t pid; 602 603 cgroup_fd = dirfd_open_opath(cgroup); 604 if (cgroup_fd < 0) 605 return -1; 606 607 pid = clone_into_cgroup(cgroup_fd); 608 close_prot_errno(cgroup_fd); 609 if (pid < 0) 610 return -1; 611 612 if (pid == 0) 613 exit(EXIT_SUCCESS); 614 615 /* 616 * We don't care whether this fails. We only care whether the initial 617 * clone succeeded. 618 */ 619 (void)clone_reap(pid, WEXITED); 620 return 0; 621 } 622 623 static int __prepare_for_wait(const char *cgroup, const char *filename) 624 { 625 int fd, ret = -1; 626 627 fd = inotify_init1(0); 628 if (fd == -1) 629 return fd; 630 631 ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY); 632 if (ret == -1) { 633 close(fd); 634 fd = -1; 635 } 636 637 return fd; 638 } 639 640 int cg_prepare_for_wait(const char *cgroup) 641 { 642 return __prepare_for_wait(cgroup, "cgroup.events"); 643 } 644 645 int memcg_prepare_for_wait(const char *cgroup) 646 { 647 return __prepare_for_wait(cgroup, "memory.events"); 648 } 649 650 int cg_wait_for(int fd) 651 { 652 int ret = -1; 653 struct pollfd fds = { 654 .fd = fd, 655 .events = POLLIN, 656 }; 657 658 while (true) { 659 ret = poll(&fds, 1, 10000); 660 661 if (ret == -1) { 662 if (errno == EINTR) 663 continue; 664 665 break; 666 } 667 668 if (ret > 0 && fds.revents & POLLIN) { 669 ret = 0; 670 break; 671 } 672 } 673 674 return ret; 675 } 676