1 /* SPDX-License-Identifier: GPL-2.0 */ 2 3 #define _GNU_SOURCE 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <linux/limits.h> 8 #include <poll.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/inotify.h> 14 #include <sys/stat.h> 15 #include <sys/types.h> 16 #include <sys/wait.h> 17 #include <unistd.h> 18 19 #include "cgroup_util.h" 20 #include "../../clone3/clone3_selftests.h" 21 22 bool cg_test_v1_named; 23 24 /* Returns read len on success, or -errno on failure. */ 25 ssize_t read_text(const char *path, char *buf, size_t max_len) 26 { 27 ssize_t len; 28 int fd; 29 30 fd = open(path, O_RDONLY); 31 if (fd < 0) 32 return -errno; 33 34 len = read(fd, buf, max_len - 1); 35 36 if (len >= 0) 37 buf[len] = 0; 38 39 close(fd); 40 return len < 0 ? -errno : len; 41 } 42 43 /* Returns written len on success, or -errno on failure. */ 44 ssize_t write_text(const char *path, char *buf, ssize_t len) 45 { 46 int fd; 47 48 fd = open(path, O_WRONLY | O_APPEND); 49 if (fd < 0) 50 return -errno; 51 52 len = write(fd, buf, len); 53 close(fd); 54 return len < 0 ? -errno : len; 55 } 56 57 char *cg_name(const char *root, const char *name) 58 { 59 size_t len = strlen(root) + strlen(name) + 2; 60 char *ret = malloc(len); 61 62 snprintf(ret, len, "%s/%s", root, name); 63 64 return ret; 65 } 66 67 char *cg_name_indexed(const char *root, const char *name, int index) 68 { 69 size_t len = strlen(root) + strlen(name) + 10; 70 char *ret = malloc(len); 71 72 snprintf(ret, len, "%s/%s_%d", root, name, index); 73 74 return ret; 75 } 76 77 char *cg_control(const char *cgroup, const char *control) 78 { 79 size_t len = strlen(cgroup) + strlen(control) + 2; 80 char *ret = malloc(len); 81 82 snprintf(ret, len, "%s/%s", cgroup, control); 83 84 return ret; 85 } 86 87 /* Returns 0 on success, or -errno on failure. */ 88 int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 89 { 90 char path[PATH_MAX]; 91 ssize_t ret; 92 93 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 94 95 ret = read_text(path, buf, len); 96 return ret >= 0 ? 0 : ret; 97 } 98 99 int cg_read_strcmp(const char *cgroup, const char *control, 100 const char *expected) 101 { 102 size_t size; 103 char *buf; 104 int ret; 105 106 /* Handle the case of comparing against empty string */ 107 if (!expected) 108 return -1; 109 else 110 size = strlen(expected) + 1; 111 112 buf = malloc(size); 113 if (!buf) 114 return -1; 115 116 if (cg_read(cgroup, control, buf, size)) { 117 free(buf); 118 return -1; 119 } 120 121 ret = strcmp(expected, buf); 122 free(buf); 123 return ret; 124 } 125 126 int cg_read_strstr(const char *cgroup, const char *control, const char *needle) 127 { 128 char buf[PAGE_SIZE]; 129 130 if (cg_read(cgroup, control, buf, sizeof(buf))) 131 return -1; 132 133 return strstr(buf, needle) ? 0 : -1; 134 } 135 136 long cg_read_long(const char *cgroup, const char *control) 137 { 138 char buf[128]; 139 140 if (cg_read(cgroup, control, buf, sizeof(buf))) 141 return -1; 142 143 return atol(buf); 144 } 145 146 long cg_read_long_fd(int fd) 147 { 148 char buf[128]; 149 150 if (pread(fd, buf, sizeof(buf), 0) <= 0) 151 return -1; 152 153 return atol(buf); 154 } 155 156 long cg_read_key_long(const char *cgroup, const char *control, const char *key) 157 { 158 char buf[PAGE_SIZE]; 159 char *ptr; 160 161 if (cg_read(cgroup, control, buf, sizeof(buf))) 162 return -1; 163 164 ptr = strstr(buf, key); 165 if (!ptr) 166 return -1; 167 168 return atol(ptr + strlen(key)); 169 } 170 171 long cg_read_key_long_poll(const char *cgroup, const char *control, 172 const char *key, long expected, int retries, 173 useconds_t wait_interval_us) 174 { 175 long val = -1; 176 int i; 177 178 for (i = 0; i < retries; i++) { 179 val = cg_read_key_long(cgroup, control, key); 180 if (val < 0) 181 return val; 182 183 if (val == expected) 184 break; 185 186 usleep(wait_interval_us); 187 } 188 189 return val; 190 } 191 192 long cg_read_lc(const char *cgroup, const char *control) 193 { 194 char buf[PAGE_SIZE]; 195 const char delim[] = "\n"; 196 char *line; 197 long cnt = 0; 198 199 if (cg_read(cgroup, control, buf, sizeof(buf))) 200 return -1; 201 202 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 203 cnt++; 204 205 return cnt; 206 } 207 208 /* Returns 0 on success, or -errno on failure. */ 209 int cg_write(const char *cgroup, const char *control, char *buf) 210 { 211 char path[PATH_MAX]; 212 ssize_t len = strlen(buf), ret; 213 214 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 215 ret = write_text(path, buf, len); 216 return ret == len ? 0 : ret; 217 } 218 219 /* 220 * Returns fd on success, or -1 on failure. 221 * (fd should be closed with close() as usual) 222 */ 223 int cg_open(const char *cgroup, const char *control, int flags) 224 { 225 char path[PATH_MAX]; 226 227 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 228 return open(path, flags); 229 } 230 231 int cg_write_numeric(const char *cgroup, const char *control, long value) 232 { 233 char buf[64]; 234 int ret; 235 236 ret = sprintf(buf, "%lu", value); 237 if (ret < 0) 238 return ret; 239 240 return cg_write(cgroup, control, buf); 241 } 242 243 static int cg_find_root(char *root, size_t len, const char *controller, 244 bool *nsdelegate) 245 { 246 char buf[10 * PAGE_SIZE]; 247 char *fs, *mount, *type, *options; 248 const char delim[] = "\n\t "; 249 250 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) 251 return -1; 252 253 /* 254 * Example: 255 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 256 */ 257 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { 258 mount = strtok(NULL, delim); 259 type = strtok(NULL, delim); 260 options = strtok(NULL, delim); 261 strtok(NULL, delim); 262 strtok(NULL, delim); 263 if (strcmp(type, "cgroup") == 0) { 264 if (!controller || !strstr(options, controller)) 265 continue; 266 } else if (strcmp(type, "cgroup2") == 0) { 267 if (controller && 268 cg_read_strstr(mount, "cgroup.controllers", controller)) 269 continue; 270 } else { 271 continue; 272 } 273 strncpy(root, mount, len); 274 275 if (nsdelegate) 276 *nsdelegate = !!strstr(options, "nsdelegate"); 277 return 0; 278 279 } 280 281 return -1; 282 } 283 284 int cg_find_controller_root(char *root, size_t len, const char *controller) 285 { 286 return cg_find_root(root, len, controller, NULL); 287 } 288 289 int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) 290 { 291 return cg_find_root(root, len, NULL, nsdelegate); 292 } 293 294 int cg_create(const char *cgroup) 295 { 296 return mkdir(cgroup, 0755); 297 } 298 299 int cg_wait_for_proc_count(const char *cgroup, int count) 300 { 301 char buf[10 * PAGE_SIZE] = {0}; 302 int attempts; 303 char *ptr; 304 305 for (attempts = 10; attempts >= 0; attempts--) { 306 int nr = 0; 307 308 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 309 break; 310 311 for (ptr = buf; *ptr; ptr++) 312 if (*ptr == '\n') 313 nr++; 314 315 if (nr >= count) 316 return 0; 317 318 usleep(100000); 319 } 320 321 return -1; 322 } 323 324 int cg_killall(const char *cgroup) 325 { 326 char buf[PAGE_SIZE]; 327 char *ptr = buf; 328 329 /* If cgroup.kill exists use it. */ 330 if (!cg_write(cgroup, "cgroup.kill", "1")) 331 return 0; 332 333 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 334 return -1; 335 336 while (ptr < buf + sizeof(buf)) { 337 int pid = strtol(ptr, &ptr, 10); 338 339 if (pid == 0) 340 break; 341 if (*ptr) 342 ptr++; 343 else 344 break; 345 if (kill(pid, SIGKILL)) 346 return -1; 347 } 348 349 return 0; 350 } 351 352 int cg_destroy(const char *cgroup) 353 { 354 int ret; 355 356 if (!cgroup) 357 return 0; 358 retry: 359 ret = rmdir(cgroup); 360 if (ret && errno == EBUSY) { 361 cg_killall(cgroup); 362 usleep(100); 363 goto retry; 364 } 365 366 if (ret && errno == ENOENT) 367 ret = 0; 368 369 return ret; 370 } 371 372 int cg_enter(const char *cgroup, int pid) 373 { 374 char pidbuf[64]; 375 376 snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 377 return cg_write(cgroup, "cgroup.procs", pidbuf); 378 } 379 380 int cg_enter_current(const char *cgroup) 381 { 382 return cg_write(cgroup, "cgroup.procs", "0"); 383 } 384 385 int cg_enter_current_thread(const char *cgroup) 386 { 387 return cg_write(cgroup, CG_THREADS_FILE, "0"); 388 } 389 390 int cg_run(const char *cgroup, 391 int (*fn)(const char *cgroup, void *arg), 392 void *arg) 393 { 394 int pid, retcode; 395 396 pid = fork(); 397 if (pid < 0) { 398 return pid; 399 } else if (pid == 0) { 400 char buf[64]; 401 402 snprintf(buf, sizeof(buf), "%d", getpid()); 403 if (cg_write(cgroup, "cgroup.procs", buf)) 404 exit(EXIT_FAILURE); 405 exit(fn(cgroup, arg)); 406 } else { 407 waitpid(pid, &retcode, 0); 408 if (WIFEXITED(retcode)) 409 return WEXITSTATUS(retcode); 410 else 411 return -1; 412 } 413 } 414 415 pid_t clone_into_cgroup(int cgroup_fd) 416 { 417 #ifdef CLONE_ARGS_SIZE_VER2 418 pid_t pid; 419 420 struct __clone_args args = { 421 .flags = CLONE_INTO_CGROUP, 422 .exit_signal = SIGCHLD, 423 .cgroup = cgroup_fd, 424 }; 425 426 pid = sys_clone3(&args, sizeof(struct __clone_args)); 427 /* 428 * Verify that this is a genuine test failure: 429 * ENOSYS -> clone3() not available 430 * E2BIG -> CLONE_INTO_CGROUP not available 431 */ 432 if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) 433 goto pretend_enosys; 434 435 return pid; 436 437 pretend_enosys: 438 #endif 439 errno = ENOSYS; 440 return -ENOSYS; 441 } 442 443 int clone_reap(pid_t pid, int options) 444 { 445 int ret; 446 siginfo_t info = { 447 .si_signo = 0, 448 }; 449 450 again: 451 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); 452 if (ret < 0) { 453 if (errno == EINTR) 454 goto again; 455 return -1; 456 } 457 458 if (options & WEXITED) { 459 if (WIFEXITED(info.si_status)) 460 return WEXITSTATUS(info.si_status); 461 } 462 463 if (options & WSTOPPED) { 464 if (WIFSTOPPED(info.si_status)) 465 return WSTOPSIG(info.si_status); 466 } 467 468 if (options & WCONTINUED) { 469 if (WIFCONTINUED(info.si_status)) 470 return 0; 471 } 472 473 return -1; 474 } 475 476 int dirfd_open_opath(const char *dir) 477 { 478 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); 479 } 480 481 #define close_prot_errno(fd) \ 482 if (fd >= 0) { \ 483 int _e_ = errno; \ 484 close(fd); \ 485 errno = _e_; \ 486 } 487 488 static int clone_into_cgroup_run_nowait(const char *cgroup, 489 int (*fn)(const char *cgroup, void *arg), 490 void *arg) 491 { 492 int cgroup_fd; 493 pid_t pid; 494 495 cgroup_fd = dirfd_open_opath(cgroup); 496 if (cgroup_fd < 0) 497 return -1; 498 499 pid = clone_into_cgroup(cgroup_fd); 500 close_prot_errno(cgroup_fd); 501 if (pid == 0) 502 exit(fn(cgroup, arg)); 503 504 return pid; 505 } 506 507 int cg_run_nowait(const char *cgroup, 508 int (*fn)(const char *cgroup, void *arg), 509 void *arg) 510 { 511 int pid; 512 513 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); 514 if (pid > 0) 515 return pid; 516 517 /* Genuine test failure. */ 518 if (pid < 0 && errno != ENOSYS) 519 return -1; 520 521 pid = fork(); 522 if (pid == 0) { 523 char buf[64]; 524 525 snprintf(buf, sizeof(buf), "%d", getpid()); 526 if (cg_write(cgroup, "cgroup.procs", buf)) 527 exit(EXIT_FAILURE); 528 exit(fn(cgroup, arg)); 529 } 530 531 return pid; 532 } 533 534 int proc_mount_contains(const char *option) 535 { 536 char buf[4 * PAGE_SIZE]; 537 ssize_t read; 538 539 read = read_text("/proc/mounts", buf, sizeof(buf)); 540 if (read < 0) 541 return read; 542 543 return strstr(buf, option) != NULL; 544 } 545 546 int cgroup_feature(const char *feature) 547 { 548 char buf[PAGE_SIZE]; 549 ssize_t read; 550 551 read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf)); 552 if (read < 0) 553 return read; 554 555 return strstr(buf, feature) != NULL; 556 } 557 558 ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) 559 { 560 char path[PATH_MAX]; 561 ssize_t ret; 562 563 if (!pid) 564 snprintf(path, sizeof(path), "/proc/%s/%s", 565 thread ? "thread-self" : "self", item); 566 else 567 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); 568 569 ret = read_text(path, buf, size); 570 return ret < 0 ? -1 : ret; 571 } 572 573 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) 574 { 575 char buf[PAGE_SIZE]; 576 577 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) 578 return -1; 579 580 return strstr(buf, needle) ? 0 : -1; 581 } 582 583 int clone_into_cgroup_run_wait(const char *cgroup) 584 { 585 int cgroup_fd; 586 pid_t pid; 587 588 cgroup_fd = dirfd_open_opath(cgroup); 589 if (cgroup_fd < 0) 590 return -1; 591 592 pid = clone_into_cgroup(cgroup_fd); 593 close_prot_errno(cgroup_fd); 594 if (pid < 0) 595 return -1; 596 597 if (pid == 0) 598 exit(EXIT_SUCCESS); 599 600 /* 601 * We don't care whether this fails. We only care whether the initial 602 * clone succeeded. 603 */ 604 (void)clone_reap(pid, WEXITED); 605 return 0; 606 } 607 608 static int __prepare_for_wait(const char *cgroup, const char *filename) 609 { 610 int fd, ret = -1; 611 612 fd = inotify_init1(0); 613 if (fd == -1) 614 return fd; 615 616 ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY); 617 if (ret == -1) { 618 close(fd); 619 fd = -1; 620 } 621 622 return fd; 623 } 624 625 int cg_prepare_for_wait(const char *cgroup) 626 { 627 return __prepare_for_wait(cgroup, "cgroup.events"); 628 } 629 630 int memcg_prepare_for_wait(const char *cgroup) 631 { 632 return __prepare_for_wait(cgroup, "memory.events"); 633 } 634 635 int cg_wait_for(int fd) 636 { 637 int ret = -1; 638 struct pollfd fds = { 639 .fd = fd, 640 .events = POLLIN, 641 }; 642 643 while (true) { 644 ret = poll(&fds, 1, 10000); 645 646 if (ret == -1) { 647 if (errno == EINTR) 648 continue; 649 650 break; 651 } 652 653 if (ret > 0 && fds.revents & POLLIN) { 654 ret = 0; 655 break; 656 } 657 } 658 659 return ret; 660 } 661