1 /* SPDX-License-Identifier: GPL-2.0 */ 2 3 #define _GNU_SOURCE 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <linux/limits.h> 8 #include <poll.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/inotify.h> 14 #include <sys/stat.h> 15 #include <sys/types.h> 16 #include <sys/wait.h> 17 #include <unistd.h> 18 19 #include "cgroup_util.h" 20 #include "../../clone3/clone3_selftests.h" 21 22 bool cg_test_v1_named; 23 24 /* Returns read len on success, or -errno on failure. */ 25 ssize_t read_text(const char *path, char *buf, size_t max_len) 26 { 27 ssize_t len; 28 int fd; 29 30 fd = open(path, O_RDONLY); 31 if (fd < 0) 32 return -errno; 33 34 len = read(fd, buf, max_len - 1); 35 36 if (len >= 0) 37 buf[len] = 0; 38 39 close(fd); 40 return len < 0 ? -errno : len; 41 } 42 43 /* Returns written len on success, or -errno on failure. */ 44 ssize_t write_text(const char *path, char *buf, ssize_t len) 45 { 46 int fd; 47 48 fd = open(path, O_WRONLY | O_APPEND); 49 if (fd < 0) 50 return -errno; 51 52 len = write(fd, buf, len); 53 close(fd); 54 return len < 0 ? -errno : len; 55 } 56 57 char *cg_name(const char *root, const char *name) 58 { 59 size_t len = strlen(root) + strlen(name) + 2; 60 char *ret = malloc(len); 61 62 snprintf(ret, len, "%s/%s", root, name); 63 64 return ret; 65 } 66 67 char *cg_name_indexed(const char *root, const char *name, int index) 68 { 69 size_t len = strlen(root) + strlen(name) + 10; 70 char *ret = malloc(len); 71 72 snprintf(ret, len, "%s/%s_%d", root, name, index); 73 74 return ret; 75 } 76 77 char *cg_control(const char *cgroup, const char *control) 78 { 79 size_t len = strlen(cgroup) + strlen(control) + 2; 80 char *ret = malloc(len); 81 82 snprintf(ret, len, "%s/%s", cgroup, control); 83 84 return ret; 85 } 86 87 /* Returns 0 on success, or -errno on failure. */ 88 int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 89 { 90 char path[PATH_MAX]; 91 ssize_t ret; 92 93 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 94 95 ret = read_text(path, buf, len); 96 return ret >= 0 ? 0 : ret; 97 } 98 99 int cg_read_strcmp(const char *cgroup, const char *control, 100 const char *expected) 101 { 102 size_t size; 103 char *buf; 104 int ret; 105 106 /* Handle the case of comparing against empty string */ 107 if (!expected) 108 return -1; 109 else 110 size = strlen(expected) + 1; 111 112 buf = malloc(size); 113 if (!buf) 114 return -1; 115 116 if (cg_read(cgroup, control, buf, size)) { 117 free(buf); 118 return -1; 119 } 120 121 ret = strcmp(expected, buf); 122 free(buf); 123 return ret; 124 } 125 126 int cg_read_strstr(const char *cgroup, const char *control, const char *needle) 127 { 128 char buf[PAGE_SIZE]; 129 130 if (cg_read(cgroup, control, buf, sizeof(buf))) 131 return -1; 132 133 return strstr(buf, needle) ? 0 : -1; 134 } 135 136 long cg_read_long(const char *cgroup, const char *control) 137 { 138 char buf[128]; 139 140 if (cg_read(cgroup, control, buf, sizeof(buf))) 141 return -1; 142 143 return atol(buf); 144 } 145 146 long cg_read_long_fd(int fd) 147 { 148 char buf[128]; 149 150 if (pread(fd, buf, sizeof(buf), 0) <= 0) 151 return -1; 152 153 return atol(buf); 154 } 155 156 long cg_read_key_long(const char *cgroup, const char *control, const char *key) 157 { 158 char buf[PAGE_SIZE]; 159 char *ptr; 160 161 if (cg_read(cgroup, control, buf, sizeof(buf))) 162 return -1; 163 164 ptr = strstr(buf, key); 165 if (!ptr) 166 return -1; 167 168 return atol(ptr + strlen(key)); 169 } 170 171 long cg_read_lc(const char *cgroup, const char *control) 172 { 173 char buf[PAGE_SIZE]; 174 const char delim[] = "\n"; 175 char *line; 176 long cnt = 0; 177 178 if (cg_read(cgroup, control, buf, sizeof(buf))) 179 return -1; 180 181 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 182 cnt++; 183 184 return cnt; 185 } 186 187 /* Returns 0 on success, or -errno on failure. */ 188 int cg_write(const char *cgroup, const char *control, char *buf) 189 { 190 char path[PATH_MAX]; 191 ssize_t len = strlen(buf), ret; 192 193 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 194 ret = write_text(path, buf, len); 195 return ret == len ? 0 : ret; 196 } 197 198 /* 199 * Returns fd on success, or -1 on failure. 200 * (fd should be closed with close() as usual) 201 */ 202 int cg_open(const char *cgroup, const char *control, int flags) 203 { 204 char path[PATH_MAX]; 205 206 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 207 return open(path, flags); 208 } 209 210 int cg_write_numeric(const char *cgroup, const char *control, long value) 211 { 212 char buf[64]; 213 int ret; 214 215 ret = sprintf(buf, "%lu", value); 216 if (ret < 0) 217 return ret; 218 219 return cg_write(cgroup, control, buf); 220 } 221 222 static int cg_find_root(char *root, size_t len, const char *controller, 223 bool *nsdelegate) 224 { 225 char buf[10 * PAGE_SIZE]; 226 char *fs, *mount, *type, *options; 227 const char delim[] = "\n\t "; 228 229 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) 230 return -1; 231 232 /* 233 * Example: 234 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 235 */ 236 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { 237 mount = strtok(NULL, delim); 238 type = strtok(NULL, delim); 239 options = strtok(NULL, delim); 240 strtok(NULL, delim); 241 strtok(NULL, delim); 242 if (strcmp(type, "cgroup") == 0) { 243 if (!controller || !strstr(options, controller)) 244 continue; 245 } else if (strcmp(type, "cgroup2") == 0) { 246 if (controller && 247 cg_read_strstr(mount, "cgroup.controllers", controller)) 248 continue; 249 } else { 250 continue; 251 } 252 strncpy(root, mount, len); 253 254 if (nsdelegate) 255 *nsdelegate = !!strstr(options, "nsdelegate"); 256 return 0; 257 258 } 259 260 return -1; 261 } 262 263 int cg_find_controller_root(char *root, size_t len, const char *controller) 264 { 265 return cg_find_root(root, len, controller, NULL); 266 } 267 268 int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) 269 { 270 return cg_find_root(root, len, NULL, nsdelegate); 271 } 272 273 int cg_create(const char *cgroup) 274 { 275 return mkdir(cgroup, 0755); 276 } 277 278 int cg_wait_for_proc_count(const char *cgroup, int count) 279 { 280 char buf[10 * PAGE_SIZE] = {0}; 281 int attempts; 282 char *ptr; 283 284 for (attempts = 10; attempts >= 0; attempts--) { 285 int nr = 0; 286 287 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 288 break; 289 290 for (ptr = buf; *ptr; ptr++) 291 if (*ptr == '\n') 292 nr++; 293 294 if (nr >= count) 295 return 0; 296 297 usleep(100000); 298 } 299 300 return -1; 301 } 302 303 int cg_killall(const char *cgroup) 304 { 305 char buf[PAGE_SIZE]; 306 char *ptr = buf; 307 308 /* If cgroup.kill exists use it. */ 309 if (!cg_write(cgroup, "cgroup.kill", "1")) 310 return 0; 311 312 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 313 return -1; 314 315 while (ptr < buf + sizeof(buf)) { 316 int pid = strtol(ptr, &ptr, 10); 317 318 if (pid == 0) 319 break; 320 if (*ptr) 321 ptr++; 322 else 323 break; 324 if (kill(pid, SIGKILL)) 325 return -1; 326 } 327 328 return 0; 329 } 330 331 int cg_destroy(const char *cgroup) 332 { 333 int ret; 334 335 if (!cgroup) 336 return 0; 337 retry: 338 ret = rmdir(cgroup); 339 if (ret && errno == EBUSY) { 340 cg_killall(cgroup); 341 usleep(100); 342 goto retry; 343 } 344 345 if (ret && errno == ENOENT) 346 ret = 0; 347 348 return ret; 349 } 350 351 int cg_enter(const char *cgroup, int pid) 352 { 353 char pidbuf[64]; 354 355 snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 356 return cg_write(cgroup, "cgroup.procs", pidbuf); 357 } 358 359 int cg_enter_current(const char *cgroup) 360 { 361 return cg_write(cgroup, "cgroup.procs", "0"); 362 } 363 364 int cg_enter_current_thread(const char *cgroup) 365 { 366 return cg_write(cgroup, CG_THREADS_FILE, "0"); 367 } 368 369 int cg_run(const char *cgroup, 370 int (*fn)(const char *cgroup, void *arg), 371 void *arg) 372 { 373 int pid, retcode; 374 375 pid = fork(); 376 if (pid < 0) { 377 return pid; 378 } else if (pid == 0) { 379 char buf[64]; 380 381 snprintf(buf, sizeof(buf), "%d", getpid()); 382 if (cg_write(cgroup, "cgroup.procs", buf)) 383 exit(EXIT_FAILURE); 384 exit(fn(cgroup, arg)); 385 } else { 386 waitpid(pid, &retcode, 0); 387 if (WIFEXITED(retcode)) 388 return WEXITSTATUS(retcode); 389 else 390 return -1; 391 } 392 } 393 394 pid_t clone_into_cgroup(int cgroup_fd) 395 { 396 #ifdef CLONE_ARGS_SIZE_VER2 397 pid_t pid; 398 399 struct __clone_args args = { 400 .flags = CLONE_INTO_CGROUP, 401 .exit_signal = SIGCHLD, 402 .cgroup = cgroup_fd, 403 }; 404 405 pid = sys_clone3(&args, sizeof(struct __clone_args)); 406 /* 407 * Verify that this is a genuine test failure: 408 * ENOSYS -> clone3() not available 409 * E2BIG -> CLONE_INTO_CGROUP not available 410 */ 411 if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) 412 goto pretend_enosys; 413 414 return pid; 415 416 pretend_enosys: 417 #endif 418 errno = ENOSYS; 419 return -ENOSYS; 420 } 421 422 int clone_reap(pid_t pid, int options) 423 { 424 int ret; 425 siginfo_t info = { 426 .si_signo = 0, 427 }; 428 429 again: 430 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); 431 if (ret < 0) { 432 if (errno == EINTR) 433 goto again; 434 return -1; 435 } 436 437 if (options & WEXITED) { 438 if (WIFEXITED(info.si_status)) 439 return WEXITSTATUS(info.si_status); 440 } 441 442 if (options & WSTOPPED) { 443 if (WIFSTOPPED(info.si_status)) 444 return WSTOPSIG(info.si_status); 445 } 446 447 if (options & WCONTINUED) { 448 if (WIFCONTINUED(info.si_status)) 449 return 0; 450 } 451 452 return -1; 453 } 454 455 int dirfd_open_opath(const char *dir) 456 { 457 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); 458 } 459 460 #define close_prot_errno(fd) \ 461 if (fd >= 0) { \ 462 int _e_ = errno; \ 463 close(fd); \ 464 errno = _e_; \ 465 } 466 467 static int clone_into_cgroup_run_nowait(const char *cgroup, 468 int (*fn)(const char *cgroup, void *arg), 469 void *arg) 470 { 471 int cgroup_fd; 472 pid_t pid; 473 474 cgroup_fd = dirfd_open_opath(cgroup); 475 if (cgroup_fd < 0) 476 return -1; 477 478 pid = clone_into_cgroup(cgroup_fd); 479 close_prot_errno(cgroup_fd); 480 if (pid == 0) 481 exit(fn(cgroup, arg)); 482 483 return pid; 484 } 485 486 int cg_run_nowait(const char *cgroup, 487 int (*fn)(const char *cgroup, void *arg), 488 void *arg) 489 { 490 int pid; 491 492 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); 493 if (pid > 0) 494 return pid; 495 496 /* Genuine test failure. */ 497 if (pid < 0 && errno != ENOSYS) 498 return -1; 499 500 pid = fork(); 501 if (pid == 0) { 502 char buf[64]; 503 504 snprintf(buf, sizeof(buf), "%d", getpid()); 505 if (cg_write(cgroup, "cgroup.procs", buf)) 506 exit(EXIT_FAILURE); 507 exit(fn(cgroup, arg)); 508 } 509 510 return pid; 511 } 512 513 int proc_mount_contains(const char *option) 514 { 515 char buf[4 * PAGE_SIZE]; 516 ssize_t read; 517 518 read = read_text("/proc/mounts", buf, sizeof(buf)); 519 if (read < 0) 520 return read; 521 522 return strstr(buf, option) != NULL; 523 } 524 525 ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) 526 { 527 char path[PATH_MAX]; 528 ssize_t ret; 529 530 if (!pid) 531 snprintf(path, sizeof(path), "/proc/%s/%s", 532 thread ? "thread-self" : "self", item); 533 else 534 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); 535 536 ret = read_text(path, buf, size); 537 return ret < 0 ? -1 : ret; 538 } 539 540 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) 541 { 542 char buf[PAGE_SIZE]; 543 544 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) 545 return -1; 546 547 return strstr(buf, needle) ? 0 : -1; 548 } 549 550 int clone_into_cgroup_run_wait(const char *cgroup) 551 { 552 int cgroup_fd; 553 pid_t pid; 554 555 cgroup_fd = dirfd_open_opath(cgroup); 556 if (cgroup_fd < 0) 557 return -1; 558 559 pid = clone_into_cgroup(cgroup_fd); 560 close_prot_errno(cgroup_fd); 561 if (pid < 0) 562 return -1; 563 564 if (pid == 0) 565 exit(EXIT_SUCCESS); 566 567 /* 568 * We don't care whether this fails. We only care whether the initial 569 * clone succeeded. 570 */ 571 (void)clone_reap(pid, WEXITED); 572 return 0; 573 } 574 575 static int __prepare_for_wait(const char *cgroup, const char *filename) 576 { 577 int fd, ret = -1; 578 579 fd = inotify_init1(0); 580 if (fd == -1) 581 return fd; 582 583 ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY); 584 if (ret == -1) { 585 close(fd); 586 fd = -1; 587 } 588 589 return fd; 590 } 591 592 int cg_prepare_for_wait(const char *cgroup) 593 { 594 return __prepare_for_wait(cgroup, "cgroup.events"); 595 } 596 597 int memcg_prepare_for_wait(const char *cgroup) 598 { 599 return __prepare_for_wait(cgroup, "memory.events"); 600 } 601 602 int cg_wait_for(int fd) 603 { 604 int ret = -1; 605 struct pollfd fds = { 606 .fd = fd, 607 .events = POLLIN, 608 }; 609 610 while (true) { 611 ret = poll(&fds, 1, 10000); 612 613 if (ret == -1) { 614 if (errno == EINTR) 615 continue; 616 617 break; 618 } 619 620 if (ret > 0 && fds.revents & POLLIN) { 621 ret = 0; 622 break; 623 } 624 } 625 626 return ret; 627 } 628