1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <sched.h> 4 #include <sys/mount.h> 5 #include <sys/stat.h> 6 #include <sys/types.h> 7 #include <sys/xattr.h> 8 #include <linux/limits.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <linux/sched.h> 12 #include <fcntl.h> 13 #include <unistd.h> 14 #include <ftw.h> 15 16 #include "cgroup_helpers.h" 17 #include "bpf_util.h" 18 19 /* 20 * To avoid relying on the system setup, when setup_cgroup_env is called 21 * we create a new mount namespace, and cgroup namespace. The cgroupv2 22 * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't 23 * have cgroupv2 enabled at this point in time. It's easier to create our 24 * own mount namespace and manage it ourselves. We assume /mnt exists. 25 * 26 * Related cgroupv1 helpers are named *classid*(), since we only use the 27 * net_cls controller for tagging net_cls.classid. We assume the default 28 * mount under /sys/fs/cgroup/net_cls, which should be the case for the 29 * vast majority of users. 30 */ 31 32 #define WALK_FD_LIMIT 16 33 34 #define CGROUP_MOUNT_PATH "/mnt" 35 #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" 36 #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" 37 #define CGROUP_WORK_DIR "/cgroup-test-work-dir" 38 39 #define format_cgroup_path_pid(buf, path, pid) \ 40 snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ 41 CGROUP_WORK_DIR, pid, path) 42 43 #define format_cgroup_path(buf, path) \ 44 format_cgroup_path_pid(buf, path, getpid()) 45 46 #define format_parent_cgroup_path(buf, path) \ 47 format_cgroup_path_pid(buf, path, getppid()) 48 49 #define format_classid_path_pid(buf, pid) \ 50 snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \ 51 CGROUP_WORK_DIR, pid) 52 53 #define format_classid_path(buf) \ 54 format_classid_path_pid(buf, getpid()) 55 56 static __thread bool cgroup_workdir_mounted; 57 58 static void __cleanup_cgroup_environment(void); 59 60 static int __enable_controllers(const char *cgroup_path, const char *controllers) 61 { 62 char path[PATH_MAX + 1]; 63 char enable[PATH_MAX + 1]; 64 char *c, *c2; 65 int fd, cfd; 66 ssize_t len; 67 68 /* If not controllers are passed, enable all available controllers */ 69 if (!controllers) { 70 snprintf(path, sizeof(path), "%s/cgroup.controllers", 71 cgroup_path); 72 fd = open(path, O_RDONLY); 73 if (fd < 0) { 74 log_err("Opening cgroup.controllers: %s", path); 75 return 1; 76 } 77 len = read(fd, enable, sizeof(enable) - 1); 78 if (len < 0) { 79 close(fd); 80 log_err("Reading cgroup.controllers: %s", path); 81 return 1; 82 } else if (len == 0) { /* No controllers to enable */ 83 close(fd); 84 return 0; 85 } 86 enable[len] = 0; 87 close(fd); 88 } else { 89 bpf_strlcpy(enable, controllers, sizeof(enable)); 90 } 91 92 snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); 93 cfd = open(path, O_RDWR); 94 if (cfd < 0) { 95 log_err("Opening cgroup.subtree_control: %s", path); 96 return 1; 97 } 98 99 for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) { 100 if (dprintf(cfd, "+%s\n", c) <= 0) { 101 log_err("Enabling controller %s: %s", c, path); 102 close(cfd); 103 return 1; 104 } 105 } 106 close(cfd); 107 return 0; 108 } 109 110 /** 111 * enable_controllers() - Enable cgroup v2 controllers 112 * @relative_path: The cgroup path, relative to the workdir 113 * @controllers: List of controllers to enable in cgroup.controllers format 114 * 115 * 116 * Enable given cgroup v2 controllers, if @controllers is NULL, enable all 117 * available controllers. 118 * 119 * If successful, 0 is returned. 120 */ 121 int enable_controllers(const char *relative_path, const char *controllers) 122 { 123 char cgroup_path[PATH_MAX + 1]; 124 125 format_cgroup_path(cgroup_path, relative_path); 126 return __enable_controllers(cgroup_path, controllers); 127 } 128 129 static int __write_cgroup_file(const char *cgroup_path, const char *file, 130 const char *buf) 131 { 132 char file_path[PATH_MAX + 1]; 133 int fd; 134 135 snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file); 136 fd = open(file_path, O_RDWR); 137 if (fd < 0) { 138 log_err("Opening %s", file_path); 139 return 1; 140 } 141 142 if (dprintf(fd, "%s", buf) <= 0) { 143 log_err("Writing to %s", file_path); 144 close(fd); 145 return 1; 146 } 147 close(fd); 148 return 0; 149 } 150 151 /** 152 * write_cgroup_file() - Write to a cgroup file 153 * @relative_path: The cgroup path, relative to the workdir 154 * @file: The name of the file in cgroupfs to write to 155 * @buf: Buffer to write to the file 156 * 157 * Write to a file in the given cgroup's directory. 158 * 159 * If successful, 0 is returned. 160 */ 161 int write_cgroup_file(const char *relative_path, const char *file, 162 const char *buf) 163 { 164 char cgroup_path[PATH_MAX - 24]; 165 166 format_cgroup_path(cgroup_path, relative_path); 167 return __write_cgroup_file(cgroup_path, file, buf); 168 } 169 170 /** 171 * write_cgroup_file_parent() - Write to a cgroup file in the parent process 172 * workdir 173 * @relative_path: The cgroup path, relative to the parent process workdir 174 * @file: The name of the file in cgroupfs to write to 175 * @buf: Buffer to write to the file 176 * 177 * Write to a file in the given cgroup's directory under the parent process 178 * workdir. 179 * 180 * If successful, 0 is returned. 181 */ 182 int write_cgroup_file_parent(const char *relative_path, const char *file, 183 const char *buf) 184 { 185 char cgroup_path[PATH_MAX - 24]; 186 187 format_parent_cgroup_path(cgroup_path, relative_path); 188 return __write_cgroup_file(cgroup_path, file, buf); 189 } 190 191 /** 192 * setup_cgroup_environment() - Setup the cgroup environment 193 * 194 * After calling this function, cleanup_cgroup_environment should be called 195 * once testing is complete. 196 * 197 * This function will print an error to stderr and return 1 if it is unable 198 * to setup the cgroup environment. If setup is successful, 0 is returned. 199 */ 200 int setup_cgroup_environment(void) 201 { 202 char cgroup_workdir[PATH_MAX - 24]; 203 204 format_cgroup_path(cgroup_workdir, ""); 205 206 if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) { 207 log_err("mkdir mount"); 208 return 1; 209 } 210 211 if (unshare(CLONE_NEWNS)) { 212 log_err("unshare"); 213 return 1; 214 } 215 216 if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { 217 log_err("mount fakeroot"); 218 return 1; 219 } 220 221 if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) { 222 log_err("mount cgroup2"); 223 return 1; 224 } 225 cgroup_workdir_mounted = true; 226 227 /* Cleanup existing failed runs, now that the environment is setup */ 228 __cleanup_cgroup_environment(); 229 230 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 231 log_err("mkdir cgroup work dir"); 232 return 1; 233 } 234 235 /* Enable all available controllers to increase test coverage */ 236 if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) || 237 __enable_controllers(cgroup_workdir, NULL)) 238 return 1; 239 240 return 0; 241 } 242 243 static int nftwfunc(const char *filename, const struct stat *statptr, 244 int fileflags, struct FTW *pfwt) 245 { 246 if ((fileflags & FTW_D) && rmdir(filename)) 247 log_err("Removing cgroup: %s", filename); 248 return 0; 249 } 250 251 static int join_cgroup_from_top(const char *cgroup_path) 252 { 253 char cgroup_procs_path[PATH_MAX + 1]; 254 pid_t pid = getpid(); 255 int fd, rc = 0; 256 257 snprintf(cgroup_procs_path, sizeof(cgroup_procs_path), 258 "%s/cgroup.procs", cgroup_path); 259 260 fd = open(cgroup_procs_path, O_WRONLY); 261 if (fd < 0) { 262 log_err("Opening Cgroup Procs: %s", cgroup_procs_path); 263 return 1; 264 } 265 266 if (dprintf(fd, "%d\n", pid) < 0) { 267 log_err("Joining Cgroup"); 268 rc = 1; 269 } 270 271 close(fd); 272 return rc; 273 } 274 275 /** 276 * join_cgroup() - Join a cgroup 277 * @relative_path: The cgroup path, relative to the workdir, to join 278 * 279 * This function expects a cgroup to already be created, relative to the cgroup 280 * work dir, and it joins it. For example, passing "/my-cgroup" as the path 281 * would actually put the calling process into the cgroup 282 * "/cgroup-test-work-dir/my-cgroup" 283 * 284 * On success, it returns 0, otherwise on failure it returns 1. 285 */ 286 int join_cgroup(const char *relative_path) 287 { 288 char cgroup_path[PATH_MAX + 1]; 289 290 format_cgroup_path(cgroup_path, relative_path); 291 return join_cgroup_from_top(cgroup_path); 292 } 293 294 /** 295 * join_root_cgroup() - Join the root cgroup 296 * 297 * This function joins the root cgroup. 298 * 299 * On success, it returns 0, otherwise on failure it returns 1. 300 */ 301 int join_root_cgroup(void) 302 { 303 return join_cgroup_from_top(CGROUP_MOUNT_PATH); 304 } 305 306 /** 307 * join_parent_cgroup() - Join a cgroup in the parent process workdir 308 * @relative_path: The cgroup path, relative to parent process workdir, to join 309 * 310 * See join_cgroup(). 311 * 312 * On success, it returns 0, otherwise on failure it returns 1. 313 */ 314 int join_parent_cgroup(const char *relative_path) 315 { 316 char cgroup_path[PATH_MAX + 1]; 317 318 format_parent_cgroup_path(cgroup_path, relative_path); 319 return join_cgroup_from_top(cgroup_path); 320 } 321 322 /** 323 * set_cgroup_xattr() - Set xattr on a cgroup dir 324 * @relative_path: The cgroup path, relative to the workdir, to set xattr 325 * @name: xattr name 326 * @value: xattr value 327 * 328 * This function set xattr on cgroup dir. 329 * 330 * On success, it returns 0, otherwise on failure it returns -1. 331 */ 332 int set_cgroup_xattr(const char *relative_path, 333 const char *name, 334 const char *value) 335 { 336 char cgroup_path[PATH_MAX + 1]; 337 338 format_cgroup_path(cgroup_path, relative_path); 339 return setxattr(cgroup_path, name, value, strlen(value) + 1, 0); 340 } 341 342 /** 343 * __cleanup_cgroup_environment() - Delete temporary cgroups 344 * 345 * This is a helper for cleanup_cgroup_environment() that is responsible for 346 * deletion of all temporary cgroups that have been created during the test. 347 */ 348 static void __cleanup_cgroup_environment(void) 349 { 350 char cgroup_workdir[PATH_MAX + 1]; 351 352 format_cgroup_path(cgroup_workdir, ""); 353 join_cgroup_from_top(CGROUP_MOUNT_PATH); 354 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 355 } 356 357 /** 358 * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment 359 * 360 * This is an idempotent function to delete all temporary cgroups that 361 * have been created during the test and unmount the cgroup testing work 362 * directory. 363 * 364 * At call time, it moves the calling process to the root cgroup, and then 365 * runs the deletion process. It is idempotent, and should not fail, unless 366 * a process is lingering. 367 * 368 * On failure, it will print an error to stderr, and try to continue. 369 */ 370 void cleanup_cgroup_environment(void) 371 { 372 __cleanup_cgroup_environment(); 373 if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH)) 374 log_err("umount cgroup2"); 375 cgroup_workdir_mounted = false; 376 } 377 378 /** 379 * get_root_cgroup() - Get the FD of the root cgroup 380 * 381 * On success, it returns the file descriptor. On failure, it returns -1. 382 * If there is a failure, it prints the error to stderr. 383 */ 384 int get_root_cgroup(void) 385 { 386 int fd; 387 388 fd = open(CGROUP_MOUNT_PATH, O_RDONLY); 389 if (fd < 0) { 390 log_err("Opening root cgroup"); 391 return -1; 392 } 393 return fd; 394 } 395 396 /* 397 * remove_cgroup() - Remove a cgroup 398 * @relative_path: The cgroup path, relative to the workdir, to remove 399 * 400 * This function expects a cgroup to already be created, relative to the cgroup 401 * work dir. It also expects the cgroup doesn't have any children or live 402 * processes and it removes the cgroup. 403 * 404 * On failure, it will print an error to stderr. 405 */ 406 void remove_cgroup(const char *relative_path) 407 { 408 char cgroup_path[PATH_MAX + 1]; 409 410 format_cgroup_path(cgroup_path, relative_path); 411 if (rmdir(cgroup_path)) 412 log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); 413 } 414 415 /** 416 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD 417 * @relative_path: The cgroup path, relative to the workdir, to join 418 * 419 * This function creates a cgroup under the top level workdir and returns the 420 * file descriptor. It is idempotent. 421 * 422 * On success, it returns the file descriptor. On failure it returns -1. 423 * If there is a failure, it prints the error to stderr. 424 */ 425 int create_and_get_cgroup(const char *relative_path) 426 { 427 char cgroup_path[PATH_MAX + 1]; 428 int fd; 429 430 format_cgroup_path(cgroup_path, relative_path); 431 if (mkdir(cgroup_path, 0777) && errno != EEXIST) { 432 log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path); 433 return -1; 434 } 435 436 fd = open(cgroup_path, O_RDONLY); 437 if (fd < 0) { 438 log_err("Opening Cgroup"); 439 return -1; 440 } 441 442 return fd; 443 } 444 445 /** 446 * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path 447 * @cgroup_workdir: The absolute cgroup path 448 * 449 * On success, it returns the cgroup id. On failure it returns 0, 450 * which is an invalid cgroup id. 451 * If there is a failure, it prints the error to stderr. 452 */ 453 static unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir) 454 { 455 int dirfd, err, flags, mount_id, fhsize; 456 union { 457 unsigned long long cgid; 458 unsigned char raw_bytes[8]; 459 } id; 460 struct file_handle *fhp, *fhp2; 461 unsigned long long ret = 0; 462 463 dirfd = AT_FDCWD; 464 flags = 0; 465 fhsize = sizeof(*fhp); 466 fhp = calloc(1, fhsize); 467 if (!fhp) { 468 log_err("calloc"); 469 return 0; 470 } 471 err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags); 472 if (err >= 0 || fhp->handle_bytes != 8) { 473 log_err("name_to_handle_at"); 474 goto free_mem; 475 } 476 477 fhsize = sizeof(struct file_handle) + fhp->handle_bytes; 478 fhp2 = realloc(fhp, fhsize); 479 if (!fhp2) { 480 log_err("realloc"); 481 goto free_mem; 482 } 483 err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags); 484 fhp = fhp2; 485 if (err < 0) { 486 log_err("name_to_handle_at"); 487 goto free_mem; 488 } 489 490 memcpy(id.raw_bytes, fhp->f_handle, 8); 491 ret = id.cgid; 492 493 free_mem: 494 free(fhp); 495 return ret; 496 } 497 498 unsigned long long get_cgroup_id(const char *relative_path) 499 { 500 char cgroup_workdir[PATH_MAX + 1]; 501 502 format_cgroup_path(cgroup_workdir, relative_path); 503 return get_cgroup_id_from_path(cgroup_workdir); 504 } 505 506 int cgroup_setup_and_join(const char *path) { 507 int cg_fd; 508 509 if (setup_cgroup_environment()) { 510 fprintf(stderr, "Failed to setup cgroup environment\n"); 511 return -EINVAL; 512 } 513 514 cg_fd = create_and_get_cgroup(path); 515 if (cg_fd < 0) { 516 fprintf(stderr, "Failed to create test cgroup\n"); 517 cleanup_cgroup_environment(); 518 return cg_fd; 519 } 520 521 if (join_cgroup(path)) { 522 fprintf(stderr, "Failed to join cgroup\n"); 523 cleanup_cgroup_environment(); 524 return -EINVAL; 525 } 526 return cg_fd; 527 } 528 529 /** 530 * setup_classid_environment() - Setup the cgroupv1 net_cls environment 531 * 532 * This function should only be called in a custom mount namespace, e.g. 533 * created by running setup_cgroup_environment. 534 * 535 * After calling this function, cleanup_classid_environment should be called 536 * once testing is complete. 537 * 538 * This function will print an error to stderr and return 1 if it is unable 539 * to setup the cgroup environment. If setup is successful, 0 is returned. 540 */ 541 int setup_classid_environment(void) 542 { 543 char cgroup_workdir[PATH_MAX + 1]; 544 545 format_classid_path(cgroup_workdir); 546 547 if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && 548 errno != EBUSY) { 549 log_err("mount cgroup base"); 550 return 1; 551 } 552 553 if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { 554 log_err("mkdir cgroup net_cls"); 555 return 1; 556 } 557 558 if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) { 559 if (errno != EBUSY) { 560 log_err("mount cgroup net_cls"); 561 return 1; 562 } 563 564 if (rmdir(NETCLS_MOUNT_PATH)) { 565 log_err("rmdir cgroup net_cls"); 566 return 1; 567 } 568 if (umount(CGROUP_MOUNT_DFLT)) { 569 log_err("umount cgroup base"); 570 return 1; 571 } 572 } 573 574 cleanup_classid_environment(); 575 576 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 577 log_err("mkdir cgroup work dir"); 578 return 1; 579 } 580 581 return 0; 582 } 583 584 /** 585 * set_classid() - Set a cgroupv1 net_cls classid 586 * 587 * Writes the classid into the cgroup work dir's net_cls.classid 588 * file in order to later on trigger socket tagging. 589 * 590 * We leverage the current pid as the classid, ensuring unique identification. 591 * 592 * On success, it returns 0, otherwise on failure it returns 1. If there 593 * is a failure, it prints the error to stderr. 594 */ 595 int set_classid(void) 596 { 597 char cgroup_workdir[PATH_MAX - 42]; 598 char cgroup_classid_path[PATH_MAX + 1]; 599 int fd, rc = 0; 600 601 format_classid_path(cgroup_workdir); 602 snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), 603 "%s/net_cls.classid", cgroup_workdir); 604 605 fd = open(cgroup_classid_path, O_WRONLY); 606 if (fd < 0) { 607 log_err("Opening cgroup classid: %s", cgroup_classid_path); 608 return 1; 609 } 610 611 if (dprintf(fd, "%u\n", getpid()) < 0) { 612 log_err("Setting cgroup classid"); 613 rc = 1; 614 } 615 616 close(fd); 617 return rc; 618 } 619 620 /** 621 * join_classid() - Join a cgroupv1 net_cls classid 622 * 623 * This function expects the cgroup work dir to be already created, as we 624 * join it here. This causes the process sockets to be tagged with the given 625 * net_cls classid. 626 * 627 * On success, it returns 0, otherwise on failure it returns 1. 628 */ 629 int join_classid(void) 630 { 631 char cgroup_workdir[PATH_MAX + 1]; 632 633 format_classid_path(cgroup_workdir); 634 return join_cgroup_from_top(cgroup_workdir); 635 } 636 637 /** 638 * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment 639 * 640 * At call time, it moves the calling process to the root cgroup, and then 641 * runs the deletion process. 642 * 643 * On failure, it will print an error to stderr, and try to continue. 644 */ 645 void cleanup_classid_environment(void) 646 { 647 char cgroup_workdir[PATH_MAX + 1]; 648 649 format_classid_path(cgroup_workdir); 650 join_cgroup_from_top(NETCLS_MOUNT_PATH); 651 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 652 } 653 654 /** 655 * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup 656 */ 657 unsigned long long get_classid_cgroup_id(void) 658 { 659 char cgroup_workdir[PATH_MAX + 1]; 660 661 format_classid_path(cgroup_workdir); 662 return get_cgroup_id_from_path(cgroup_workdir); 663 } 664 665 /** 666 * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name. 667 * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be 668 * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like 669 * "net_cls,net_prio". 670 */ 671 int get_cgroup1_hierarchy_id(const char *subsys_name) 672 { 673 char *c, *c2, *c3, *c4; 674 bool found = false; 675 char line[1024]; 676 FILE *file; 677 int i, id; 678 679 if (!subsys_name) 680 return -1; 681 682 file = fopen("/proc/self/cgroup", "r"); 683 if (!file) { 684 log_err("fopen /proc/self/cgroup"); 685 return -1; 686 } 687 688 while (fgets(line, 1024, file)) { 689 i = 0; 690 for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) { 691 if (i == 0) { 692 id = strtol(c, NULL, 10); 693 } else if (i == 1) { 694 if (!strcmp(c, subsys_name)) { 695 found = true; 696 break; 697 } 698 699 /* Multiple subsystems may share one single mount point */ 700 for (c3 = strtok_r(c, ",", &c4); c3; 701 c3 = strtok_r(NULL, ",", &c4)) { 702 if (!strcmp(c, subsys_name)) { 703 found = true; 704 break; 705 } 706 } 707 } 708 i++; 709 } 710 if (found) 711 break; 712 } 713 fclose(file); 714 return found ? id : -1; 715 } 716 717 /** 718 * open_classid() - Open a cgroupv1 net_cls classid 719 * 720 * This function expects the cgroup work dir to be already created, as we 721 * open it here. 722 * 723 * On success, it returns the file descriptor. On failure it returns -1. 724 */ 725 int open_classid(void) 726 { 727 char cgroup_workdir[PATH_MAX + 1]; 728 729 format_classid_path(cgroup_workdir); 730 return open(cgroup_workdir, O_RDONLY); 731 } 732