1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <sched.h> 4 #include <sys/mount.h> 5 #include <sys/stat.h> 6 #include <sys/types.h> 7 #include <linux/limits.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <linux/sched.h> 11 #include <fcntl.h> 12 #include <unistd.h> 13 #include <ftw.h> 14 15 #include "cgroup_helpers.h" 16 #include "bpf_util.h" 17 18 /* 19 * To avoid relying on the system setup, when setup_cgroup_env is called 20 * we create a new mount namespace, and cgroup namespace. The cgroupv2 21 * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't 22 * have cgroupv2 enabled at this point in time. It's easier to create our 23 * own mount namespace and manage it ourselves. We assume /mnt exists. 24 * 25 * Related cgroupv1 helpers are named *classid*(), since we only use the 26 * net_cls controller for tagging net_cls.classid. We assume the default 27 * mount under /sys/fs/cgroup/net_cls, which should be the case for the 28 * vast majority of users. 29 */ 30 31 #define WALK_FD_LIMIT 16 32 33 #define CGROUP_MOUNT_PATH "/mnt" 34 #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" 35 #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" 36 #define CGROUP_WORK_DIR "/cgroup-test-work-dir" 37 38 #define format_cgroup_path_pid(buf, path, pid) \ 39 snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ 40 CGROUP_WORK_DIR, pid, path) 41 42 #define format_cgroup_path(buf, path) \ 43 format_cgroup_path_pid(buf, path, getpid()) 44 45 #define format_parent_cgroup_path(buf, path) \ 46 format_cgroup_path_pid(buf, path, getppid()) 47 48 #define format_classid_path_pid(buf, pid) \ 49 snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \ 50 CGROUP_WORK_DIR, pid) 51 52 #define format_classid_path(buf) \ 53 format_classid_path_pid(buf, getpid()) 54 55 static __thread bool cgroup_workdir_mounted; 56 57 static void __cleanup_cgroup_environment(void); 58 59 static int __enable_controllers(const char *cgroup_path, const char *controllers) 60 { 61 char path[PATH_MAX + 1]; 62 char enable[PATH_MAX + 1]; 63 char *c, *c2; 64 int fd, cfd; 65 ssize_t len; 66 67 /* If not controllers are passed, enable all available controllers */ 68 if (!controllers) { 69 snprintf(path, sizeof(path), "%s/cgroup.controllers", 70 cgroup_path); 71 fd = open(path, O_RDONLY); 72 if (fd < 0) { 73 log_err("Opening cgroup.controllers: %s", path); 74 return 1; 75 } 76 len = read(fd, enable, sizeof(enable) - 1); 77 if (len < 0) { 78 close(fd); 79 log_err("Reading cgroup.controllers: %s", path); 80 return 1; 81 } else if (len == 0) { /* No controllers to enable */ 82 close(fd); 83 return 0; 84 } 85 enable[len] = 0; 86 close(fd); 87 } else { 88 bpf_strlcpy(enable, controllers, sizeof(enable)); 89 } 90 91 snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); 92 cfd = open(path, O_RDWR); 93 if (cfd < 0) { 94 log_err("Opening cgroup.subtree_control: %s", path); 95 return 1; 96 } 97 98 for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) { 99 if (dprintf(cfd, "+%s\n", c) <= 0) { 100 log_err("Enabling controller %s: %s", c, path); 101 close(cfd); 102 return 1; 103 } 104 } 105 close(cfd); 106 return 0; 107 } 108 109 /** 110 * enable_controllers() - Enable cgroup v2 controllers 111 * @relative_path: The cgroup path, relative to the workdir 112 * @controllers: List of controllers to enable in cgroup.controllers format 113 * 114 * 115 * Enable given cgroup v2 controllers, if @controllers is NULL, enable all 116 * available controllers. 117 * 118 * If successful, 0 is returned. 119 */ 120 int enable_controllers(const char *relative_path, const char *controllers) 121 { 122 char cgroup_path[PATH_MAX + 1]; 123 124 format_cgroup_path(cgroup_path, relative_path); 125 return __enable_controllers(cgroup_path, controllers); 126 } 127 128 static int __write_cgroup_file(const char *cgroup_path, const char *file, 129 const char *buf) 130 { 131 char file_path[PATH_MAX + 1]; 132 int fd; 133 134 snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file); 135 fd = open(file_path, O_RDWR); 136 if (fd < 0) { 137 log_err("Opening %s", file_path); 138 return 1; 139 } 140 141 if (dprintf(fd, "%s", buf) <= 0) { 142 log_err("Writing to %s", file_path); 143 close(fd); 144 return 1; 145 } 146 close(fd); 147 return 0; 148 } 149 150 /** 151 * write_cgroup_file() - Write to a cgroup file 152 * @relative_path: The cgroup path, relative to the workdir 153 * @file: The name of the file in cgroupfs to write to 154 * @buf: Buffer to write to the file 155 * 156 * Write to a file in the given cgroup's directory. 157 * 158 * If successful, 0 is returned. 159 */ 160 int write_cgroup_file(const char *relative_path, const char *file, 161 const char *buf) 162 { 163 char cgroup_path[PATH_MAX - 24]; 164 165 format_cgroup_path(cgroup_path, relative_path); 166 return __write_cgroup_file(cgroup_path, file, buf); 167 } 168 169 /** 170 * write_cgroup_file_parent() - Write to a cgroup file in the parent process 171 * workdir 172 * @relative_path: The cgroup path, relative to the parent process workdir 173 * @file: The name of the file in cgroupfs to write to 174 * @buf: Buffer to write to the file 175 * 176 * Write to a file in the given cgroup's directory under the parent process 177 * workdir. 178 * 179 * If successful, 0 is returned. 180 */ 181 int write_cgroup_file_parent(const char *relative_path, const char *file, 182 const char *buf) 183 { 184 char cgroup_path[PATH_MAX - 24]; 185 186 format_parent_cgroup_path(cgroup_path, relative_path); 187 return __write_cgroup_file(cgroup_path, file, buf); 188 } 189 190 /** 191 * setup_cgroup_environment() - Setup the cgroup environment 192 * 193 * After calling this function, cleanup_cgroup_environment should be called 194 * once testing is complete. 195 * 196 * This function will print an error to stderr and return 1 if it is unable 197 * to setup the cgroup environment. If setup is successful, 0 is returned. 198 */ 199 int setup_cgroup_environment(void) 200 { 201 char cgroup_workdir[PATH_MAX - 24]; 202 203 format_cgroup_path(cgroup_workdir, ""); 204 205 if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) { 206 log_err("mkdir mount"); 207 return 1; 208 } 209 210 if (unshare(CLONE_NEWNS)) { 211 log_err("unshare"); 212 return 1; 213 } 214 215 if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { 216 log_err("mount fakeroot"); 217 return 1; 218 } 219 220 if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) { 221 log_err("mount cgroup2"); 222 return 1; 223 } 224 cgroup_workdir_mounted = true; 225 226 /* Cleanup existing failed runs, now that the environment is setup */ 227 __cleanup_cgroup_environment(); 228 229 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 230 log_err("mkdir cgroup work dir"); 231 return 1; 232 } 233 234 /* Enable all available controllers to increase test coverage */ 235 if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) || 236 __enable_controllers(cgroup_workdir, NULL)) 237 return 1; 238 239 return 0; 240 } 241 242 static int nftwfunc(const char *filename, const struct stat *statptr, 243 int fileflags, struct FTW *pfwt) 244 { 245 if ((fileflags & FTW_D) && rmdir(filename)) 246 log_err("Removing cgroup: %s", filename); 247 return 0; 248 } 249 250 static int join_cgroup_from_top(const char *cgroup_path) 251 { 252 char cgroup_procs_path[PATH_MAX + 1]; 253 pid_t pid = getpid(); 254 int fd, rc = 0; 255 256 snprintf(cgroup_procs_path, sizeof(cgroup_procs_path), 257 "%s/cgroup.procs", cgroup_path); 258 259 fd = open(cgroup_procs_path, O_WRONLY); 260 if (fd < 0) { 261 log_err("Opening Cgroup Procs: %s", cgroup_procs_path); 262 return 1; 263 } 264 265 if (dprintf(fd, "%d\n", pid) < 0) { 266 log_err("Joining Cgroup"); 267 rc = 1; 268 } 269 270 close(fd); 271 return rc; 272 } 273 274 /** 275 * join_cgroup() - Join a cgroup 276 * @relative_path: The cgroup path, relative to the workdir, to join 277 * 278 * This function expects a cgroup to already be created, relative to the cgroup 279 * work dir, and it joins it. For example, passing "/my-cgroup" as the path 280 * would actually put the calling process into the cgroup 281 * "/cgroup-test-work-dir/my-cgroup" 282 * 283 * On success, it returns 0, otherwise on failure it returns 1. 284 */ 285 int join_cgroup(const char *relative_path) 286 { 287 char cgroup_path[PATH_MAX + 1]; 288 289 format_cgroup_path(cgroup_path, relative_path); 290 return join_cgroup_from_top(cgroup_path); 291 } 292 293 /** 294 * join_root_cgroup() - Join the root cgroup 295 * 296 * This function joins the root cgroup. 297 * 298 * On success, it returns 0, otherwise on failure it returns 1. 299 */ 300 int join_root_cgroup(void) 301 { 302 return join_cgroup_from_top(CGROUP_MOUNT_PATH); 303 } 304 305 /** 306 * join_parent_cgroup() - Join a cgroup in the parent process workdir 307 * @relative_path: The cgroup path, relative to parent process workdir, to join 308 * 309 * See join_cgroup(). 310 * 311 * On success, it returns 0, otherwise on failure it returns 1. 312 */ 313 int join_parent_cgroup(const char *relative_path) 314 { 315 char cgroup_path[PATH_MAX + 1]; 316 317 format_parent_cgroup_path(cgroup_path, relative_path); 318 return join_cgroup_from_top(cgroup_path); 319 } 320 321 /** 322 * __cleanup_cgroup_environment() - Delete temporary cgroups 323 * 324 * This is a helper for cleanup_cgroup_environment() that is responsible for 325 * deletion of all temporary cgroups that have been created during the test. 326 */ 327 static void __cleanup_cgroup_environment(void) 328 { 329 char cgroup_workdir[PATH_MAX + 1]; 330 331 format_cgroup_path(cgroup_workdir, ""); 332 join_cgroup_from_top(CGROUP_MOUNT_PATH); 333 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 334 } 335 336 /** 337 * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment 338 * 339 * This is an idempotent function to delete all temporary cgroups that 340 * have been created during the test and unmount the cgroup testing work 341 * directory. 342 * 343 * At call time, it moves the calling process to the root cgroup, and then 344 * runs the deletion process. It is idempotent, and should not fail, unless 345 * a process is lingering. 346 * 347 * On failure, it will print an error to stderr, and try to continue. 348 */ 349 void cleanup_cgroup_environment(void) 350 { 351 __cleanup_cgroup_environment(); 352 if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH)) 353 log_err("umount cgroup2"); 354 cgroup_workdir_mounted = false; 355 } 356 357 /** 358 * get_root_cgroup() - Get the FD of the root cgroup 359 * 360 * On success, it returns the file descriptor. On failure, it returns -1. 361 * If there is a failure, it prints the error to stderr. 362 */ 363 int get_root_cgroup(void) 364 { 365 int fd; 366 367 fd = open(CGROUP_MOUNT_PATH, O_RDONLY); 368 if (fd < 0) { 369 log_err("Opening root cgroup"); 370 return -1; 371 } 372 return fd; 373 } 374 375 /* 376 * remove_cgroup() - Remove a cgroup 377 * @relative_path: The cgroup path, relative to the workdir, to remove 378 * 379 * This function expects a cgroup to already be created, relative to the cgroup 380 * work dir. It also expects the cgroup doesn't have any children or live 381 * processes and it removes the cgroup. 382 * 383 * On failure, it will print an error to stderr. 384 */ 385 void remove_cgroup(const char *relative_path) 386 { 387 char cgroup_path[PATH_MAX + 1]; 388 389 format_cgroup_path(cgroup_path, relative_path); 390 if (rmdir(cgroup_path)) 391 log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); 392 } 393 394 /** 395 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD 396 * @relative_path: The cgroup path, relative to the workdir, to join 397 * 398 * This function creates a cgroup under the top level workdir and returns the 399 * file descriptor. It is idempotent. 400 * 401 * On success, it returns the file descriptor. On failure it returns -1. 402 * If there is a failure, it prints the error to stderr. 403 */ 404 int create_and_get_cgroup(const char *relative_path) 405 { 406 char cgroup_path[PATH_MAX + 1]; 407 int fd; 408 409 format_cgroup_path(cgroup_path, relative_path); 410 if (mkdir(cgroup_path, 0777) && errno != EEXIST) { 411 log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path); 412 return -1; 413 } 414 415 fd = open(cgroup_path, O_RDONLY); 416 if (fd < 0) { 417 log_err("Opening Cgroup"); 418 return -1; 419 } 420 421 return fd; 422 } 423 424 /** 425 * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path 426 * @cgroup_workdir: The absolute cgroup path 427 * 428 * On success, it returns the cgroup id. On failure it returns 0, 429 * which is an invalid cgroup id. 430 * If there is a failure, it prints the error to stderr. 431 */ 432 static unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir) 433 { 434 int dirfd, err, flags, mount_id, fhsize; 435 union { 436 unsigned long long cgid; 437 unsigned char raw_bytes[8]; 438 } id; 439 struct file_handle *fhp, *fhp2; 440 unsigned long long ret = 0; 441 442 dirfd = AT_FDCWD; 443 flags = 0; 444 fhsize = sizeof(*fhp); 445 fhp = calloc(1, fhsize); 446 if (!fhp) { 447 log_err("calloc"); 448 return 0; 449 } 450 err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags); 451 if (err >= 0 || fhp->handle_bytes != 8) { 452 log_err("name_to_handle_at"); 453 goto free_mem; 454 } 455 456 fhsize = sizeof(struct file_handle) + fhp->handle_bytes; 457 fhp2 = realloc(fhp, fhsize); 458 if (!fhp2) { 459 log_err("realloc"); 460 goto free_mem; 461 } 462 err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags); 463 fhp = fhp2; 464 if (err < 0) { 465 log_err("name_to_handle_at"); 466 goto free_mem; 467 } 468 469 memcpy(id.raw_bytes, fhp->f_handle, 8); 470 ret = id.cgid; 471 472 free_mem: 473 free(fhp); 474 return ret; 475 } 476 477 unsigned long long get_cgroup_id(const char *relative_path) 478 { 479 char cgroup_workdir[PATH_MAX + 1]; 480 481 format_cgroup_path(cgroup_workdir, relative_path); 482 return get_cgroup_id_from_path(cgroup_workdir); 483 } 484 485 int cgroup_setup_and_join(const char *path) { 486 int cg_fd; 487 488 if (setup_cgroup_environment()) { 489 fprintf(stderr, "Failed to setup cgroup environment\n"); 490 return -EINVAL; 491 } 492 493 cg_fd = create_and_get_cgroup(path); 494 if (cg_fd < 0) { 495 fprintf(stderr, "Failed to create test cgroup\n"); 496 cleanup_cgroup_environment(); 497 return cg_fd; 498 } 499 500 if (join_cgroup(path)) { 501 fprintf(stderr, "Failed to join cgroup\n"); 502 cleanup_cgroup_environment(); 503 return -EINVAL; 504 } 505 return cg_fd; 506 } 507 508 /** 509 * setup_classid_environment() - Setup the cgroupv1 net_cls environment 510 * 511 * This function should only be called in a custom mount namespace, e.g. 512 * created by running setup_cgroup_environment. 513 * 514 * After calling this function, cleanup_classid_environment should be called 515 * once testing is complete. 516 * 517 * This function will print an error to stderr and return 1 if it is unable 518 * to setup the cgroup environment. If setup is successful, 0 is returned. 519 */ 520 int setup_classid_environment(void) 521 { 522 char cgroup_workdir[PATH_MAX + 1]; 523 524 format_classid_path(cgroup_workdir); 525 526 if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && 527 errno != EBUSY) { 528 log_err("mount cgroup base"); 529 return 1; 530 } 531 532 if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { 533 log_err("mkdir cgroup net_cls"); 534 return 1; 535 } 536 537 if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) { 538 if (errno != EBUSY) { 539 log_err("mount cgroup net_cls"); 540 return 1; 541 } 542 543 if (rmdir(NETCLS_MOUNT_PATH)) { 544 log_err("rmdir cgroup net_cls"); 545 return 1; 546 } 547 if (umount(CGROUP_MOUNT_DFLT)) { 548 log_err("umount cgroup base"); 549 return 1; 550 } 551 } 552 553 cleanup_classid_environment(); 554 555 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 556 log_err("mkdir cgroup work dir"); 557 return 1; 558 } 559 560 return 0; 561 } 562 563 /** 564 * set_classid() - Set a cgroupv1 net_cls classid 565 * 566 * Writes the classid into the cgroup work dir's net_cls.classid 567 * file in order to later on trigger socket tagging. 568 * 569 * We leverage the current pid as the classid, ensuring unique identification. 570 * 571 * On success, it returns 0, otherwise on failure it returns 1. If there 572 * is a failure, it prints the error to stderr. 573 */ 574 int set_classid(void) 575 { 576 char cgroup_workdir[PATH_MAX - 42]; 577 char cgroup_classid_path[PATH_MAX + 1]; 578 int fd, rc = 0; 579 580 format_classid_path(cgroup_workdir); 581 snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), 582 "%s/net_cls.classid", cgroup_workdir); 583 584 fd = open(cgroup_classid_path, O_WRONLY); 585 if (fd < 0) { 586 log_err("Opening cgroup classid: %s", cgroup_classid_path); 587 return 1; 588 } 589 590 if (dprintf(fd, "%u\n", getpid()) < 0) { 591 log_err("Setting cgroup classid"); 592 rc = 1; 593 } 594 595 close(fd); 596 return rc; 597 } 598 599 /** 600 * join_classid() - Join a cgroupv1 net_cls classid 601 * 602 * This function expects the cgroup work dir to be already created, as we 603 * join it here. This causes the process sockets to be tagged with the given 604 * net_cls classid. 605 * 606 * On success, it returns 0, otherwise on failure it returns 1. 607 */ 608 int join_classid(void) 609 { 610 char cgroup_workdir[PATH_MAX + 1]; 611 612 format_classid_path(cgroup_workdir); 613 return join_cgroup_from_top(cgroup_workdir); 614 } 615 616 /** 617 * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment 618 * 619 * At call time, it moves the calling process to the root cgroup, and then 620 * runs the deletion process. 621 * 622 * On failure, it will print an error to stderr, and try to continue. 623 */ 624 void cleanup_classid_environment(void) 625 { 626 char cgroup_workdir[PATH_MAX + 1]; 627 628 format_classid_path(cgroup_workdir); 629 join_cgroup_from_top(NETCLS_MOUNT_PATH); 630 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 631 } 632 633 /** 634 * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup 635 */ 636 unsigned long long get_classid_cgroup_id(void) 637 { 638 char cgroup_workdir[PATH_MAX + 1]; 639 640 format_classid_path(cgroup_workdir); 641 return get_cgroup_id_from_path(cgroup_workdir); 642 } 643 644 /** 645 * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name. 646 * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be 647 * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like 648 * "net_cls,net_prio". 649 */ 650 int get_cgroup1_hierarchy_id(const char *subsys_name) 651 { 652 char *c, *c2, *c3, *c4; 653 bool found = false; 654 char line[1024]; 655 FILE *file; 656 int i, id; 657 658 if (!subsys_name) 659 return -1; 660 661 file = fopen("/proc/self/cgroup", "r"); 662 if (!file) { 663 log_err("fopen /proc/self/cgroup"); 664 return -1; 665 } 666 667 while (fgets(line, 1024, file)) { 668 i = 0; 669 for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) { 670 if (i == 0) { 671 id = strtol(c, NULL, 10); 672 } else if (i == 1) { 673 if (!strcmp(c, subsys_name)) { 674 found = true; 675 break; 676 } 677 678 /* Multiple subsystems may share one single mount point */ 679 for (c3 = strtok_r(c, ",", &c4); c3; 680 c3 = strtok_r(NULL, ",", &c4)) { 681 if (!strcmp(c, subsys_name)) { 682 found = true; 683 break; 684 } 685 } 686 } 687 i++; 688 } 689 if (found) 690 break; 691 } 692 fclose(file); 693 return found ? id : -1; 694 } 695 696 /** 697 * open_classid() - Open a cgroupv1 net_cls classid 698 * 699 * This function expects the cgroup work dir to be already created, as we 700 * open it here. 701 * 702 * On success, it returns the file descriptor. On failure it returns -1. 703 */ 704 int open_classid(void) 705 { 706 char cgroup_workdir[PATH_MAX + 1]; 707 708 format_classid_path(cgroup_workdir); 709 return open(cgroup_workdir, O_RDONLY); 710 } 711