1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <sched.h> 4 #include <sys/mount.h> 5 #include <sys/stat.h> 6 #include <sys/types.h> 7 #include <linux/limits.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <linux/sched.h> 11 #include <fcntl.h> 12 #include <unistd.h> 13 #include <ftw.h> 14 15 #include "cgroup_helpers.h" 16 #include "bpf_util.h" 17 18 /* 19 * To avoid relying on the system setup, when setup_cgroup_env is called 20 * we create a new mount namespace, and cgroup namespace. The cgroupv2 21 * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't 22 * have cgroupv2 enabled at this point in time. It's easier to create our 23 * own mount namespace and manage it ourselves. We assume /mnt exists. 24 * 25 * Related cgroupv1 helpers are named *classid*(), since we only use the 26 * net_cls controller for tagging net_cls.classid. We assume the default 27 * mount under /sys/fs/cgroup/net_cls, which should be the case for the 28 * vast majority of users. 29 */ 30 31 #define WALK_FD_LIMIT 16 32 33 #define CGROUP_MOUNT_PATH "/mnt" 34 #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" 35 #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" 36 #define CGROUP_WORK_DIR "/cgroup-test-work-dir" 37 38 #define format_cgroup_path_pid(buf, path, pid) \ 39 snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ 40 CGROUP_WORK_DIR, pid, path) 41 42 #define format_cgroup_path(buf, path) \ 43 format_cgroup_path_pid(buf, path, getpid()) 44 45 #define format_parent_cgroup_path(buf, path) \ 46 format_cgroup_path_pid(buf, path, getppid()) 47 48 #define format_classid_path(buf) \ 49 snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ 50 CGROUP_WORK_DIR) 51 52 static __thread bool cgroup_workdir_mounted; 53 54 static void __cleanup_cgroup_environment(void); 55 56 static int __enable_controllers(const char *cgroup_path, const char *controllers) 57 { 58 char path[PATH_MAX + 1]; 59 char enable[PATH_MAX + 1]; 60 char *c, *c2; 61 int fd, cfd; 62 ssize_t len; 63 64 /* If not controllers are passed, enable all available controllers */ 65 if (!controllers) { 66 snprintf(path, sizeof(path), "%s/cgroup.controllers", 67 cgroup_path); 68 fd = open(path, O_RDONLY); 69 if (fd < 0) { 70 log_err("Opening cgroup.controllers: %s", path); 71 return 1; 72 } 73 len = read(fd, enable, sizeof(enable) - 1); 74 if (len < 0) { 75 close(fd); 76 log_err("Reading cgroup.controllers: %s", path); 77 return 1; 78 } else if (len == 0) { /* No controllers to enable */ 79 close(fd); 80 return 0; 81 } 82 enable[len] = 0; 83 close(fd); 84 } else { 85 bpf_strlcpy(enable, controllers, sizeof(enable)); 86 } 87 88 snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); 89 cfd = open(path, O_RDWR); 90 if (cfd < 0) { 91 log_err("Opening cgroup.subtree_control: %s", path); 92 return 1; 93 } 94 95 for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) { 96 if (dprintf(cfd, "+%s\n", c) <= 0) { 97 log_err("Enabling controller %s: %s", c, path); 98 close(cfd); 99 return 1; 100 } 101 } 102 close(cfd); 103 return 0; 104 } 105 106 /** 107 * enable_controllers() - Enable cgroup v2 controllers 108 * @relative_path: The cgroup path, relative to the workdir 109 * @controllers: List of controllers to enable in cgroup.controllers format 110 * 111 * 112 * Enable given cgroup v2 controllers, if @controllers is NULL, enable all 113 * available controllers. 114 * 115 * If successful, 0 is returned. 116 */ 117 int enable_controllers(const char *relative_path, const char *controllers) 118 { 119 char cgroup_path[PATH_MAX + 1]; 120 121 format_cgroup_path(cgroup_path, relative_path); 122 return __enable_controllers(cgroup_path, controllers); 123 } 124 125 static int __write_cgroup_file(const char *cgroup_path, const char *file, 126 const char *buf) 127 { 128 char file_path[PATH_MAX + 1]; 129 int fd; 130 131 snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file); 132 fd = open(file_path, O_RDWR); 133 if (fd < 0) { 134 log_err("Opening %s", file_path); 135 return 1; 136 } 137 138 if (dprintf(fd, "%s", buf) <= 0) { 139 log_err("Writing to %s", file_path); 140 close(fd); 141 return 1; 142 } 143 close(fd); 144 return 0; 145 } 146 147 /** 148 * write_cgroup_file() - Write to a cgroup file 149 * @relative_path: The cgroup path, relative to the workdir 150 * @file: The name of the file in cgroupfs to write to 151 * @buf: Buffer to write to the file 152 * 153 * Write to a file in the given cgroup's directory. 154 * 155 * If successful, 0 is returned. 156 */ 157 int write_cgroup_file(const char *relative_path, const char *file, 158 const char *buf) 159 { 160 char cgroup_path[PATH_MAX - 24]; 161 162 format_cgroup_path(cgroup_path, relative_path); 163 return __write_cgroup_file(cgroup_path, file, buf); 164 } 165 166 /** 167 * write_cgroup_file_parent() - Write to a cgroup file in the parent process 168 * workdir 169 * @relative_path: The cgroup path, relative to the parent process workdir 170 * @file: The name of the file in cgroupfs to write to 171 * @buf: Buffer to write to the file 172 * 173 * Write to a file in the given cgroup's directory under the parent process 174 * workdir. 175 * 176 * If successful, 0 is returned. 177 */ 178 int write_cgroup_file_parent(const char *relative_path, const char *file, 179 const char *buf) 180 { 181 char cgroup_path[PATH_MAX - 24]; 182 183 format_parent_cgroup_path(cgroup_path, relative_path); 184 return __write_cgroup_file(cgroup_path, file, buf); 185 } 186 187 /** 188 * setup_cgroup_environment() - Setup the cgroup environment 189 * 190 * After calling this function, cleanup_cgroup_environment should be called 191 * once testing is complete. 192 * 193 * This function will print an error to stderr and return 1 if it is unable 194 * to setup the cgroup environment. If setup is successful, 0 is returned. 195 */ 196 int setup_cgroup_environment(void) 197 { 198 char cgroup_workdir[PATH_MAX - 24]; 199 200 format_cgroup_path(cgroup_workdir, ""); 201 202 if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) { 203 log_err("mkdir mount"); 204 return 1; 205 } 206 207 if (unshare(CLONE_NEWNS)) { 208 log_err("unshare"); 209 return 1; 210 } 211 212 if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { 213 log_err("mount fakeroot"); 214 return 1; 215 } 216 217 if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) { 218 log_err("mount cgroup2"); 219 return 1; 220 } 221 cgroup_workdir_mounted = true; 222 223 /* Cleanup existing failed runs, now that the environment is setup */ 224 __cleanup_cgroup_environment(); 225 226 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 227 log_err("mkdir cgroup work dir"); 228 return 1; 229 } 230 231 /* Enable all available controllers to increase test coverage */ 232 if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) || 233 __enable_controllers(cgroup_workdir, NULL)) 234 return 1; 235 236 return 0; 237 } 238 239 static int nftwfunc(const char *filename, const struct stat *statptr, 240 int fileflags, struct FTW *pfwt) 241 { 242 if ((fileflags & FTW_D) && rmdir(filename)) 243 log_err("Removing cgroup: %s", filename); 244 return 0; 245 } 246 247 static int join_cgroup_from_top(const char *cgroup_path) 248 { 249 char cgroup_procs_path[PATH_MAX + 1]; 250 pid_t pid = getpid(); 251 int fd, rc = 0; 252 253 snprintf(cgroup_procs_path, sizeof(cgroup_procs_path), 254 "%s/cgroup.procs", cgroup_path); 255 256 fd = open(cgroup_procs_path, O_WRONLY); 257 if (fd < 0) { 258 log_err("Opening Cgroup Procs: %s", cgroup_procs_path); 259 return 1; 260 } 261 262 if (dprintf(fd, "%d\n", pid) < 0) { 263 log_err("Joining Cgroup"); 264 rc = 1; 265 } 266 267 close(fd); 268 return rc; 269 } 270 271 /** 272 * join_cgroup() - Join a cgroup 273 * @relative_path: The cgroup path, relative to the workdir, to join 274 * 275 * This function expects a cgroup to already be created, relative to the cgroup 276 * work dir, and it joins it. For example, passing "/my-cgroup" as the path 277 * would actually put the calling process into the cgroup 278 * "/cgroup-test-work-dir/my-cgroup" 279 * 280 * On success, it returns 0, otherwise on failure it returns 1. 281 */ 282 int join_cgroup(const char *relative_path) 283 { 284 char cgroup_path[PATH_MAX + 1]; 285 286 format_cgroup_path(cgroup_path, relative_path); 287 return join_cgroup_from_top(cgroup_path); 288 } 289 290 /** 291 * join_root_cgroup() - Join the root cgroup 292 * 293 * This function joins the root cgroup. 294 * 295 * On success, it returns 0, otherwise on failure it returns 1. 296 */ 297 int join_root_cgroup(void) 298 { 299 return join_cgroup_from_top(CGROUP_MOUNT_PATH); 300 } 301 302 /** 303 * join_parent_cgroup() - Join a cgroup in the parent process workdir 304 * @relative_path: The cgroup path, relative to parent process workdir, to join 305 * 306 * See join_cgroup(). 307 * 308 * On success, it returns 0, otherwise on failure it returns 1. 309 */ 310 int join_parent_cgroup(const char *relative_path) 311 { 312 char cgroup_path[PATH_MAX + 1]; 313 314 format_parent_cgroup_path(cgroup_path, relative_path); 315 return join_cgroup_from_top(cgroup_path); 316 } 317 318 /** 319 * __cleanup_cgroup_environment() - Delete temporary cgroups 320 * 321 * This is a helper for cleanup_cgroup_environment() that is responsible for 322 * deletion of all temporary cgroups that have been created during the test. 323 */ 324 static void __cleanup_cgroup_environment(void) 325 { 326 char cgroup_workdir[PATH_MAX + 1]; 327 328 format_cgroup_path(cgroup_workdir, ""); 329 join_cgroup_from_top(CGROUP_MOUNT_PATH); 330 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 331 } 332 333 /** 334 * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment 335 * 336 * This is an idempotent function to delete all temporary cgroups that 337 * have been created during the test and unmount the cgroup testing work 338 * directory. 339 * 340 * At call time, it moves the calling process to the root cgroup, and then 341 * runs the deletion process. It is idempotent, and should not fail, unless 342 * a process is lingering. 343 * 344 * On failure, it will print an error to stderr, and try to continue. 345 */ 346 void cleanup_cgroup_environment(void) 347 { 348 __cleanup_cgroup_environment(); 349 if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH)) 350 log_err("umount cgroup2"); 351 cgroup_workdir_mounted = false; 352 } 353 354 /** 355 * get_root_cgroup() - Get the FD of the root cgroup 356 * 357 * On success, it returns the file descriptor. On failure, it returns -1. 358 * If there is a failure, it prints the error to stderr. 359 */ 360 int get_root_cgroup(void) 361 { 362 int fd; 363 364 fd = open(CGROUP_MOUNT_PATH, O_RDONLY); 365 if (fd < 0) { 366 log_err("Opening root cgroup"); 367 return -1; 368 } 369 return fd; 370 } 371 372 /* 373 * remove_cgroup() - Remove a cgroup 374 * @relative_path: The cgroup path, relative to the workdir, to remove 375 * 376 * This function expects a cgroup to already be created, relative to the cgroup 377 * work dir. It also expects the cgroup doesn't have any children or live 378 * processes and it removes the cgroup. 379 * 380 * On failure, it will print an error to stderr. 381 */ 382 void remove_cgroup(const char *relative_path) 383 { 384 char cgroup_path[PATH_MAX + 1]; 385 386 format_cgroup_path(cgroup_path, relative_path); 387 if (rmdir(cgroup_path)) 388 log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); 389 } 390 391 /** 392 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD 393 * @relative_path: The cgroup path, relative to the workdir, to join 394 * 395 * This function creates a cgroup under the top level workdir and returns the 396 * file descriptor. It is idempotent. 397 * 398 * On success, it returns the file descriptor. On failure it returns -1. 399 * If there is a failure, it prints the error to stderr. 400 */ 401 int create_and_get_cgroup(const char *relative_path) 402 { 403 char cgroup_path[PATH_MAX + 1]; 404 int fd; 405 406 format_cgroup_path(cgroup_path, relative_path); 407 if (mkdir(cgroup_path, 0777) && errno != EEXIST) { 408 log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path); 409 return -1; 410 } 411 412 fd = open(cgroup_path, O_RDONLY); 413 if (fd < 0) { 414 log_err("Opening Cgroup"); 415 return -1; 416 } 417 418 return fd; 419 } 420 421 /** 422 * get_cgroup_id() - Get cgroup id for a particular cgroup path 423 * @relative_path: The cgroup path, relative to the workdir, to join 424 * 425 * On success, it returns the cgroup id. On failure it returns 0, 426 * which is an invalid cgroup id. 427 * If there is a failure, it prints the error to stderr. 428 */ 429 unsigned long long get_cgroup_id(const char *relative_path) 430 { 431 int dirfd, err, flags, mount_id, fhsize; 432 union { 433 unsigned long long cgid; 434 unsigned char raw_bytes[8]; 435 } id; 436 char cgroup_workdir[PATH_MAX + 1]; 437 struct file_handle *fhp, *fhp2; 438 unsigned long long ret = 0; 439 440 format_cgroup_path(cgroup_workdir, relative_path); 441 442 dirfd = AT_FDCWD; 443 flags = 0; 444 fhsize = sizeof(*fhp); 445 fhp = calloc(1, fhsize); 446 if (!fhp) { 447 log_err("calloc"); 448 return 0; 449 } 450 err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags); 451 if (err >= 0 || fhp->handle_bytes != 8) { 452 log_err("name_to_handle_at"); 453 goto free_mem; 454 } 455 456 fhsize = sizeof(struct file_handle) + fhp->handle_bytes; 457 fhp2 = realloc(fhp, fhsize); 458 if (!fhp2) { 459 log_err("realloc"); 460 goto free_mem; 461 } 462 err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags); 463 fhp = fhp2; 464 if (err < 0) { 465 log_err("name_to_handle_at"); 466 goto free_mem; 467 } 468 469 memcpy(id.raw_bytes, fhp->f_handle, 8); 470 ret = id.cgid; 471 472 free_mem: 473 free(fhp); 474 return ret; 475 } 476 477 int cgroup_setup_and_join(const char *path) { 478 int cg_fd; 479 480 if (setup_cgroup_environment()) { 481 fprintf(stderr, "Failed to setup cgroup environment\n"); 482 return -EINVAL; 483 } 484 485 cg_fd = create_and_get_cgroup(path); 486 if (cg_fd < 0) { 487 fprintf(stderr, "Failed to create test cgroup\n"); 488 cleanup_cgroup_environment(); 489 return cg_fd; 490 } 491 492 if (join_cgroup(path)) { 493 fprintf(stderr, "Failed to join cgroup\n"); 494 cleanup_cgroup_environment(); 495 return -EINVAL; 496 } 497 return cg_fd; 498 } 499 500 /** 501 * setup_classid_environment() - Setup the cgroupv1 net_cls environment 502 * 503 * After calling this function, cleanup_classid_environment should be called 504 * once testing is complete. 505 * 506 * This function will print an error to stderr and return 1 if it is unable 507 * to setup the cgroup environment. If setup is successful, 0 is returned. 508 */ 509 int setup_classid_environment(void) 510 { 511 char cgroup_workdir[PATH_MAX + 1]; 512 513 format_classid_path(cgroup_workdir); 514 515 if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && 516 errno != EBUSY) { 517 log_err("mount cgroup base"); 518 return 1; 519 } 520 521 if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { 522 log_err("mkdir cgroup net_cls"); 523 return 1; 524 } 525 526 if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && 527 errno != EBUSY) { 528 log_err("mount cgroup net_cls"); 529 return 1; 530 } 531 532 cleanup_classid_environment(); 533 534 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 535 log_err("mkdir cgroup work dir"); 536 return 1; 537 } 538 539 return 0; 540 } 541 542 /** 543 * set_classid() - Set a cgroupv1 net_cls classid 544 * @id: the numeric classid 545 * 546 * Writes the passed classid into the cgroup work dir's net_cls.classid 547 * file in order to later on trigger socket tagging. 548 * 549 * On success, it returns 0, otherwise on failure it returns 1. If there 550 * is a failure, it prints the error to stderr. 551 */ 552 int set_classid(unsigned int id) 553 { 554 char cgroup_workdir[PATH_MAX - 42]; 555 char cgroup_classid_path[PATH_MAX + 1]; 556 int fd, rc = 0; 557 558 format_classid_path(cgroup_workdir); 559 snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), 560 "%s/net_cls.classid", cgroup_workdir); 561 562 fd = open(cgroup_classid_path, O_WRONLY); 563 if (fd < 0) { 564 log_err("Opening cgroup classid: %s", cgroup_classid_path); 565 return 1; 566 } 567 568 if (dprintf(fd, "%u\n", id) < 0) { 569 log_err("Setting cgroup classid"); 570 rc = 1; 571 } 572 573 close(fd); 574 return rc; 575 } 576 577 /** 578 * join_classid() - Join a cgroupv1 net_cls classid 579 * 580 * This function expects the cgroup work dir to be already created, as we 581 * join it here. This causes the process sockets to be tagged with the given 582 * net_cls classid. 583 * 584 * On success, it returns 0, otherwise on failure it returns 1. 585 */ 586 int join_classid(void) 587 { 588 char cgroup_workdir[PATH_MAX + 1]; 589 590 format_classid_path(cgroup_workdir); 591 return join_cgroup_from_top(cgroup_workdir); 592 } 593 594 /** 595 * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment 596 * 597 * At call time, it moves the calling process to the root cgroup, and then 598 * runs the deletion process. 599 * 600 * On failure, it will print an error to stderr, and try to continue. 601 */ 602 void cleanup_classid_environment(void) 603 { 604 char cgroup_workdir[PATH_MAX + 1]; 605 606 format_classid_path(cgroup_workdir); 607 join_cgroup_from_top(NETCLS_MOUNT_PATH); 608 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 609 } 610