1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <sched.h> 4 #include <sys/mount.h> 5 #include <sys/stat.h> 6 #include <sys/types.h> 7 #include <linux/limits.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <linux/sched.h> 11 #include <fcntl.h> 12 #include <unistd.h> 13 #include <ftw.h> 14 15 #include "cgroup_helpers.h" 16 #include "bpf_util.h" 17 18 /* 19 * To avoid relying on the system setup, when setup_cgroup_env is called 20 * we create a new mount namespace, and cgroup namespace. The cgroupv2 21 * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't 22 * have cgroupv2 enabled at this point in time. It's easier to create our 23 * own mount namespace and manage it ourselves. We assume /mnt exists. 24 * 25 * Related cgroupv1 helpers are named *classid*(), since we only use the 26 * net_cls controller for tagging net_cls.classid. We assume the default 27 * mount under /sys/fs/cgroup/net_cls, which should be the case for the 28 * vast majority of users. 29 */ 30 31 #define WALK_FD_LIMIT 16 32 33 #define CGROUP_MOUNT_PATH "/mnt" 34 #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" 35 #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" 36 #define CGROUP_WORK_DIR "/cgroup-test-work-dir" 37 38 #define format_cgroup_path_pid(buf, path, pid) \ 39 snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ 40 CGROUP_WORK_DIR, pid, path) 41 42 #define format_cgroup_path(buf, path) \ 43 format_cgroup_path_pid(buf, path, getpid()) 44 45 #define format_parent_cgroup_path(buf, path) \ 46 format_cgroup_path_pid(buf, path, getppid()) 47 48 #define format_classid_path(buf) \ 49 snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ 50 CGROUP_WORK_DIR) 51 52 static int __enable_controllers(const char *cgroup_path, const char *controllers) 53 { 54 char path[PATH_MAX + 1]; 55 char enable[PATH_MAX + 1]; 56 char *c, *c2; 57 int fd, cfd; 58 ssize_t len; 59 60 /* If not controllers are passed, enable all available controllers */ 61 if (!controllers) { 62 snprintf(path, sizeof(path), "%s/cgroup.controllers", 63 cgroup_path); 64 fd = open(path, O_RDONLY); 65 if (fd < 0) { 66 log_err("Opening cgroup.controllers: %s", path); 67 return 1; 68 } 69 len = read(fd, enable, sizeof(enable) - 1); 70 if (len < 0) { 71 close(fd); 72 log_err("Reading cgroup.controllers: %s", path); 73 return 1; 74 } else if (len == 0) { /* No controllers to enable */ 75 close(fd); 76 return 0; 77 } 78 enable[len] = 0; 79 close(fd); 80 } else { 81 bpf_strlcpy(enable, controllers, sizeof(enable)); 82 } 83 84 snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); 85 cfd = open(path, O_RDWR); 86 if (cfd < 0) { 87 log_err("Opening cgroup.subtree_control: %s", path); 88 return 1; 89 } 90 91 for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) { 92 if (dprintf(cfd, "+%s\n", c) <= 0) { 93 log_err("Enabling controller %s: %s", c, path); 94 close(cfd); 95 return 1; 96 } 97 } 98 close(cfd); 99 return 0; 100 } 101 102 /** 103 * enable_controllers() - Enable cgroup v2 controllers 104 * @relative_path: The cgroup path, relative to the workdir 105 * @controllers: List of controllers to enable in cgroup.controllers format 106 * 107 * 108 * Enable given cgroup v2 controllers, if @controllers is NULL, enable all 109 * available controllers. 110 * 111 * If successful, 0 is returned. 112 */ 113 int enable_controllers(const char *relative_path, const char *controllers) 114 { 115 char cgroup_path[PATH_MAX + 1]; 116 117 format_cgroup_path(cgroup_path, relative_path); 118 return __enable_controllers(cgroup_path, controllers); 119 } 120 121 static int __write_cgroup_file(const char *cgroup_path, const char *file, 122 const char *buf) 123 { 124 char file_path[PATH_MAX + 1]; 125 int fd; 126 127 snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file); 128 fd = open(file_path, O_RDWR); 129 if (fd < 0) { 130 log_err("Opening %s", file_path); 131 return 1; 132 } 133 134 if (dprintf(fd, "%s", buf) <= 0) { 135 log_err("Writing to %s", file_path); 136 close(fd); 137 return 1; 138 } 139 close(fd); 140 return 0; 141 } 142 143 /** 144 * write_cgroup_file() - Write to a cgroup file 145 * @relative_path: The cgroup path, relative to the workdir 146 * @file: The name of the file in cgroupfs to write to 147 * @buf: Buffer to write to the file 148 * 149 * Write to a file in the given cgroup's directory. 150 * 151 * If successful, 0 is returned. 152 */ 153 int write_cgroup_file(const char *relative_path, const char *file, 154 const char *buf) 155 { 156 char cgroup_path[PATH_MAX - 24]; 157 158 format_cgroup_path(cgroup_path, relative_path); 159 return __write_cgroup_file(cgroup_path, file, buf); 160 } 161 162 /** 163 * write_cgroup_file_parent() - Write to a cgroup file in the parent process 164 * workdir 165 * @relative_path: The cgroup path, relative to the parent process workdir 166 * @file: The name of the file in cgroupfs to write to 167 * @buf: Buffer to write to the file 168 * 169 * Write to a file in the given cgroup's directory under the parent process 170 * workdir. 171 * 172 * If successful, 0 is returned. 173 */ 174 int write_cgroup_file_parent(const char *relative_path, const char *file, 175 const char *buf) 176 { 177 char cgroup_path[PATH_MAX - 24]; 178 179 format_parent_cgroup_path(cgroup_path, relative_path); 180 return __write_cgroup_file(cgroup_path, file, buf); 181 } 182 183 /** 184 * setup_cgroup_environment() - Setup the cgroup environment 185 * 186 * After calling this function, cleanup_cgroup_environment should be called 187 * once testing is complete. 188 * 189 * This function will print an error to stderr and return 1 if it is unable 190 * to setup the cgroup environment. If setup is successful, 0 is returned. 191 */ 192 int setup_cgroup_environment(void) 193 { 194 char cgroup_workdir[PATH_MAX - 24]; 195 196 format_cgroup_path(cgroup_workdir, ""); 197 198 if (unshare(CLONE_NEWNS)) { 199 log_err("unshare"); 200 return 1; 201 } 202 203 if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { 204 log_err("mount fakeroot"); 205 return 1; 206 } 207 208 if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) { 209 log_err("mount cgroup2"); 210 return 1; 211 } 212 213 /* Cleanup existing failed runs, now that the environment is setup */ 214 cleanup_cgroup_environment(); 215 216 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 217 log_err("mkdir cgroup work dir"); 218 return 1; 219 } 220 221 /* Enable all available controllers to increase test coverage */ 222 if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) || 223 __enable_controllers(cgroup_workdir, NULL)) 224 return 1; 225 226 return 0; 227 } 228 229 static int nftwfunc(const char *filename, const struct stat *statptr, 230 int fileflags, struct FTW *pfwt) 231 { 232 if ((fileflags & FTW_D) && rmdir(filename)) 233 log_err("Removing cgroup: %s", filename); 234 return 0; 235 } 236 237 static int join_cgroup_from_top(const char *cgroup_path) 238 { 239 char cgroup_procs_path[PATH_MAX + 1]; 240 pid_t pid = getpid(); 241 int fd, rc = 0; 242 243 snprintf(cgroup_procs_path, sizeof(cgroup_procs_path), 244 "%s/cgroup.procs", cgroup_path); 245 246 fd = open(cgroup_procs_path, O_WRONLY); 247 if (fd < 0) { 248 log_err("Opening Cgroup Procs: %s", cgroup_procs_path); 249 return 1; 250 } 251 252 if (dprintf(fd, "%d\n", pid) < 0) { 253 log_err("Joining Cgroup"); 254 rc = 1; 255 } 256 257 close(fd); 258 return rc; 259 } 260 261 /** 262 * join_cgroup() - Join a cgroup 263 * @relative_path: The cgroup path, relative to the workdir, to join 264 * 265 * This function expects a cgroup to already be created, relative to the cgroup 266 * work dir, and it joins it. For example, passing "/my-cgroup" as the path 267 * would actually put the calling process into the cgroup 268 * "/cgroup-test-work-dir/my-cgroup" 269 * 270 * On success, it returns 0, otherwise on failure it returns 1. 271 */ 272 int join_cgroup(const char *relative_path) 273 { 274 char cgroup_path[PATH_MAX + 1]; 275 276 format_cgroup_path(cgroup_path, relative_path); 277 return join_cgroup_from_top(cgroup_path); 278 } 279 280 /** 281 * join_parent_cgroup() - Join a cgroup in the parent process workdir 282 * @relative_path: The cgroup path, relative to parent process workdir, to join 283 * 284 * See join_cgroup(). 285 * 286 * On success, it returns 0, otherwise on failure it returns 1. 287 */ 288 int join_parent_cgroup(const char *relative_path) 289 { 290 char cgroup_path[PATH_MAX + 1]; 291 292 format_parent_cgroup_path(cgroup_path, relative_path); 293 return join_cgroup_from_top(cgroup_path); 294 } 295 296 /** 297 * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment 298 * 299 * This is an idempotent function to delete all temporary cgroups that 300 * have been created during the test, including the cgroup testing work 301 * directory. 302 * 303 * At call time, it moves the calling process to the root cgroup, and then 304 * runs the deletion process. It is idempotent, and should not fail, unless 305 * a process is lingering. 306 * 307 * On failure, it will print an error to stderr, and try to continue. 308 */ 309 void cleanup_cgroup_environment(void) 310 { 311 char cgroup_workdir[PATH_MAX + 1]; 312 313 format_cgroup_path(cgroup_workdir, ""); 314 join_cgroup_from_top(CGROUP_MOUNT_PATH); 315 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 316 } 317 318 /** 319 * get_root_cgroup() - Get the FD of the root cgroup 320 * 321 * On success, it returns the file descriptor. On failure, it returns -1. 322 * If there is a failure, it prints the error to stderr. 323 */ 324 int get_root_cgroup(void) 325 { 326 int fd; 327 328 fd = open(CGROUP_MOUNT_PATH, O_RDONLY); 329 if (fd < 0) { 330 log_err("Opening root cgroup"); 331 return -1; 332 } 333 return fd; 334 } 335 336 /** 337 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD 338 * @relative_path: The cgroup path, relative to the workdir, to join 339 * 340 * This function creates a cgroup under the top level workdir and returns the 341 * file descriptor. It is idempotent. 342 * 343 * On success, it returns the file descriptor. On failure it returns -1. 344 * If there is a failure, it prints the error to stderr. 345 */ 346 int create_and_get_cgroup(const char *relative_path) 347 { 348 char cgroup_path[PATH_MAX + 1]; 349 int fd; 350 351 format_cgroup_path(cgroup_path, relative_path); 352 if (mkdir(cgroup_path, 0777) && errno != EEXIST) { 353 log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path); 354 return -1; 355 } 356 357 fd = open(cgroup_path, O_RDONLY); 358 if (fd < 0) { 359 log_err("Opening Cgroup"); 360 return -1; 361 } 362 363 return fd; 364 } 365 366 /** 367 * get_cgroup_id() - Get cgroup id for a particular cgroup path 368 * @relative_path: The cgroup path, relative to the workdir, to join 369 * 370 * On success, it returns the cgroup id. On failure it returns 0, 371 * which is an invalid cgroup id. 372 * If there is a failure, it prints the error to stderr. 373 */ 374 unsigned long long get_cgroup_id(const char *relative_path) 375 { 376 int dirfd, err, flags, mount_id, fhsize; 377 union { 378 unsigned long long cgid; 379 unsigned char raw_bytes[8]; 380 } id; 381 char cgroup_workdir[PATH_MAX + 1]; 382 struct file_handle *fhp, *fhp2; 383 unsigned long long ret = 0; 384 385 format_cgroup_path(cgroup_workdir, relative_path); 386 387 dirfd = AT_FDCWD; 388 flags = 0; 389 fhsize = sizeof(*fhp); 390 fhp = calloc(1, fhsize); 391 if (!fhp) { 392 log_err("calloc"); 393 return 0; 394 } 395 err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags); 396 if (err >= 0 || fhp->handle_bytes != 8) { 397 log_err("name_to_handle_at"); 398 goto free_mem; 399 } 400 401 fhsize = sizeof(struct file_handle) + fhp->handle_bytes; 402 fhp2 = realloc(fhp, fhsize); 403 if (!fhp2) { 404 log_err("realloc"); 405 goto free_mem; 406 } 407 err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags); 408 fhp = fhp2; 409 if (err < 0) { 410 log_err("name_to_handle_at"); 411 goto free_mem; 412 } 413 414 memcpy(id.raw_bytes, fhp->f_handle, 8); 415 ret = id.cgid; 416 417 free_mem: 418 free(fhp); 419 return ret; 420 } 421 422 int cgroup_setup_and_join(const char *path) { 423 int cg_fd; 424 425 if (setup_cgroup_environment()) { 426 fprintf(stderr, "Failed to setup cgroup environment\n"); 427 return -EINVAL; 428 } 429 430 cg_fd = create_and_get_cgroup(path); 431 if (cg_fd < 0) { 432 fprintf(stderr, "Failed to create test cgroup\n"); 433 cleanup_cgroup_environment(); 434 return cg_fd; 435 } 436 437 if (join_cgroup(path)) { 438 fprintf(stderr, "Failed to join cgroup\n"); 439 cleanup_cgroup_environment(); 440 return -EINVAL; 441 } 442 return cg_fd; 443 } 444 445 /** 446 * setup_classid_environment() - Setup the cgroupv1 net_cls environment 447 * 448 * After calling this function, cleanup_classid_environment should be called 449 * once testing is complete. 450 * 451 * This function will print an error to stderr and return 1 if it is unable 452 * to setup the cgroup environment. If setup is successful, 0 is returned. 453 */ 454 int setup_classid_environment(void) 455 { 456 char cgroup_workdir[PATH_MAX + 1]; 457 458 format_classid_path(cgroup_workdir); 459 460 if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && 461 errno != EBUSY) { 462 log_err("mount cgroup base"); 463 return 1; 464 } 465 466 if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { 467 log_err("mkdir cgroup net_cls"); 468 return 1; 469 } 470 471 if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && 472 errno != EBUSY) { 473 log_err("mount cgroup net_cls"); 474 return 1; 475 } 476 477 cleanup_classid_environment(); 478 479 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 480 log_err("mkdir cgroup work dir"); 481 return 1; 482 } 483 484 return 0; 485 } 486 487 /** 488 * set_classid() - Set a cgroupv1 net_cls classid 489 * @id: the numeric classid 490 * 491 * Writes the passed classid into the cgroup work dir's net_cls.classid 492 * file in order to later on trigger socket tagging. 493 * 494 * On success, it returns 0, otherwise on failure it returns 1. If there 495 * is a failure, it prints the error to stderr. 496 */ 497 int set_classid(unsigned int id) 498 { 499 char cgroup_workdir[PATH_MAX - 42]; 500 char cgroup_classid_path[PATH_MAX + 1]; 501 int fd, rc = 0; 502 503 format_classid_path(cgroup_workdir); 504 snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), 505 "%s/net_cls.classid", cgroup_workdir); 506 507 fd = open(cgroup_classid_path, O_WRONLY); 508 if (fd < 0) { 509 log_err("Opening cgroup classid: %s", cgroup_classid_path); 510 return 1; 511 } 512 513 if (dprintf(fd, "%u\n", id) < 0) { 514 log_err("Setting cgroup classid"); 515 rc = 1; 516 } 517 518 close(fd); 519 return rc; 520 } 521 522 /** 523 * join_classid() - Join a cgroupv1 net_cls classid 524 * 525 * This function expects the cgroup work dir to be already created, as we 526 * join it here. This causes the process sockets to be tagged with the given 527 * net_cls classid. 528 * 529 * On success, it returns 0, otherwise on failure it returns 1. 530 */ 531 int join_classid(void) 532 { 533 char cgroup_workdir[PATH_MAX + 1]; 534 535 format_classid_path(cgroup_workdir); 536 return join_cgroup_from_top(cgroup_workdir); 537 } 538 539 /** 540 * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment 541 * 542 * At call time, it moves the calling process to the root cgroup, and then 543 * runs the deletion process. 544 * 545 * On failure, it will print an error to stderr, and try to continue. 546 */ 547 void cleanup_classid_environment(void) 548 { 549 char cgroup_workdir[PATH_MAX + 1]; 550 551 format_classid_path(cgroup_workdir); 552 join_cgroup_from_top(NETCLS_MOUNT_PATH); 553 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 554 } 555