1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <sys/mount.h>
5 #include <sys/stat.h>
6 #include <sys/types.h>
7 #include <linux/limits.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <linux/sched.h>
11 #include <fcntl.h>
12 #include <unistd.h>
13 #include <ftw.h>
14
15 #include "cgroup_helpers.h"
16 #include "bpf_util.h"
17
/*
 * To avoid relying on the system setup, when setup_cgroup_environment() is
 * called we create a new mount namespace, and cgroup namespace. The cgroupv2
 * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
 * have cgroupv2 enabled at this point in time. It's easier to create our
 * own mount namespace and manage it ourselves. We assume /mnt exists.
 *
 * Related cgroupv1 helpers are named *classid*(), since we only use the
 * net_cls controller for tagging net_cls.classid. We assume the default
 * mount under /sys/fs/cgroup/net_cls, which should be the case for the
 * vast majority of users.
 */
30
/* Third argument handed to nftw() by the cleanup walks in this file */
#define WALK_FD_LIMIT			16

/* Mount points and the name prefix of the per-process work directory */
#define CGROUP_MOUNT_PATH		"/mnt"
#define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
#define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
#define CGROUP_WORK_DIR			"/cgroup-test-work-dir"

/*
 * Path formatters.
 *
 * @dst must be a char array (not a pointer) because its capacity is taken
 * with sizeof(). Every macro expands to a single snprintf() call, so its
 * value is snprintf()'s return value.
 *
 * cgroup v2 paths:  CGROUP_MOUNT_PATH CGROUP_WORK_DIR <pid> <rel>
 * net_cls paths:    NETCLS_MOUNT_PATH CGROUP_WORK_DIR <pid>
 */
#define format_cgroup_path_pid(dst, rel, owner)				\
	snprintf((dst), sizeof(dst), "%s%s%d%s", CGROUP_MOUNT_PATH,	\
		 CGROUP_WORK_DIR, (owner), (rel))

/* Same as above, using the pid of the calling process */
#define format_cgroup_path(dst, rel)					\
	format_cgroup_path_pid(dst, rel, getpid())

/* Same as above, using the pid of the parent of the calling process */
#define format_parent_cgroup_path(dst, rel)				\
	format_cgroup_path_pid(dst, rel, getppid())

#define format_classid_path_pid(dst, owner)				\
	snprintf((dst), sizeof(dst), "%s%s%d", NETCLS_MOUNT_PATH,	\
		 CGROUP_WORK_DIR, (owner))

/* net_cls work dir of the calling process */
#define format_classid_path(dst)					\
	format_classid_path_pid(dst, getpid())
54
/*
 * Set to true by setup_cgroup_environment() once its cgroup2 mount step has
 * passed, and reset to false by cleanup_cgroup_environment(), which only
 * calls umount() while this is true. Declared __thread, so every thread has
 * its own copy.
 */
static __thread bool cgroup_workdir_mounted;

/* Forward declaration: setup_cgroup_environment() calls it before its definition */
static void __cleanup_cgroup_environment(void);
58
/*
 * Write one "+<controller>" line per controller to
 * @cgroup_path/cgroup.subtree_control.
 *
 * @controllers is a space-separated list of controller names. When it is
 * NULL, the list is read from @cgroup_path/cgroup.controllers instead, i.e.
 * every controller listed there is enabled.
 *
 * Returns 0 on success (also when cgroup.controllers is empty), 1 on failure
 * after logging the error.
 */
static int __enable_controllers(const char *cgroup_path, const char *controllers)
{
	char names[PATH_MAX + 1];
	char file[PATH_MAX + 1];
	char *tok, *save;
	int src_fd, ctl_fd;
	ssize_t nread;
	int rc = 0;

	if (controllers) {
		bpf_strlcpy(names, controllers, sizeof(names));
	} else {
		/* If no controllers are passed, enable all available controllers */
		snprintf(file, sizeof(file), "%s/cgroup.controllers",
			 cgroup_path);
		src_fd = open(file, O_RDONLY);
		if (src_fd < 0) {
			log_err("Opening cgroup.controllers: %s", file);
			return 1;
		}

		nread = read(src_fd, names, sizeof(names) - 1);
		close(src_fd);
		if (nread < 0) {
			log_err("Reading cgroup.controllers: %s", file);
			return 1;
		}
		if (nread == 0) /* No controllers to enable */
			return 0;
		names[nread] = 0;
	}

	snprintf(file, sizeof(file), "%s/cgroup.subtree_control", cgroup_path);
	ctl_fd = open(file, O_RDWR);
	if (ctl_fd < 0) {
		log_err("Opening cgroup.subtree_control: %s", file);
		return 1;
	}

	/* One write per controller; stop at the first one that fails */
	tok = strtok_r(names, " ", &save);
	while (tok) {
		if (dprintf(ctl_fd, "+%s\n", tok) <= 0) {
			log_err("Enabling controller %s: %s", tok, file);
			rc = 1;
			break;
		}
		tok = strtok_r(NULL, " ", &save);
	}

	close(ctl_fd);
	return rc;
}
108
/**
 * enable_controllers() - Enable cgroup v2 controllers
 * @relative_path: The cgroup path, relative to the workdir of the calling
 *                 process
 * @controllers: Space-separated list of controllers to enable (the format of
 *               cgroup.controllers), or NULL to enable all available ones
 *
 * The relative path is expanded to the absolute path of the cgroup before
 * the controllers are enabled on it.
 *
 * Return: 0 on success, 1 on failure.
 */
int enable_controllers(const char *relative_path, const char *controllers)
{
	char abs_path[PATH_MAX + 1];

	format_cgroup_path(abs_path, relative_path);

	return __enable_controllers(abs_path, controllers);
}
127
/*
 * Open @cgroup_path/@file and write the string @buf to it.
 *
 * Returns 0 on success and 1 on failure (after logging). A write that
 * produces no bytes, e.g. an empty @buf, is reported as a failure as well.
 */
static int __write_cgroup_file(const char *cgroup_path, const char *file,
			       const char *buf)
{
	char target[PATH_MAX + 1];
	int out_fd;
	int rc = 0;

	snprintf(target, sizeof(target), "%s/%s", cgroup_path, file);

	out_fd = open(target, O_RDWR);
	if (out_fd < 0) {
		log_err("Opening %s", target);
		return 1;
	}

	if (dprintf(out_fd, "%s", buf) <= 0) {
		log_err("Writing to %s", target);
		rc = 1;
	}

	close(out_fd);
	return rc;
}
149
/**
 * write_cgroup_file() - Write to a cgroup file
 * @relative_path: The cgroup path, relative to the workdir of the calling
 *                 process
 * @file: The name of the file in that cgroup's directory to write to
 * @buf: NUL-terminated string to write to the file
 *
 * Return: 0 on success, 1 on failure.
 */
int write_cgroup_file(const char *relative_path, const char *file,
		      const char *buf)
{
	/* Smaller than PATH_MAX: "/<file>" is appended to it by the helper */
	char abs_path[PATH_MAX - 24];

	format_cgroup_path(abs_path, relative_path);

	return __write_cgroup_file(abs_path, file, buf);
}
168
/**
 * write_cgroup_file_parent() - Write to a cgroup file in the parent process
 *                              workdir
 * @relative_path: The cgroup path, relative to the parent process workdir
 * @file: The name of the file in that cgroup's directory to write to
 * @buf: NUL-terminated string to write to the file
 *
 * Identical to write_cgroup_file(), except that the workdir is derived from
 * getppid() instead of getpid().
 *
 * Return: 0 on success, 1 on failure.
 */
int write_cgroup_file_parent(const char *relative_path, const char *file,
			     const char *buf)
{
	/* Smaller than PATH_MAX: "/<file>" is appended to it by the helper */
	char abs_path[PATH_MAX - 24];

	format_parent_cgroup_path(abs_path, relative_path);

	return __write_cgroup_file(abs_path, file, buf);
}
189
190 /**
191 * setup_cgroup_environment() - Setup the cgroup environment
192 *
193 * After calling this function, cleanup_cgroup_environment should be called
194 * once testing is complete.
195 *
196 * This function will print an error to stderr and return 1 if it is unable
197 * to setup the cgroup environment. If setup is successful, 0 is returned.
198 */
setup_cgroup_environment(void)199 int setup_cgroup_environment(void)
200 {
201 char cgroup_workdir[PATH_MAX - 24];
202
203 format_cgroup_path(cgroup_workdir, "");
204
205 if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) {
206 log_err("mkdir mount");
207 return 1;
208 }
209
210 if (unshare(CLONE_NEWNS)) {
211 log_err("unshare");
212 return 1;
213 }
214
215 if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
216 log_err("mount fakeroot");
217 return 1;
218 }
219
220 if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
221 log_err("mount cgroup2");
222 return 1;
223 }
224 cgroup_workdir_mounted = true;
225
226 /* Cleanup existing failed runs, now that the environment is setup */
227 __cleanup_cgroup_environment();
228
229 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
230 log_err("mkdir cgroup work dir");
231 return 1;
232 }
233
234 /* Enable all available controllers to increase test coverage */
235 if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||
236 __enable_controllers(cgroup_workdir, NULL))
237 return 1;
238
239 return 0;
240 }
241
/*
 * nftw() callback used by the cleanup walks: try to rmdir() every directory
 * that is visited and skip everything else.
 *
 * @fileflags is nftw()'s type flag. It is an enumerated value, not a bit
 * mask, so it is compared for equality. Directories are reported as FTW_DP
 * when the walk is started with FTW_DEPTH (as the walks in this file are)
 * and as FTW_D otherwise.
 *
 * A failed rmdir() is logged but never aborts the walk: 0 is always
 * returned.
 */
static int nftwfunc(const char *filename, const struct stat *statptr,
		    int fileflags, struct FTW *pfwt)
{
	bool is_dir = fileflags == FTW_D || fileflags == FTW_DP;

	if (is_dir && rmdir(filename))
		log_err("Removing cgroup: %s", filename);

	return 0;
}
249
/*
 * Move the calling process into the cgroup at the absolute path
 * @cgroup_path by writing its pid to @cgroup_path/cgroup.procs.
 *
 * Returns 0 on success, 1 on failure (after logging).
 */
static int join_cgroup_from_top(const char *cgroup_path)
{
	char procs_file[PATH_MAX + 1];
	int procs_fd;
	int rc = 0;

	snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs",
		 cgroup_path);

	procs_fd = open(procs_file, O_WRONLY);
	if (procs_fd < 0) {
		log_err("Opening Cgroup Procs: %s", procs_file);
		return 1;
	}

	if (dprintf(procs_fd, "%d\n", getpid()) < 0) {
		log_err("Joining Cgroup");
		rc = 1;
	}

	close(procs_fd);
	return rc;
}
273
/**
 * join_cgroup() - Join a cgroup
 * @relative_path: The cgroup path, relative to the workdir, to join
 *
 * The cgroup must already exist below the work dir of the calling process.
 * The relative path is appended to that work dir, so passing "/my-cgroup"
 * puts the calling process into "<workdir>/my-cgroup".
 *
 * Return: 0 on success, 1 on failure.
 */
int join_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];

	format_cgroup_path(abs_path, relative_path);

	return join_cgroup_from_top(abs_path);
}
292
293 /**
294 * join_root_cgroup() - Join the root cgroup
295 *
296 * This function joins the root cgroup.
297 *
298 * On success, it returns 0, otherwise on failure it returns 1.
299 */
join_root_cgroup(void)300 int join_root_cgroup(void)
301 {
302 return join_cgroup_from_top(CGROUP_MOUNT_PATH);
303 }
304
/**
 * join_parent_cgroup() - Join a cgroup in the parent process workdir
 * @relative_path: The cgroup path, relative to parent process workdir, to join
 *
 * Same as join_cgroup(), except that the work dir is derived from getppid()
 * instead of getpid().
 *
 * Return: 0 on success, 1 on failure.
 */
int join_parent_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];

	format_parent_cgroup_path(abs_path, relative_path);

	return join_cgroup_from_top(abs_path);
}
320
321 /**
322 * __cleanup_cgroup_environment() - Delete temporary cgroups
323 *
324 * This is a helper for cleanup_cgroup_environment() that is responsible for
325 * deletion of all temporary cgroups that have been created during the test.
326 */
__cleanup_cgroup_environment(void)327 static void __cleanup_cgroup_environment(void)
328 {
329 char cgroup_workdir[PATH_MAX + 1];
330
331 format_cgroup_path(cgroup_workdir, "");
332 join_cgroup_from_top(CGROUP_MOUNT_PATH);
333 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
334 }
335
336 /**
337 * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
338 *
339 * This is an idempotent function to delete all temporary cgroups that
340 * have been created during the test and unmount the cgroup testing work
341 * directory.
342 *
343 * At call time, it moves the calling process to the root cgroup, and then
344 * runs the deletion process. It is idempotent, and should not fail, unless
345 * a process is lingering.
346 *
347 * On failure, it will print an error to stderr, and try to continue.
348 */
cleanup_cgroup_environment(void)349 void cleanup_cgroup_environment(void)
350 {
351 __cleanup_cgroup_environment();
352 if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH))
353 log_err("umount cgroup2");
354 cgroup_workdir_mounted = false;
355 }
356
357 /**
358 * get_root_cgroup() - Get the FD of the root cgroup
359 *
360 * On success, it returns the file descriptor. On failure, it returns -1.
361 * If there is a failure, it prints the error to stderr.
362 */
get_root_cgroup(void)363 int get_root_cgroup(void)
364 {
365 int fd;
366
367 fd = open(CGROUP_MOUNT_PATH, O_RDONLY);
368 if (fd < 0) {
369 log_err("Opening root cgroup");
370 return -1;
371 }
372 return fd;
373 }
374
/**
 * remove_cgroup() - Remove a cgroup
 * @relative_path: The cgroup path, relative to the workdir, to remove
 *
 * The cgroup is removed with rmdir(), so it is expected to exist below the
 * work dir and to have neither children nor live processes.
 *
 * On failure, it will print an error to stderr.
 */
void remove_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];

	format_cgroup_path(abs_path, relative_path);

	if (rmdir(abs_path) != 0)
		log_err("rmdiring cgroup %s .. %s", relative_path, abs_path);
}
393
/**
 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
 * @relative_path: The cgroup path, relative to the workdir, to create
 *
 * Creates the cgroup directory below the work dir of the calling process and
 * opens it read-only. An already existing directory is not an error, which
 * makes the function idempotent.
 *
 * Return: the file descriptor on success. On failure the error is printed
 * to stderr and -1 is returned.
 */
int create_and_get_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];
	int cg_fd;

	format_cgroup_path(abs_path, relative_path);

	if (mkdir(abs_path, 0777) != 0 && errno != EEXIST) {
		log_err("mkdiring cgroup %s .. %s", relative_path, abs_path);
		return -1;
	}

	cg_fd = open(abs_path, O_RDONLY);
	if (cg_fd >= 0)
		return cg_fd;

	log_err("Opening Cgroup");
	return -1;
}
423
/**
 * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path
 * @cgroup_workdir: The absolute cgroup path
 *
 * The id is the 8-byte file handle that name_to_handle_at() reports for the
 * path.
 *
 * Return: the cgroup id on success. On failure the error is printed to
 * stderr and 0, which is an invalid cgroup id, is returned.
 */
static unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)
{
	const unsigned int cgid_len = 8;
	struct file_handle *handle, *grown;
	unsigned long long cgid = 0;
	int mnt_id;

	handle = calloc(1, sizeof(*handle));
	if (!handle) {
		log_err("calloc");
		return 0;
	}

	/*
	 * Size probe: handle_bytes is still 0 from calloc(), so this call is
	 * required to fail and to leave the needed size (8) in handle_bytes.
	 * Anything else is treated as an error.
	 */
	if (name_to_handle_at(AT_FDCWD, cgroup_workdir, handle, &mnt_id, 0) >= 0 ||
	    handle->handle_bytes != cgid_len) {
		log_err("name_to_handle_at");
		goto out;
	}

	grown = realloc(handle, sizeof(struct file_handle) + handle->handle_bytes);
	if (!grown) {
		/* handle is still valid here and is released below */
		log_err("realloc");
		goto out;
	}
	handle = grown;

	if (name_to_handle_at(AT_FDCWD, cgroup_workdir, handle, &mnt_id, 0) < 0) {
		log_err("name_to_handle_at");
		goto out;
	}

	memcpy(&cgid, handle->f_handle, cgid_len);

out:
	free(handle);
	return cgid;
}
476
/**
 * get_cgroup_id() - Get the cgroup id of a cgroup in the workdir
 * @relative_path: The cgroup path, relative to the workdir of the calling
 *                 process
 *
 * Return: the cgroup id on success, 0 on failure.
 */
unsigned long long get_cgroup_id(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];

	format_cgroup_path(abs_path, relative_path);

	return get_cgroup_id_from_path(abs_path);
}
484
/**
 * cgroup_setup_and_join() - Set up the cgroup environment, then create and
 *                           join a cgroup
 * @path: The cgroup path, relative to the workdir, to create and join
 *
 * Combines setup_cgroup_environment(), create_and_get_cgroup() and
 * join_cgroup(). If creating or joining fails, the environment is cleaned up
 * again; if joining fails, the cgroup fd that was already opened is closed
 * as well so it does not leak.
 *
 * Return: the file descriptor of the cgroup on success. -EINVAL if the
 * environment could not be set up or the cgroup could not be joined, or the
 * negative value returned by create_and_get_cgroup() if creation failed.
 */
int cgroup_setup_and_join(const char *path)
{
	int cg_fd;

	if (setup_cgroup_environment()) {
		fprintf(stderr, "Failed to setup cgroup environment\n");
		return -EINVAL;
	}

	cg_fd = create_and_get_cgroup(path);
	if (cg_fd < 0) {
		fprintf(stderr, "Failed to create test cgroup\n");
		cleanup_cgroup_environment();
		return cg_fd;
	}

	if (join_cgroup(path)) {
		fprintf(stderr, "Failed to join cgroup\n");
		/* The caller only gets an error code, so release the fd here */
		close(cg_fd);
		cleanup_cgroup_environment();
		return -EINVAL;
	}

	return cg_fd;
}
507
508 /**
509 * setup_classid_environment() - Setup the cgroupv1 net_cls environment
510 *
511 * This function should only be called in a custom mount namespace, e.g.
512 * created by running setup_cgroup_environment.
513 *
514 * After calling this function, cleanup_classid_environment should be called
515 * once testing is complete.
516 *
517 * This function will print an error to stderr and return 1 if it is unable
518 * to setup the cgroup environment. If setup is successful, 0 is returned.
519 */
setup_classid_environment(void)520 int setup_classid_environment(void)
521 {
522 char cgroup_workdir[PATH_MAX + 1];
523
524 format_classid_path(cgroup_workdir);
525
526 if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
527 errno != EBUSY) {
528 log_err("mount cgroup base");
529 return 1;
530 }
531
532 if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
533 log_err("mkdir cgroup net_cls");
534 return 1;
535 }
536
537 if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) {
538 if (errno != EBUSY) {
539 log_err("mount cgroup net_cls");
540 return 1;
541 }
542
543 if (rmdir(NETCLS_MOUNT_PATH)) {
544 log_err("rmdir cgroup net_cls");
545 return 1;
546 }
547 if (umount(CGROUP_MOUNT_DFLT)) {
548 log_err("umount cgroup base");
549 return 1;
550 }
551 }
552
553 cleanup_classid_environment();
554
555 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
556 log_err("mkdir cgroup work dir");
557 return 1;
558 }
559
560 return 0;
561 }
562
/**
 * set_classid() - Set a cgroupv1 net_cls classid
 *
 * Writes the classid into the net_cls.classid file of the cgroup work dir,
 * in order to later on trigger socket tagging.
 *
 * The pid of the calling process is used as the classid, ensuring unique
 * identification.
 *
 * Return: 0 on success. On failure the error is printed to stderr and 1 is
 * returned.
 */
int set_classid(void)
{
	/* Smaller than PATH_MAX: "/net_cls.classid" is appended below */
	char workdir[PATH_MAX - 42];
	char classid_file[PATH_MAX + 1];
	int classid_fd;
	int rc = 0;

	format_classid_path(workdir);
	snprintf(classid_file, sizeof(classid_file), "%s/net_cls.classid",
		 workdir);

	classid_fd = open(classid_file, O_WRONLY);
	if (classid_fd < 0) {
		log_err("Opening cgroup classid: %s", classid_file);
		return 1;
	}

	if (dprintf(classid_fd, "%u\n", getpid()) < 0) {
		log_err("Setting cgroup classid");
		rc = 1;
	}

	close(classid_fd);
	return rc;
}
598
/**
 * join_classid() - Join a cgroupv1 net_cls classid
 *
 * Moves the calling process into its net_cls work dir, which is expected to
 * exist already. This causes the process sockets to be tagged with the given
 * net_cls classid.
 *
 * Return: 0 on success, 1 on failure.
 */
int join_classid(void)
{
	char workdir[PATH_MAX + 1];

	format_classid_path(workdir);

	return join_cgroup_from_top(workdir);
}
615
616 /**
617 * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
618 *
619 * At call time, it moves the calling process to the root cgroup, and then
620 * runs the deletion process.
621 *
622 * On failure, it will print an error to stderr, and try to continue.
623 */
cleanup_classid_environment(void)624 void cleanup_classid_environment(void)
625 {
626 char cgroup_workdir[PATH_MAX + 1];
627
628 format_classid_path(cgroup_workdir);
629 join_cgroup_from_top(NETCLS_MOUNT_PATH);
630 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
631 }
632
/**
 * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup
 *
 * Looks up the id of the net_cls work dir of the calling process.
 *
 * Return: the cgroup id on success, 0 on failure.
 */
unsigned long long get_classid_cgroup_id(void)
{
	char workdir[PATH_MAX + 1];

	format_classid_path(workdir);

	return get_cgroup_id_from_path(workdir);
}
643
/*
 * Return true if the controller list @list names @subsys_name, either as a
 * whole (e.g. "name=systemd" or "net_cls,net_prio") or as one of its
 * comma-separated entries. @list is modified by the tokenizer.
 */
static bool cgroup1_subsys_list_has(char *list, const char *subsys_name)
{
	char *name, *save;

	if (!strcmp(list, subsys_name))
		return true;

	/* Multiple subsystems may share one single mount point */
	for (name = strtok_r(list, ",", &save); name;
	     name = strtok_r(NULL, ",", &save)) {
		/* Compare the current entry, not the head of the list */
		if (!strcmp(name, subsys_name))
			return true;
	}

	return false;
}

/**
 * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name.
 * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be
 * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like
 * "net_cls,net_prio".
 *
 * Every line of /proc/self/cgroup is split at ':'; the first field is taken
 * as the hierarchy ID and the second as the controller list. The first line
 * whose list matches @subsys_name wins.
 *
 * Return: the hierarchy ID on success, -1 if @subsys_name is NULL, the file
 * cannot be opened or no line matches.
 */
int get_cgroup1_hierarchy_id(const char *subsys_name)
{
	char *id_field, *list_field, *save;
	char line[1024];
	int id = -1;
	FILE *file;

	if (!subsys_name)
		return -1;

	file = fopen("/proc/self/cgroup", "r");
	if (!file) {
		log_err("fopen /proc/self/cgroup");
		return -1;
	}

	while (fgets(line, sizeof(line), file)) {
		id_field = strtok_r(line, ":", &save);
		if (!id_field)
			continue;

		list_field = strtok_r(NULL, ":", &save);
		if (!list_field)
			continue;

		if (cgroup1_subsys_list_has(list_field, subsys_name)) {
			id = strtol(id_field, NULL, 10);
			break;
		}
	}

	fclose(file);
	return id;
}
695
/**
 * open_classid() - Open a cgroupv1 net_cls classid
 *
 * Opens the net_cls work dir of the calling process read-only. The work dir
 * is expected to exist already. Nothing is logged on failure.
 *
 * Return: the file descriptor on success, -1 on failure.
 */
int open_classid(void)
{
	char workdir[PATH_MAX + 1];
	int dir_fd;

	format_classid_path(workdir);
	dir_fd = open(workdir, O_RDONLY);

	return dir_fd;
}
711