xref: /linux/tools/testing/selftests/bpf/cgroup_helpers.c (revision 8b6d678fede700db6466d73f11fcbad496fa515e)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <sys/mount.h>
5 #include <sys/stat.h>
6 #include <sys/types.h>
7 #include <linux/limits.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <linux/sched.h>
11 #include <fcntl.h>
12 #include <unistd.h>
13 #include <ftw.h>
14 
15 #include "cgroup_helpers.h"
16 #include "bpf_util.h"
17 
18 /*
19  * To avoid relying on the system setup, when setup_cgroup_env is called
20  * we create a new mount namespace, and cgroup namespace. The cgroupv2
21  * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
22  * have cgroupv2 enabled at this point in time. It's easier to create our
23  * own mount namespace and manage it ourselves. We assume /mnt exists.
24  *
25  * Related cgroupv1 helpers are named *classid*(), since we only use the
26  * net_cls controller for tagging net_cls.classid. We assume the default
27  * mount under /sys/fs/cgroup/net_cls, which should be the case for the
28  * vast majority of users.
29  */
30 
/* Passed to nftw() as the limit on simultaneously open directory fds. */
#define WALK_FD_LIMIT			16

/* Mount points used by the helpers and the per-process work dir prefix. */
#define CGROUP_MOUNT_PATH		"/mnt"
#define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
#define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
#define CGROUP_WORK_DIR			"/cgroup-test-work-dir"

/*
 * Write "<CGROUP_MOUNT_PATH><CGROUP_WORK_DIR><pid><path>" into @buf.
 * @buf has to be a char array (not a pointer): its size is taken with
 * sizeof(). The result is truncated if it does not fit.
 */
#define format_cgroup_path_pid(buf, path, pid)				\
	snprintf((buf), sizeof(buf), "%s%s%d%s",			\
		 CGROUP_MOUNT_PATH, CGROUP_WORK_DIR, (pid), (path))

/* Same as format_cgroup_path_pid(), using the calling process' pid. */
#define format_cgroup_path(buf, path)					\
	format_cgroup_path_pid(buf, path, getpid())

/* Same as format_cgroup_path_pid(), using the parent process' pid. */
#define format_parent_cgroup_path(buf, path)				\
	format_cgroup_path_pid(buf, path, getppid())

/*
 * Write "<NETCLS_MOUNT_PATH><CGROUP_WORK_DIR><pid>" into @buf, which has
 * to be a char array for the same reason as above.
 */
#define format_classid_path_pid(buf, pid)				\
	snprintf((buf), sizeof(buf), "%s%s%d",				\
		 NETCLS_MOUNT_PATH, CGROUP_WORK_DIR, (pid))

/* Same as format_classid_path_pid(), using the calling process' pid. */
#define format_classid_path(buf)					\
	format_classid_path_pid(buf, getpid())
54 
55 static __thread bool cgroup_workdir_mounted;
56 
57 static void __cleanup_cgroup_environment(void);
58 
/*
 * Enable cgroup v2 controllers for @cgroup_path by writing one
 * "+<controller>" entry per controller to its cgroup.subtree_control file.
 *
 * @cgroup_path: path of the cgroup directory
 * @controllers: space-separated controller names, or NULL to enable every
 *               controller listed in @cgroup_path/cgroup.controllers
 *
 * Returns 0 on success (including when cgroup.controllers is empty) and 1
 * on failure, after logging the error.
 */
static int __enable_controllers(const char *cgroup_path, const char *controllers)
{
	char path[PATH_MAX + 1];
	char enable[PATH_MAX + 1];
	char *c, *c2;
	int fd, cfd;
	ssize_t len;

	/* If no controllers are passed, enable all available controllers */
	if (!controllers) {
		snprintf(path, sizeof(path), "%s/cgroup.controllers",
			 cgroup_path);
		fd = open(path, O_RDONLY);
		if (fd < 0) {
			log_err("Opening cgroup.controllers: %s", path);
			return 1;
		}
		len = read(fd, enable, sizeof(enable) - 1);
		if (len < 0) {
			/* Log before close() so errno still belongs to read() */
			log_err("Reading cgroup.controllers: %s", path);
			close(fd);
			return 1;
		} else if (len == 0) { /* No controllers to enable */
			close(fd);
			return 0;
		}
		enable[len] = 0;
		close(fd);
	} else {
		bpf_strlcpy(enable, controllers, sizeof(enable));
	}

	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
	cfd = open(path, O_RDWR);
	if (cfd < 0) {
		log_err("Opening cgroup.subtree_control: %s", path);
		return 1;
	}

	/*
	 * The text read from cgroup.controllers may end with a newline. Treat
	 * it as a separator too, so it never becomes part of a controller
	 * name (and of the error message below).
	 */
	for (c = strtok_r(enable, " \n", &c2); c;
	     c = strtok_r(NULL, " \n", &c2)) {
		if (dprintf(cfd, "+%s\n", c) <= 0) {
			log_err("Enabling controller %s: %s", c, path);
			close(cfd);
			return 1;
		}
	}
	close(cfd);
	return 0;
}
108 
109 /**
110  * enable_controllers() - Enable cgroup v2 controllers
111  * @relative_path: The cgroup path, relative to the workdir
112  * @controllers: List of controllers to enable in cgroup.controllers format
113  *
114  *
115  * Enable given cgroup v2 controllers, if @controllers is NULL, enable all
116  * available controllers.
117  *
118  * If successful, 0 is returned.
119  */
120 int enable_controllers(const char *relative_path, const char *controllers)
121 {
122 	char cgroup_path[PATH_MAX + 1];
123 
124 	format_cgroup_path(cgroup_path, relative_path);
125 	return __enable_controllers(cgroup_path, controllers);
126 }
127 
/*
 * Write the string @buf to "@cgroup_path/@file".
 *
 * Returns 0 on success. Returns 1, after logging the error, if the file
 * cannot be opened or if nothing could be written.
 */
static int __write_cgroup_file(const char *cgroup_path, const char *file,
			       const char *buf)
{
	char target[PATH_MAX + 1];
	int out_fd;
	int rc = 0;

	snprintf(target, sizeof(target), "%s/%s", cgroup_path, file);

	out_fd = open(target, O_RDWR);
	if (out_fd < 0) {
		log_err("Opening %s", target);
		return 1;
	}

	/* A write of zero bytes counts as a failure as well. */
	if (dprintf(out_fd, "%s", buf) <= 0) {
		log_err("Writing to %s", target);
		rc = 1;
	}

	close(out_fd);
	return rc;
}
149 
150 /**
151  * write_cgroup_file() - Write to a cgroup file
152  * @relative_path: The cgroup path, relative to the workdir
153  * @file: The name of the file in cgroupfs to write to
154  * @buf: Buffer to write to the file
155  *
156  * Write to a file in the given cgroup's directory.
157  *
158  * If successful, 0 is returned.
159  */
160 int write_cgroup_file(const char *relative_path, const char *file,
161 		      const char *buf)
162 {
163 	char cgroup_path[PATH_MAX - 24];
164 
165 	format_cgroup_path(cgroup_path, relative_path);
166 	return __write_cgroup_file(cgroup_path, file, buf);
167 }
168 
169 /**
170  * write_cgroup_file_parent() - Write to a cgroup file in the parent process
171  *                              workdir
172  * @relative_path: The cgroup path, relative to the parent process workdir
173  * @file: The name of the file in cgroupfs to write to
174  * @buf: Buffer to write to the file
175  *
176  * Write to a file in the given cgroup's directory under the parent process
177  * workdir.
178  *
179  * If successful, 0 is returned.
180  */
181 int write_cgroup_file_parent(const char *relative_path, const char *file,
182 			     const char *buf)
183 {
184 	char cgroup_path[PATH_MAX - 24];
185 
186 	format_parent_cgroup_path(cgroup_path, relative_path);
187 	return __write_cgroup_file(cgroup_path, file, buf);
188 }
189 
190 /**
191  * setup_cgroup_environment() - Setup the cgroup environment
192  *
193  * After calling this function, cleanup_cgroup_environment should be called
194  * once testing is complete.
195  *
196  * This function will print an error to stderr and return 1 if it is unable
197  * to setup the cgroup environment. If setup is successful, 0 is returned.
198  */
199 int setup_cgroup_environment(void)
200 {
201 	char cgroup_workdir[PATH_MAX - 24];
202 
203 	format_cgroup_path(cgroup_workdir, "");
204 
205 	if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) {
206 		log_err("mkdir mount");
207 		return 1;
208 	}
209 
210 	if (unshare(CLONE_NEWNS)) {
211 		log_err("unshare");
212 		return 1;
213 	}
214 
215 	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
216 		log_err("mount fakeroot");
217 		return 1;
218 	}
219 
220 	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
221 		log_err("mount cgroup2");
222 		return 1;
223 	}
224 	cgroup_workdir_mounted = true;
225 
226 	/* Cleanup existing failed runs, now that the environment is setup */
227 	__cleanup_cgroup_environment();
228 
229 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
230 		log_err("mkdir cgroup work dir");
231 		return 1;
232 	}
233 
234 	/* Enable all available controllers to increase test coverage */
235 	if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||
236 	    __enable_controllers(cgroup_workdir, NULL))
237 		return 1;
238 
239 	return 0;
240 }
241 
/*
 * nftw() callback used by the cleanup helpers: rmdir() every directory
 * that is visited.
 *
 * The type flag passed by nftw() is an enumerated value, not a bit mask,
 * so it is compared against the directory values explicitly. FTW_DP is
 * what a post-order (FTW_DEPTH) walk reports for directories.
 *
 * Always returns 0 so that the walk carries on after a failed rmdir();
 * the failure is only logged.
 */
static int nftwfunc(const char *filename, const struct stat *statptr,
		    int fileflags, struct FTW *pfwt)
{
	if (fileflags != FTW_D && fileflags != FTW_DP)
		return 0;

	if (rmdir(filename))
		log_err("Removing cgroup: %s", filename);
	return 0;
}
249 
/*
 * Move the calling process into the cgroup at @cgroup_path by writing its
 * pid to "@cgroup_path/cgroup.procs".
 *
 * Returns 0 on success and 1 on failure, after logging the error.
 */
static int join_cgroup_from_top(const char *cgroup_path)
{
	char procs_file[PATH_MAX + 1];
	int procs_fd;
	int ret;

	snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs",
		 cgroup_path);

	procs_fd = open(procs_file, O_WRONLY);
	if (procs_fd < 0) {
		log_err("Opening Cgroup Procs: %s", procs_file);
		return 1;
	}

	if (dprintf(procs_fd, "%d\n", getpid()) < 0) {
		log_err("Joining Cgroup");
		ret = 1;
	} else {
		ret = 0;
	}

	close(procs_fd);
	return ret;
}
273 
274 /**
275  * join_cgroup() - Join a cgroup
276  * @relative_path: The cgroup path, relative to the workdir, to join
277  *
278  * This function expects a cgroup to already be created, relative to the cgroup
279  * work dir, and it joins it. For example, passing "/my-cgroup" as the path
280  * would actually put the calling process into the cgroup
281  * "/cgroup-test-work-dir/my-cgroup"
282  *
283  * On success, it returns 0, otherwise on failure it returns 1.
284  */
285 int join_cgroup(const char *relative_path)
286 {
287 	char cgroup_path[PATH_MAX + 1];
288 
289 	format_cgroup_path(cgroup_path, relative_path);
290 	return join_cgroup_from_top(cgroup_path);
291 }
292 
293 /**
294  * join_root_cgroup() - Join the root cgroup
295  *
296  * This function joins the root cgroup.
297  *
298  * On success, it returns 0, otherwise on failure it returns 1.
299  */
300 int join_root_cgroup(void)
301 {
302 	return join_cgroup_from_top(CGROUP_MOUNT_PATH);
303 }
304 
305 /**
306  * join_parent_cgroup() - Join a cgroup in the parent process workdir
307  * @relative_path: The cgroup path, relative to parent process workdir, to join
308  *
309  * See join_cgroup().
310  *
311  * On success, it returns 0, otherwise on failure it returns 1.
312  */
313 int join_parent_cgroup(const char *relative_path)
314 {
315 	char cgroup_path[PATH_MAX + 1];
316 
317 	format_parent_cgroup_path(cgroup_path, relative_path);
318 	return join_cgroup_from_top(cgroup_path);
319 }
320 
321 /**
322  * __cleanup_cgroup_environment() - Delete temporary cgroups
323  *
324  * This is a helper for cleanup_cgroup_environment() that is responsible for
325  * deletion of all temporary cgroups that have been created during the test.
326  */
327 static void __cleanup_cgroup_environment(void)
328 {
329 	char cgroup_workdir[PATH_MAX + 1];
330 
331 	format_cgroup_path(cgroup_workdir, "");
332 	join_cgroup_from_top(CGROUP_MOUNT_PATH);
333 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
334 }
335 
336 /**
337  * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
338  *
339  * This is an idempotent function to delete all temporary cgroups that
340  * have been created during the test and unmount the cgroup testing work
341  * directory.
342  *
343  * At call time, it moves the calling process to the root cgroup, and then
344  * runs the deletion process. It is idempotent, and should not fail, unless
345  * a process is lingering.
346  *
347  * On failure, it will print an error to stderr, and try to continue.
348  */
349 void cleanup_cgroup_environment(void)
350 {
351 	__cleanup_cgroup_environment();
352 	if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH))
353 		log_err("umount cgroup2");
354 	cgroup_workdir_mounted = false;
355 }
356 
357 /**
358  * get_root_cgroup() - Get the FD of the root cgroup
359  *
360  * On success, it returns the file descriptor. On failure, it returns -1.
361  * If there is a failure, it prints the error to stderr.
362  */
363 int get_root_cgroup(void)
364 {
365 	int fd;
366 
367 	fd = open(CGROUP_MOUNT_PATH, O_RDONLY);
368 	if (fd < 0) {
369 		log_err("Opening root cgroup");
370 		return -1;
371 	}
372 	return fd;
373 }
374 
375 /*
376  * remove_cgroup() - Remove a cgroup
377  * @relative_path: The cgroup path, relative to the workdir, to remove
378  *
379  * This function expects a cgroup to already be created, relative to the cgroup
380  * work dir. It also expects the cgroup doesn't have any children or live
381  * processes and it removes the cgroup.
382  *
383  * On failure, it will print an error to stderr.
384  */
385 void remove_cgroup(const char *relative_path)
386 {
387 	char cgroup_path[PATH_MAX + 1];
388 
389 	format_cgroup_path(cgroup_path, relative_path);
390 	if (rmdir(cgroup_path))
391 		log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
392 }
393 
394 /**
395  * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
396  * @relative_path: The cgroup path, relative to the workdir, to join
397  *
398  * This function creates a cgroup under the top level workdir and returns the
399  * file descriptor. It is idempotent.
400  *
401  * On success, it returns the file descriptor. On failure it returns -1.
402  * If there is a failure, it prints the error to stderr.
403  */
404 int create_and_get_cgroup(const char *relative_path)
405 {
406 	char cgroup_path[PATH_MAX + 1];
407 	int fd;
408 
409 	format_cgroup_path(cgroup_path, relative_path);
410 	if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
411 		log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path);
412 		return -1;
413 	}
414 
415 	fd = open(cgroup_path, O_RDONLY);
416 	if (fd < 0) {
417 		log_err("Opening Cgroup");
418 		return -1;
419 	}
420 
421 	return fd;
422 }
423 
/**
 * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path
 * @cgroup_workdir: The absolute cgroup path
 *
 * The id is taken from the first 8 bytes of the file handle that
 * name_to_handle_at() returns for the path.
 *
 * On success, it returns the cgroup id. On failure it returns 0,
 * which is an invalid cgroup id.
 * If there is a failure, it prints the error to stderr.
 */
unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)
{
	struct file_handle *handle, *grown;
	unsigned long long cgid = 0;
	int mount_id;

	/* Zeroed header only: handle_bytes starts out as 0. */
	handle = calloc(1, sizeof(*handle));
	if (!handle) {
		log_err("calloc");
		return 0;
	}

	/*
	 * Sizing call: with no room for the handle it is expected to fail
	 * while storing the needed size in handle_bytes. Succeeding, or a
	 * size other than 8 bytes, is treated as an error.
	 */
	if (name_to_handle_at(AT_FDCWD, cgroup_workdir, handle,
			      &mount_id, 0) >= 0 ||
	    handle->handle_bytes != 8) {
		log_err("name_to_handle_at");
		goto out;
	}

	/* On realloc() failure the old block is still valid and freed below. */
	grown = realloc(handle,
			sizeof(struct file_handle) + handle->handle_bytes);
	if (!grown) {
		log_err("realloc");
		goto out;
	}
	handle = grown;

	if (name_to_handle_at(AT_FDCWD, cgroup_workdir, handle,
			      &mount_id, 0) < 0) {
		log_err("name_to_handle_at");
		goto out;
	}

	memcpy(&cgid, handle->f_handle, 8);

out:
	free(handle);
	return cgid;
}
476 
477 unsigned long long get_cgroup_id(const char *relative_path)
478 {
479 	char cgroup_workdir[PATH_MAX + 1];
480 
481 	format_cgroup_path(cgroup_workdir, relative_path);
482 	return get_cgroup_id_from_path(cgroup_workdir);
483 }
484 
/**
 * cgroup_setup_and_join() - Setup the cgroup environment, create a cgroup
 *                           and join it
 * @path: The cgroup path, relative to the workdir
 *
 * On success, it returns the file descriptor of the created cgroup, which
 * the caller owns. On failure it prints an error to stderr and returns a
 * negative value: -EINVAL if the environment cannot be set up or the cgroup
 * cannot be joined, or the error value of create_and_get_cgroup(). Once
 * setup has succeeded, any later failure tears the environment down again.
 */
int cgroup_setup_and_join(const char *path)
{
	int cg_fd;

	if (setup_cgroup_environment()) {
		fprintf(stderr, "Failed to setup cgroup environment\n");
		return -EINVAL;
	}

	cg_fd = create_and_get_cgroup(path);
	if (cg_fd < 0) {
		fprintf(stderr, "Failed to create test cgroup\n");
		cleanup_cgroup_environment();
		return cg_fd;
	}

	if (join_cgroup(path)) {
		fprintf(stderr, "Failed to join cgroup\n");
		/* The caller only gets an error code, so do not leak the fd. */
		close(cg_fd);
		cleanup_cgroup_environment();
		return -EINVAL;
	}
	return cg_fd;
}
507 
508 /**
509  * setup_classid_environment() - Setup the cgroupv1 net_cls environment
510  *
511  * After calling this function, cleanup_classid_environment should be called
512  * once testing is complete.
513  *
514  * This function will print an error to stderr and return 1 if it is unable
515  * to setup the cgroup environment. If setup is successful, 0 is returned.
516  */
517 int setup_classid_environment(void)
518 {
519 	char cgroup_workdir[PATH_MAX + 1];
520 
521 	format_classid_path(cgroup_workdir);
522 
523 	if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
524 	    errno != EBUSY) {
525 		log_err("mount cgroup base");
526 		return 1;
527 	}
528 
529 	if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
530 		log_err("mkdir cgroup net_cls");
531 		return 1;
532 	}
533 
534 	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) {
535 		if (errno != EBUSY) {
536 			log_err("mount cgroup net_cls");
537 			return 1;
538 		}
539 
540 		if (rmdir(NETCLS_MOUNT_PATH)) {
541 			log_err("rmdir cgroup net_cls");
542 			return 1;
543 		}
544 		if (umount(CGROUP_MOUNT_DFLT)) {
545 			log_err("umount cgroup base");
546 			return 1;
547 		}
548 	}
549 
550 	cleanup_classid_environment();
551 
552 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
553 		log_err("mkdir cgroup work dir");
554 		return 1;
555 	}
556 
557 	return 0;
558 }
559 
/**
 * set_classid() - Set a cgroupv1 net_cls classid
 *
 * Writes the classid into the cgroup work dir's net_cls.classid
 * file in order to later on trigger socket tagging.
 *
 * The pid of the calling process is used as the classid, ensuring unique
 * identification.
 *
 * On success, it returns 0, otherwise on failure it returns 1. If there
 * is a failure, it prints the error to stderr.
 */
int set_classid(void)
{
	/* Smaller than PATH_MAX: "/net_cls.classid" is appended below. */
	char workdir[PATH_MAX - 42];
	char classid_file[PATH_MAX + 1];
	int classid_fd;
	int ret = 0;

	format_classid_path(workdir);
	snprintf(classid_file, sizeof(classid_file), "%s/net_cls.classid",
		 workdir);

	classid_fd = open(classid_file, O_WRONLY);
	if (classid_fd < 0) {
		log_err("Opening cgroup classid: %s", classid_file);
		return 1;
	}

	if (dprintf(classid_fd, "%u\n", getpid()) < 0) {
		log_err("Setting cgroup classid");
		ret = 1;
	}

	close(classid_fd);
	return ret;
}
595 
596 /**
597  * join_classid() - Join a cgroupv1 net_cls classid
598  *
599  * This function expects the cgroup work dir to be already created, as we
600  * join it here. This causes the process sockets to be tagged with the given
601  * net_cls classid.
602  *
603  * On success, it returns 0, otherwise on failure it returns 1.
604  */
605 int join_classid(void)
606 {
607 	char cgroup_workdir[PATH_MAX + 1];
608 
609 	format_classid_path(cgroup_workdir);
610 	return join_cgroup_from_top(cgroup_workdir);
611 }
612 
613 /**
614  * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
615  *
616  * At call time, it moves the calling process to the root cgroup, and then
617  * runs the deletion process.
618  *
619  * On failure, it will print an error to stderr, and try to continue.
620  */
621 void cleanup_classid_environment(void)
622 {
623 	char cgroup_workdir[PATH_MAX + 1];
624 
625 	format_classid_path(cgroup_workdir);
626 	join_cgroup_from_top(NETCLS_MOUNT_PATH);
627 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
628 }
629 
630 /**
631  * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup
632  */
633 unsigned long long get_classid_cgroup_id(void)
634 {
635 	char cgroup_workdir[PATH_MAX + 1];
636 
637 	format_classid_path(cgroup_workdir);
638 	return get_cgroup_id_from_path(cgroup_workdir);
639 }
640 
/**
 * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name.
 * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be
 * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like
 * "net_cls,net_prio".
 *
 * Each line of /proc/self/cgroup is split on ':'; the first field is taken
 * as the hierarchy id and the second as the subsystem list. A line matches
 * when @subsys_name equals the whole list or any comma-separated entry of it.
 *
 * On success, it returns the hierarchy id of the first matching line. It
 * returns -1 if @subsys_name is NULL, the file cannot be opened, or no line
 * matches.
 */
int get_cgroup1_hierarchy_id(const char *subsys_name)
{
	char *c, *c2, *c3, *c4;
	bool found = false;
	char line[1024];
	FILE *file;
	int i, id = -1;

	if (!subsys_name)
		return -1;

	file = fopen("/proc/self/cgroup", "r");
	if (!file) {
		log_err("fopen /proc/self/cgroup");
		return -1;
	}

	while (fgets(line, sizeof(line), file)) {
		i = 0;
		for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) {
			if (i == 0) {
				id = strtol(c, NULL, 10);
			} else if (i == 1) {
				/* Exact match of the whole field, e.g. "net_cls,net_prio" */
				if (!strcmp(c, subsys_name)) {
					found = true;
					break;
				}

				/*
				 * Multiple subsystems may share one single mount point.
				 * Compare each entry (c3), not the start of the field.
				 */
				for (c3 = strtok_r(c, ",", &c4); c3;
				     c3 = strtok_r(NULL, ",", &c4)) {
					if (!strcmp(c3, subsys_name)) {
						found = true;
						break;
					}
				}
			}
			i++;
		}
		if (found)
			break;
	}
	fclose(file);
	return found ? id : -1;
}
692 
693 /**
694  * open_classid() - Open a cgroupv1 net_cls classid
695  *
696  * This function expects the cgroup work dir to be already created, as we
697  * open it here.
698  *
699  * On success, it returns the file descriptor. On failure it returns -1.
700  */
701 int open_classid(void)
702 {
703 	char cgroup_workdir[PATH_MAX + 1];
704 
705 	format_classid_path(cgroup_workdir);
706 	return open(cgroup_workdir, O_RDONLY);
707 }
708