xref: /linux/tools/testing/selftests/bpf/cgroup_helpers.c (revision 2aceb896ee18ae35b21b14c978d8c2ef8c7b439d)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <sys/mount.h>
5 #include <sys/stat.h>
6 #include <sys/types.h>
7 #include <linux/limits.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <linux/sched.h>
11 #include <fcntl.h>
12 #include <unistd.h>
13 #include <ftw.h>
14 
15 #include "cgroup_helpers.h"
16 #include "bpf_util.h"
17 
18 /*
19  * To avoid relying on the system setup, when setup_cgroup_env is called
20  * we create a new mount namespace, and cgroup namespace. The cgroupv2
21  * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
22  * have cgroupv2 enabled at this point in time. It's easier to create our
23  * own mount namespace and manage it ourselves. We assume /mnt exists.
24  *
25  * Related cgroupv1 helpers are named *classid*(), since we only use the
26  * net_cls controller for tagging net_cls.classid. We assume the default
27  * mount under /sys/fs/cgroup/net_cls, which should be the case for the
28  * vast majority of users.
29  */
30 
/* Upper bound on the file descriptors nftw() may keep open during a walk. */
#define WALK_FD_LIMIT			16

/* Where the private cgroup v2 hierarchy gets mounted. */
#define CGROUP_MOUNT_PATH		"/mnt"
/* Parent directory of the cgroup v1 net_cls mount. */
#define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
#define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
/* Work directory name; the cgroup v2 helpers append a pid to it. */
#define CGROUP_WORK_DIR			"/cgroup-test-work-dir"

/*
 * Write "<CGROUP_MOUNT_PATH><CGROUP_WORK_DIR><pid><path>" into buf.
 * buf has to be an array (not a pointer) since its size is taken with
 * sizeof(). Evaluates to the snprintf() return value.
 */
#define format_cgroup_path_pid(buf, path, pid) \
	snprintf(buf, sizeof(buf), \
		 CGROUP_MOUNT_PATH CGROUP_WORK_DIR "%d%s", pid, path)

/* Same as format_cgroup_path_pid(), keyed on the calling process' pid. */
#define format_cgroup_path(buf, path) \
	format_cgroup_path_pid(buf, path, getpid())

/* Same as format_cgroup_path_pid(), keyed on the parent process' pid. */
#define format_parent_cgroup_path(buf, path) \
	format_cgroup_path_pid(buf, path, getppid())

/* Write "<NETCLS_MOUNT_PATH><CGROUP_WORK_DIR>" into the array buf. */
#define format_classid_path(buf)				\
	snprintf(buf, sizeof(buf), "%s",			\
		 NETCLS_MOUNT_PATH CGROUP_WORK_DIR)

/*
 * Per-thread flag: set by setup_cgroup_environment() once the cgroup2 mount
 * step has passed, cleared again by cleanup_cgroup_environment().
 */
static __thread bool cgroup_workdir_mounted;

/* Defined further down; setup_cgroup_environment() needs it earlier. */
static void __cleanup_cgroup_environment(void);
55 
/*
 * __enable_controllers() - Enable cgroup v2 controllers below a cgroup
 * @cgroup_path: Path of the cgroup directory
 * @controllers: Space separated controller names, or NULL to enable every
 *               controller listed in @cgroup_path/cgroup.controllers
 *
 * Every controller name is written as "+<name>" to
 * @cgroup_path/cgroup.subtree_control.
 *
 * Returns 0 on success (also when cgroup.controllers is empty), or 1 after
 * logging the failure.
 */
static int __enable_controllers(const char *cgroup_path, const char *controllers)
{
	char path[PATH_MAX + 1];
	char enable[PATH_MAX + 1];
	char *c, *c2;
	int fd, cfd;
	ssize_t len;

	/* If no controllers are passed, enable all available controllers */
	if (!controllers) {
		snprintf(path, sizeof(path), "%s/cgroup.controllers",
			 cgroup_path);
		fd = open(path, O_RDONLY);
		if (fd < 0) {
			log_err("Opening cgroup.controllers: %s", path);
			return 1;
		}
		len = read(fd, enable, sizeof(enable) - 1);
		if (len < 0) {
			/* Log first: close() could overwrite read()'s errno */
			log_err("Reading cgroup.controllers: %s", path);
			close(fd);
			return 1;
		}
		close(fd);
		if (len == 0) /* No controllers to enable */
			return 0;
		enable[len] = 0;
	} else {
		bpf_strlcpy(enable, controllers, sizeof(enable));
	}

	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
	cfd = open(path, O_RDWR);
	if (cfd < 0) {
		log_err("Opening cgroup.subtree_control: %s", path);
		return 1;
	}

	/*
	 * Split on newlines as well as spaces, so a line terminator in the
	 * list never ends up glued to the last controller name.
	 */
	for (c = strtok_r(enable, " \n", &c2); c;
	     c = strtok_r(NULL, " \n", &c2)) {
		if (dprintf(cfd, "+%s\n", c) <= 0) {
			log_err("Enabling controller %s: %s", c, path);
			close(cfd);
			return 1;
		}
	}
	close(cfd);
	return 0;
}
105 
/**
 * enable_controllers() - Turn on cgroup v2 controllers for a test cgroup
 * @relative_path: Cgroup path, relative to the calling process' workdir
 * @controllers: Controllers to enable, in cgroup.controllers format; NULL
 *               selects every available controller
 *
 * Expands @relative_path to a full path inside the calling process' work
 * directory and passes it on to __enable_controllers().
 *
 * Returns 0 on success.
 */
int enable_controllers(const char *relative_path, const char *controllers)
{
	char abs_path[PATH_MAX + 1];

	format_cgroup_path(abs_path, relative_path);

	return __enable_controllers(abs_path, controllers);
}
124 
/*
 * __write_cgroup_file() - Write a string to a file inside a cgroup directory
 * @cgroup_path: Path of the cgroup directory
 * @file: File name inside that directory
 * @buf: NUL terminated text to write
 *
 * Returns 0 on success. Returns 1, after logging, when the file cannot be
 * opened or when dprintf() reports that nothing was written.
 */
static int __write_cgroup_file(const char *cgroup_path, const char *file,
			       const char *buf)
{
	char target[PATH_MAX + 1];
	int target_fd;
	int rc = 0;

	snprintf(target, sizeof(target), "%s/%s", cgroup_path, file);

	target_fd = open(target, O_RDWR);
	if (target_fd < 0) {
		log_err("Opening %s", target);
		return 1;
	}

	if (dprintf(target_fd, "%s", buf) <= 0) {
		log_err("Writing to %s", target);
		rc = 1;
	}

	close(target_fd);
	return rc;
}
146 
/**
 * write_cgroup_file() - Write to a file of a cgroup in the caller's workdir
 * @relative_path: Cgroup path, relative to the calling process' workdir
 * @file: Name of the cgroupfs file to write
 * @buf: NUL terminated text to write
 *
 * Builds the cgroup directory path from the calling process' pid and lets
 * __write_cgroup_file() do the write.
 *
 * Returns 0 on success.
 */
int write_cgroup_file(const char *relative_path, const char *file,
		      const char *buf)
{
	/*
	 * NOTE(review): the buffer is deliberately smaller than PATH_MAX,
	 * presumably to leave room for the "/<file>" suffix added by
	 * __write_cgroup_file() - confirm before changing the size.
	 */
	char cg_dir[PATH_MAX - 24];

	format_cgroup_path(cg_dir, relative_path);

	return __write_cgroup_file(cg_dir, file, buf);
}
165 
/**
 * write_cgroup_file_parent() - Write to a file of a cgroup in the parent
 *                              process' workdir
 * @relative_path: Cgroup path, relative to the parent process' workdir
 * @file: Name of the cgroupfs file to write
 * @buf: NUL terminated text to write
 *
 * Identical to write_cgroup_file(), except that the work directory name is
 * derived from getppid() instead of getpid().
 *
 * Returns 0 on success.
 */
int write_cgroup_file_parent(const char *relative_path, const char *file,
			     const char *buf)
{
	/*
	 * NOTE(review): the buffer is deliberately smaller than PATH_MAX,
	 * presumably to leave room for the "/<file>" suffix added by
	 * __write_cgroup_file() - confirm before changing the size.
	 */
	char parent_cg_dir[PATH_MAX - 24];

	format_parent_cgroup_path(parent_cg_dir, relative_path);

	return __write_cgroup_file(parent_cg_dir, file, buf);
}
186 
187 /**
188  * setup_cgroup_environment() - Setup the cgroup environment
189  *
190  * After calling this function, cleanup_cgroup_environment should be called
191  * once testing is complete.
192  *
193  * This function will print an error to stderr and return 1 if it is unable
194  * to setup the cgroup environment. If setup is successful, 0 is returned.
195  */
196 int setup_cgroup_environment(void)
197 {
198 	char cgroup_workdir[PATH_MAX - 24];
199 
200 	format_cgroup_path(cgroup_workdir, "");
201 
202 	if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) {
203 		log_err("mkdir mount");
204 		return 1;
205 	}
206 
207 	if (unshare(CLONE_NEWNS)) {
208 		log_err("unshare");
209 		return 1;
210 	}
211 
212 	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
213 		log_err("mount fakeroot");
214 		return 1;
215 	}
216 
217 	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
218 		log_err("mount cgroup2");
219 		return 1;
220 	}
221 	cgroup_workdir_mounted = true;
222 
223 	/* Cleanup existing failed runs, now that the environment is setup */
224 	__cleanup_cgroup_environment();
225 
226 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
227 		log_err("mkdir cgroup work dir");
228 		return 1;
229 	}
230 
231 	/* Enable all available controllers to increase test coverage */
232 	if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||
233 	    __enable_controllers(cgroup_workdir, NULL))
234 		return 1;
235 
236 	return 0;
237 }
238 
/*
 * nftwfunc() - nftw() callback that removes the directories it visits
 * @filename: Path of the visited entry
 * @statptr: stat data for the entry (unused)
 * @fileflags: nftw() type flag of the entry
 * @pfwt: Walk position information (unused)
 *
 * Non-directory entries are ignored. A failing rmdir() is logged only.
 *
 * Always returns 0, so the walk carries on after a failed removal.
 */
static int nftwfunc(const char *filename, const struct stat *statptr,
		    int fileflags, struct FTW *pfwt)
{
	/*
	 * The type flag is an enumeration, not a bit mask, so compare it
	 * against the directory values explicitly. FTW_DP is what a walk
	 * started with FTW_DEPTH reports for directories.
	 */
	if ((fileflags == FTW_D || fileflags == FTW_DP) && rmdir(filename))
		log_err("Removing cgroup: %s", filename);
	return 0;
}
246 
/*
 * join_cgroup_from_top() - Move the calling process into a cgroup
 * @cgroup_path: Path of the cgroup directory to join
 *
 * Writes the caller's pid to @cgroup_path/cgroup.procs.
 *
 * Returns 0 on success. Returns 1, after logging, when the file cannot be
 * opened or the write reports an error.
 */
static int join_cgroup_from_top(const char *cgroup_path)
{
	char procs_file[PATH_MAX + 1];
	int procs_fd;
	int written;

	snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs",
		 cgroup_path);

	procs_fd = open(procs_file, O_WRONLY);
	if (procs_fd < 0) {
		log_err("Opening Cgroup Procs: %s", procs_file);
		return 1;
	}

	written = dprintf(procs_fd, "%d\n", getpid());
	if (written < 0)
		log_err("Joining Cgroup");

	close(procs_fd);
	return written < 0 ? 1 : 0;
}
270 
/**
 * join_cgroup() - Move the calling process into a cgroup of its workdir
 * @relative_path: Cgroup path to join, relative to the caller's workdir
 *
 * The cgroup has to exist already. @relative_path is appended to the
 * caller's work directory, whose name is CGROUP_WORK_DIR followed by the
 * caller's pid, below CGROUP_MOUNT_PATH.
 *
 * Returns 0 on success and 1 on failure.
 */
int join_cgroup(const char *relative_path)
{
	char target[PATH_MAX + 1];

	format_cgroup_path(target, relative_path);

	return join_cgroup_from_top(target);
}
289 
290 /**
291  * join_root_cgroup() - Join the root cgroup
292  *
293  * This function joins the root cgroup.
294  *
295  * On success, it returns 0, otherwise on failure it returns 1.
296  */
297 int join_root_cgroup(void)
298 {
299 	return join_cgroup_from_top(CGROUP_MOUNT_PATH);
300 }
301 
/**
 * join_parent_cgroup() - Move the calling process into a cgroup of the
 *                        parent process' workdir
 * @relative_path: Cgroup path to join, relative to the parent's workdir
 *
 * Works like join_cgroup(), but the work directory name is derived from
 * getppid().
 *
 * Returns 0 on success and 1 on failure.
 */
int join_parent_cgroup(const char *relative_path)
{
	char target[PATH_MAX + 1];

	format_parent_cgroup_path(target, relative_path);

	return join_cgroup_from_top(target);
}
317 
318 /**
319  * __cleanup_cgroup_environment() - Delete temporary cgroups
320  *
321  * This is a helper for cleanup_cgroup_environment() that is responsible for
322  * deletion of all temporary cgroups that have been created during the test.
323  */
324 static void __cleanup_cgroup_environment(void)
325 {
326 	char cgroup_workdir[PATH_MAX + 1];
327 
328 	format_cgroup_path(cgroup_workdir, "");
329 	join_cgroup_from_top(CGROUP_MOUNT_PATH);
330 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
331 }
332 
333 /**
334  * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
335  *
336  * This is an idempotent function to delete all temporary cgroups that
337  * have been created during the test and unmount the cgroup testing work
338  * directory.
339  *
340  * At call time, it moves the calling process to the root cgroup, and then
341  * runs the deletion process. It is idempotent, and should not fail, unless
342  * a process is lingering.
343  *
344  * On failure, it will print an error to stderr, and try to continue.
345  */
346 void cleanup_cgroup_environment(void)
347 {
348 	__cleanup_cgroup_environment();
349 	if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH))
350 		log_err("umount cgroup2");
351 	cgroup_workdir_mounted = false;
352 }
353 
354 /**
355  * get_root_cgroup() - Get the FD of the root cgroup
356  *
357  * On success, it returns the file descriptor. On failure, it returns -1.
358  * If there is a failure, it prints the error to stderr.
359  */
360 int get_root_cgroup(void)
361 {
362 	int fd;
363 
364 	fd = open(CGROUP_MOUNT_PATH, O_RDONLY);
365 	if (fd < 0) {
366 		log_err("Opening root cgroup");
367 		return -1;
368 	}
369 	return fd;
370 }
371 
/*
 * remove_cgroup() - Delete a cgroup of the caller's workdir
 * @relative_path: Cgroup path to remove, relative to the caller's workdir
 *
 * The cgroup must exist and is expected to have neither child cgroups nor
 * live processes. It is removed with rmdir().
 *
 * A failure is printed to stderr; nothing is returned.
 */
void remove_cgroup(const char *relative_path)
{
	char victim[PATH_MAX + 1];

	format_cgroup_path(victim, relative_path);

	if (rmdir(victim) != 0)
		log_err("rmdiring cgroup %s .. %s", relative_path, victim);
}
390 
/**
 * create_and_get_cgroup() - Create a cgroup in the caller's workdir and
 *                           open it
 * @relative_path: Cgroup path to create, relative to the caller's workdir
 *
 * An already existing directory is not an error (EEXIST from mkdir() is
 * accepted), so the call can be repeated for the same path.
 *
 * Returns a read-only file descriptor for the cgroup directory on success.
 * On failure the error is printed to stderr and -1 is returned.
 */
int create_and_get_cgroup(const char *relative_path)
{
	char path[PATH_MAX + 1];
	int cg_fd;

	format_cgroup_path(path, relative_path);

	if (mkdir(path, 0777) != 0 && errno != EEXIST) {
		log_err("mkdiring cgroup %s .. %s", relative_path, path);
		return -1;
	}

	cg_fd = open(path, O_RDONLY);
	if (cg_fd >= 0)
		return cg_fd;

	log_err("Opening Cgroup");
	return -1;
}
420 
/**
 * get_cgroup_id() - Look up the id of a cgroup in the caller's workdir
 * @relative_path: Cgroup path to query, relative to the caller's workdir
 *
 * The id is taken from the first 8 bytes of the file handle that
 * name_to_handle_at() returns for the cgroup directory.
 *
 * Returns the cgroup id on success. On failure the error is printed to
 * stderr and 0, which is an invalid cgroup id, is returned.
 */
unsigned long long get_cgroup_id(const char *relative_path)
{
	char path[PATH_MAX + 1];
	struct file_handle *handle, *grown;
	unsigned long long cgid = 0;
	int mount_id;
	int rc;

	format_cgroup_path(path, relative_path);

	handle = calloc(1, sizeof(*handle));
	if (!handle) {
		log_err("calloc");
		return 0;
	}

	/*
	 * Sizing call: handle_bytes is still 0 here, so the call is expected
	 * to fail while reporting the required size, which has to be 8.
	 */
	rc = name_to_handle_at(AT_FDCWD, path, handle, &mount_id, 0);
	if (rc >= 0 || handle->handle_bytes != 8) {
		log_err("name_to_handle_at");
		goto out;
	}

	grown = realloc(handle,
			sizeof(struct file_handle) + handle->handle_bytes);
	if (!grown) {
		/* realloc() left the original allocation in place */
		log_err("realloc");
		goto out;
	}
	handle = grown;

	/* Real call, now with room for the handle payload */
	rc = name_to_handle_at(AT_FDCWD, path, handle, &mount_id, 0);
	if (rc < 0) {
		log_err("name_to_handle_at");
		goto out;
	}

	memcpy(&cgid, handle->f_handle, 8);

out:
	free(handle);
	return cgid;
}
476 
/**
 * cgroup_setup_and_join() - Set up the environment, create a cgroup and
 *                           join it
 * @path: Cgroup path, relative to the caller's workdir
 *
 * Combines setup_cgroup_environment(), create_and_get_cgroup() and
 * join_cgroup(). If creating or joining the cgroup fails, the environment
 * is cleaned up again before returning.
 *
 * Returns the file descriptor of the created cgroup on success, which the
 * caller has to close. Returns a negative value on failure, after printing
 * a message to stderr.
 */
int cgroup_setup_and_join(const char *path)
{
	int cg_fd;

	if (setup_cgroup_environment()) {
		fprintf(stderr, "Failed to setup cgroup environment\n");
		return -EINVAL;
	}

	cg_fd = create_and_get_cgroup(path);
	if (cg_fd < 0) {
		fprintf(stderr, "Failed to create test cgroup\n");
		cleanup_cgroup_environment();
		return cg_fd;
	}

	if (join_cgroup(path)) {
		fprintf(stderr, "Failed to join cgroup\n");
		/* The fd is not handed to the caller on this path: release it */
		close(cg_fd);
		cleanup_cgroup_environment();
		return -EINVAL;
	}
	return cg_fd;
}
499 
500 /**
501  * setup_classid_environment() - Setup the cgroupv1 net_cls environment
502  *
503  * After calling this function, cleanup_classid_environment should be called
504  * once testing is complete.
505  *
506  * This function will print an error to stderr and return 1 if it is unable
507  * to setup the cgroup environment. If setup is successful, 0 is returned.
508  */
509 int setup_classid_environment(void)
510 {
511 	char cgroup_workdir[PATH_MAX + 1];
512 
513 	format_classid_path(cgroup_workdir);
514 
515 	if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
516 	    errno != EBUSY) {
517 		log_err("mount cgroup base");
518 		return 1;
519 	}
520 
521 	if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
522 		log_err("mkdir cgroup net_cls");
523 		return 1;
524 	}
525 
526 	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
527 	    errno != EBUSY) {
528 		log_err("mount cgroup net_cls");
529 		return 1;
530 	}
531 
532 	cleanup_classid_environment();
533 
534 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
535 		log_err("mkdir cgroup work dir");
536 		return 1;
537 	}
538 
539 	return 0;
540 }
541 
/**
 * set_classid() - Set the net_cls classid of the cgroup v1 work directory
 * @id: Numeric classid
 *
 * Writes @id, in decimal, to the net_cls.classid file of the work
 * directory, so that sockets can be tagged with it later on.
 *
 * Returns 0 on success. On failure the error is printed to stderr and 1 is
 * returned.
 */
int set_classid(unsigned int id)
{
	/*
	 * NOTE(review): the reduced size presumably leaves room for the
	 * "/net_cls.classid" suffix appended below - confirm before
	 * changing it.
	 */
	char workdir[PATH_MAX - 42];
	char classid_file[PATH_MAX + 1];
	int classid_fd;
	int failed;

	format_classid_path(workdir);
	snprintf(classid_file, sizeof(classid_file), "%s/net_cls.classid",
		 workdir);

	classid_fd = open(classid_file, O_WRONLY);
	if (classid_fd < 0) {
		log_err("Opening cgroup classid: %s", classid_file);
		return 1;
	}

	failed = dprintf(classid_fd, "%u\n", id) < 0;
	if (failed)
		log_err("Setting cgroup classid");

	close(classid_fd);
	return failed;
}
576 
/**
 * join_classid() - Move the calling process into the net_cls work directory
 *
 * The work directory has to exist already. Joining it is what gets the
 * process' sockets tagged with the net_cls classid set on that directory.
 *
 * Returns 0 on success and 1 on failure.
 */
int join_classid(void)
{
	char workdir[PATH_MAX + 1];

	format_classid_path(workdir);

	return join_cgroup_from_top(workdir);
}
593 
594 /**
595  * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
596  *
597  * At call time, it moves the calling process to the root cgroup, and then
598  * runs the deletion process.
599  *
600  * On failure, it will print an error to stderr, and try to continue.
601  */
602 void cleanup_classid_environment(void)
603 {
604 	char cgroup_workdir[PATH_MAX + 1];
605 
606 	format_classid_path(cgroup_workdir);
607 	join_cgroup_from_top(NETCLS_MOUNT_PATH);
608 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
609 }
610