xref: /linux/tools/testing/selftests/cgroup/lib/cgroup_util.c (revision 23b0f90ba871f096474e1c27c3d14f455189d2d9)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 
3 #define _GNU_SOURCE
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/limits.h>
8 #include <poll.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/inotify.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <unistd.h>
18 
19 #include "cgroup_util.h"
20 #include "../../clone3/clone3_selftests.h"
21 
/*
 * File-scope flag with external linkage; as an object with static storage
 * duration it starts out false. Nothing in this file reads or writes it.
 * NOTE(review): presumably set by tests that exercise a named cgroup v1
 * hierarchy - confirm against the users of this variable.
 */
22 bool cg_test_v1_named;
23 
/*
 * Read up to max_len - 1 bytes from the start of @path into @buf and
 * NUL-terminate the result. Only a single read() is issued, so longer files
 * are silently truncated to what fits.
 *
 * Returns the number of bytes read on success, or -errno on failure.
 * A zero-sized buffer is rejected with -EINVAL because it has no room for
 * the terminator (and max_len - 1 would wrap around).
 */
ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd, err = 0;

	if (!max_len)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	if (len < 0)
		err = errno;	/* capture before close() can overwrite errno */
	else
		buf[len] = '\0';

	close(fd);
	return len < 0 ? -err : len;
}
42 
/*
 * Append @len bytes from @buf to the existing file @path with a single
 * write(). The file is opened O_WRONLY | O_APPEND and is never created.
 *
 * Returns the number of bytes written on success (which may be fewer than
 * @len), or -errno on failure.
 */
ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	int fd, err = 0;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	len = write(fd, buf, len);
	if (len < 0)
		err = errno;	/* capture before close() can overwrite errno */

	close(fd);
	return len < 0 ? -err : len;
}
56 
/*
 * Build the path "<root>/<name>" in freshly allocated memory.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* +2: one byte for the '/' separator, one for the terminating NUL */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
66 
/*
 * Build the path "<root>/<name>_<index>" in freshly allocated memory.
 *
 * The buffer is sized by a measuring snprintf() call, so the result is never
 * truncated regardless of how many characters @index needs.
 *
 * Returns the new string, which the caller must free(), or NULL if
 * formatting or the allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	int need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	size_t len;
	char *ret;

	if (need < 0)
		return NULL;

	len = (size_t)need + 1;	/* room for the terminating NUL */
	ret = malloc(len);
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s_%d", root, name, index);

	return ret;
}
76 
/*
 * Build the path of a control file, "<cgroup>/<control>", in freshly
 * allocated memory.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* +2: one byte for the '/' separator, one for the terminating NUL */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
86 
/*
 * Read the file "<cgroup>/<control>" into @buf via read_text(), which stores
 * at most len - 1 bytes followed by a NUL.
 *
 * Returns 0 on success, or -errno on failure.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];
	ssize_t nread;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	nread = read_text(file, buf, len);
	if (nread < 0)
		return nread;

	return 0;
}
98 
/*
 * Compare the content of "<cgroup>/<control>" with @expected.
 *
 * The scratch buffer holds strlen(expected) + 1 bytes, so at most
 * strlen(expected) bytes of the file are read: the comparison is therefore
 * made against that leading part of the file only.
 *
 * Returns the strcmp() result (0 on match), or -1 when @expected is NULL,
 * the allocation fails, or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	size_t buf_len;
	char *content;
	int diff = -1;

	/* A NULL expectation can never match. */
	if (!expected)
		return -1;

	buf_len = strlen(expected) + 1;
	content = malloc(buf_len);
	if (!content)
		return -1;

	if (!cg_read(cgroup, control, content, buf_len))
		diff = strcmp(expected, content);

	free(content);
	return diff;
}
125 
126 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
127 {
128 	char buf[PAGE_SIZE];
129 
130 	if (cg_read(cgroup, control, buf, sizeof(buf)))
131 		return -1;
132 
133 	return strstr(buf, needle) ? 0 : -1;
134 }
135 
/*
 * Read "<cgroup>/<control>" and parse its leading decimal number with atol().
 *
 * Returns the parsed value, or -1 if the file cannot be read. A file that
 * legitimately contains -1 is indistinguishable from a read failure.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	int err = cg_read(cgroup, control, text, sizeof(text));

	return err ? -1 : atol(text);
}
145 
/*
 * Read from offset 0 of the already-open descriptor @fd and parse the
 * leading decimal number with atol(). pread() is used, so the descriptor's
 * file offset is left untouched and the call can be repeated.
 *
 * Returns the parsed value, or -1 if nothing could be read.
 */
long cg_read_long_fd(int fd)
{
	char buf[128];
	ssize_t len;

	/* Leave one byte free so the data can always be NUL-terminated. */
	len = pread(fd, buf, sizeof(buf) - 1, 0);
	if (len <= 0)
		return -1;

	buf[len] = '\0';
	return atol(buf);
}
155 
156 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
157 {
158 	char buf[PAGE_SIZE];
159 	char *ptr;
160 
161 	if (cg_read(cgroup, control, buf, sizeof(buf)))
162 		return -1;
163 
164 	ptr = strstr(buf, key);
165 	if (!ptr)
166 		return -1;
167 
168 	return atol(ptr + strlen(key));
169 }
170 
/*
 * Poll cg_read_key_long() until it yields @expected.
 *
 * Up to @retries reads are made, sleeping @wait_interval_us microseconds
 * after every read that returned a different non-negative value.
 *
 * Returns the last value read: @expected on success, a negative value as
 * soon as a read fails, or the last mismatching value once the retries are
 * used up. With @retries <= 0 nothing is read and -1 is returned.
 */
long cg_read_key_long_poll(const char *cgroup, const char *control,
			   const char *key, long expected, int retries,
			   useconds_t wait_interval_us)
{
	long cur = -1;
	int attempt = 0;

	while (attempt < retries) {
		cur = cg_read_key_long(cgroup, control, key);
		if (cur < 0 || cur == expected)
			return cur;

		usleep(wait_interval_us);
		attempt++;
	}

	return cur;
}
191 
192 long cg_read_lc(const char *cgroup, const char *control)
193 {
194 	char buf[PAGE_SIZE];
195 	const char delim[] = "\n";
196 	char *line;
197 	long cnt = 0;
198 
199 	if (cg_read(cgroup, control, buf, sizeof(buf)))
200 		return -1;
201 
202 	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
203 		cnt++;
204 
205 	return cnt;
206 }
207 
/*
 * Write the NUL-terminated string @buf to "<cgroup>/<control>".
 *
 * Returns 0 if the whole string was written, -errno if the write failed,
 * or -EIO if fewer bytes than requested were written.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char path[PATH_MAX];
	ssize_t len = strlen(buf), ret;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	ret = write_text(path, buf, len);
	if (ret < 0)
		return ret;

	/*
	 * A short write must not leak out as a non-negative byte count; in
	 * particular a 0-byte write of non-empty data must not read as success.
	 */
	return ret == len ? 0 : -EIO;
}
218 
/*
 * Open the file "<cgroup>/<control>" with the given open(2) @flags.
 *
 * Returns the new file descriptor on success, or -1 on failure. The
 * descriptor is an ordinary one and is released with close().
 */
int cg_open(const char *cgroup, const char *control, int flags)
{
	char file[PATH_MAX];
	int fd;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	fd = open(file, flags);

	return fd;
}
230 
/*
 * Format @value as a signed decimal number and write it to
 * "<cgroup>/<control>" through cg_write().
 *
 * Returns whatever cg_write() returns (0 on success), or a negative value
 * if the number cannot be formatted.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	/* @value is a long, so the matching conversion is %ld, not %lu. */
	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
243 static int cg_find_root(char *root, size_t len, const char *controller,
244 			bool *nsdelegate)
245 {
246 	char buf[10 * PAGE_SIZE];
247 	char *fs, *mount, *type, *options;
248 	const char delim[] = "\n\t ";
249 
250 	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
251 		return -1;
252 
253 	/*
254 	 * Example:
255 	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
256 	 */
257 	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
258 		mount = strtok(NULL, delim);
259 		type = strtok(NULL, delim);
260 		options = strtok(NULL, delim);
261 		strtok(NULL, delim);
262 		strtok(NULL, delim);
263 		if (strcmp(type, "cgroup") == 0) {
264 			if (!controller || !strstr(options, controller))
265 				continue;
266 		} else if (strcmp(type, "cgroup2") == 0) {
267 			if (controller &&
268 					cg_read_strstr(mount, "cgroup.controllers", controller))
269 				continue;
270 		} else {
271 			continue;
272 		}
273 		strncpy(root, mount, len);
274 
275 		if (nsdelegate)
276 			*nsdelegate = !!strstr(options, "nsdelegate");
277 		return 0;
278 
279 	}
280 
281 	return -1;
282 }
283 
/*
 * Find the mount point of the cgroup hierarchy that provides @controller
 * and copy it to @root (at most @len bytes). The nsdelegate mount option is
 * not reported.
 *
 * Returns 0 on success, -1 if no matching mount exists.
 */
int cg_find_controller_root(char *root, size_t len, const char *controller)
{
	bool *no_nsdelegate = NULL;

	return cg_find_root(root, len, controller, no_nsdelegate);
}
288 
/*
 * Find the mount point of the first cgroup2 mount, without requiring any
 * particular controller, and copy it to @root (at most @len bytes). If
 * @nsdelegate is non-NULL it receives whether the mount options contain
 * "nsdelegate".
 *
 * Returns 0 on success, -1 if no such mount exists.
 */
int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
{
	const char *any_controller = NULL;

	return cg_find_root(root, len, any_controller, nsdelegate);
}
293 
/*
 * Create the cgroup directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result: 0 on success, -1 with errno set on failure.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
298 
299 int cg_wait_for_proc_count(const char *cgroup, int count)
300 {
301 	char buf[10 * PAGE_SIZE] = {0};
302 	int attempts;
303 	char *ptr;
304 
305 	for (attempts = 10; attempts >= 0; attempts--) {
306 		int nr = 0;
307 
308 		if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
309 			break;
310 
311 		for (ptr = buf; *ptr; ptr++)
312 			if (*ptr == '\n')
313 				nr++;
314 
315 		if (nr >= count)
316 			return 0;
317 
318 		usleep(100000);
319 	}
320 
321 	return -1;
322 }
323 
324 int cg_killall(const char *cgroup)
325 {
326 	char buf[PAGE_SIZE];
327 	char *ptr = buf;
328 
329 	/* If cgroup.kill exists use it. */
330 	if (!cg_write(cgroup, "cgroup.kill", "1"))
331 		return 0;
332 
333 	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
334 		return -1;
335 
336 	while (ptr < buf + sizeof(buf)) {
337 		int pid = strtol(ptr, &ptr, 10);
338 
339 		if (pid == 0)
340 			break;
341 		if (*ptr)
342 			ptr++;
343 		else
344 			break;
345 		if (kill(pid, SIGKILL))
346 			return -1;
347 	}
348 
349 	return 0;
350 }
351 
/*
 * Remove the cgroup directory @cgroup.
 *
 * While rmdir() fails with EBUSY the cgroup's processes are killed and the
 * removal is retried after 100 us, without any upper bound on the number of
 * retries. A NULL @cgroup or an already missing directory counts as success.
 *
 * Returns 0 on success, otherwise the failing rmdir() result (-1).
 */
int cg_destroy(const char *cgroup)
{
	int rc;

	if (!cgroup)
		return 0;

	while ((rc = rmdir(cgroup)) != 0 && errno == EBUSY) {
		cg_killall(cgroup);
		usleep(100);
	}

	if (rc != 0 && errno == ENOENT)
		return 0;

	return rc;
}
371 
/*
 * Move process @pid into @cgroup by writing its decimal pid to the cgroup's
 * cgroup.procs file.
 *
 * Returns the cg_write() result: 0 on success, non-zero on failure.
 */
int cg_enter(const char *cgroup, int pid)
{
	char pid_text[64];

	snprintf(pid_text, sizeof(pid_text), "%d", pid);

	return cg_write(cgroup, "cgroup.procs", pid_text);
}
379 
/*
 * Write "0" to the cgroup.procs file of @cgroup.
 * NOTE(review): going by the function name, the kernel treats 0 as "the
 * writing process" - confirm against the cgroup documentation.
 *
 * Returns the cg_write() result: 0 on success, non-zero on failure.
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
384 
385 int cg_enter_current_thread(const char *cgroup)
386 {
387 	return cg_write(cgroup, CG_THREADS_FILE, "0");
388 }
389 
/*
 * Run @fn(@cgroup, @arg) in a forked child that first moves itself into
 * @cgroup, and wait for the child to finish.
 *
 * The child exits with EXIT_FAILURE if it cannot enter the cgroup, otherwise
 * with the value returned by @fn.
 *
 * Returns the child's exit status, or a negative value if fork() fails,
 * waiting for the child fails, or the child did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * Retry when a signal interrupts the wait; on any other failure
	 * retcode was never filled in and must not be inspected.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(retcode) ? WEXITSTATUS(retcode) : -1;
}
414 
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd, using sys_clone3() with CLONE_INTO_CGROUP.
 *
 * Returns the sys_clone3() result when the call is usable (callers in this
 * file treat 0 as "running in the child" and a positive value as the child's
 * pid). When clone3() or CLONE_INTO_CGROUP is unavailable - at build time
 * (no CLONE_ARGS_SIZE_VER2) or at run time (ENOSYS / E2BIG) - errno is set
 * to ENOSYS and -ENOSYS is returned so callers can fall back.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t child = sys_clone3(&args, sizeof(struct __clone_args));

	/*
	 * Anything other than "feature missing" is passed straight through:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 */
	if (child >= 0 || (errno != ENOSYS && errno != E2BIG))
		return child;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
442 
/*
 * Wait for a state change of child @pid with waitid(), restarting the call
 * when it is interrupted by a signal. @options selects the state changes of
 * interest (WEXITED, WSTOPPED, WCONTINUED); __WALL and __WNOTHREAD are
 * always added.
 *
 * The reported si_status is decoded with the wait-status macros, checking
 * the requested states in the order exited, stopped, continued.
 * NOTE(review): the W* macros are applied to info.si_status unchanged -
 * confirm against waitid(2) that this encoding is what is intended.
 *
 * Returns the decoded exit status or stop signal, 0 for a continued child,
 * or -1 if waitid() fails or no requested state matches.
 */
int clone_reap(pid_t pid, int options)
{
	siginfo_t info = { .si_signo = 0 };
	int rc;

	do {
		rc = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	} while (rc < 0 && errno == EINTR);

	if (rc < 0)
		return -1;

	if ((options & WEXITED) && WIFEXITED(info.si_status))
		return WEXITSTATUS(info.si_status);

	if ((options & WSTOPPED) && WIFSTOPPED(info.si_status))
		return WSTOPSIG(info.si_status);

	if ((options & WCONTINUED) && WIFCONTINUED(info.si_status))
		return 0;

	return -1;
}
475 
/*
 * Open the directory @dir as an O_PATH descriptor. The open fails if @dir
 * is not a directory (O_DIRECTORY) or is a symbolic link (O_NOFOLLOW), and
 * the descriptor is marked close-on-exec.
 *
 * Returns the descriptor on success, -1 with errno set on failure.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
480 
/*
 * Close @fd, if it is a valid (non-negative) descriptor, while preserving
 * the current errno so an earlier error is not masked by close().
 *
 * Wrapped in do { } while (0) so the macro acts as one statement and is safe
 * in unbraced if/else bodies; the argument is parenthesised for the same
 * reason. @fd is evaluated more than once, so pass a plain variable.
 */
#define close_prot_errno(fd)                                                   \
	do {                                                                   \
		if ((fd) >= 0) {                                               \
			int _e_ = errno;                                       \
			close(fd);                                             \
			errno = _e_;                                           \
		}                                                              \
	} while (0)
487 
/*
 * Start @fn(@cgroup, @arg) in a child created directly inside @cgroup via
 * clone_into_cgroup(), without waiting for it. The child exits with the
 * value returned by @fn.
 *
 * Returns the clone_into_cgroup() result in the parent (the child's pid on
 * success, negative on failure with errno preserved across the close), or
 * -1 if the cgroup directory cannot be opened.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int dirfd = dirfd_open_opath(cgroup);

	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);
	/* Both parent and child drop the directory fd; errno is kept intact. */
	close_prot_errno(dirfd);
	if (child == 0)
		exit(fn(cgroup, arg));

	return child;
}
506 
/*
 * Start @fn(@cgroup, @arg) in a child process inside @cgroup and return
 * without waiting for it.
 *
 * Cloning straight into the cgroup is tried first. Only when that reports
 * ENOSYS does the function fall back to fork(), with the child writing its
 * own pid to cgroup.procs (exiting with EXIT_FAILURE if that fails) before
 * calling @fn.
 *
 * Returns the child's pid on success, -1 on a genuine clone failure, or the
 * negative fork() result if the fallback fails.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	int child = clone_into_cgroup_run_nowait(cgroup, fn, arg);

	if (child > 0)
		return child;

	/* Genuine test failure. */
	if (child < 0 && errno != ENOSYS)
		return -1;

	child = fork();
	if (child != 0)
		return child;

	{
		char pid_text[64];

		snprintf(pid_text, sizeof(pid_text), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", pid_text))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}
}
533 
534 int proc_mount_contains(const char *option)
535 {
536 	char buf[4 * PAGE_SIZE];
537 	ssize_t read;
538 
539 	read = read_text("/proc/mounts", buf, sizeof(buf));
540 	if (read < 0)
541 		return read;
542 
543 	return strstr(buf, option) != NULL;
544 }
545 
546 int cgroup_feature(const char *feature)
547 {
548 	char buf[PAGE_SIZE];
549 	ssize_t read;
550 
551 	read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf));
552 	if (read < 0)
553 		return read;
554 
555 	return strstr(buf, feature) != NULL;
556 }
557 
/*
 * Read the procfs file @item of a process into @buf (at most size - 1 bytes
 * plus a NUL).
 *
 * A non-zero @pid selects /proc/<pid>/<item>. With @pid == 0 the caller's
 * own entry is used: /proc/thread-self/<item> when @thread is true,
 * /proc/self/<item> otherwise.
 *
 * Returns the number of bytes read, or -1 on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];
	ssize_t nread;

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	nread = read_text(path, buf, size);
	if (nread < 0)
		return -1;

	return nread;
}
572 
573 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
574 {
575 	char buf[PAGE_SIZE];
576 
577 	if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
578 		return -1;
579 
580 	return strstr(buf, needle) ? 0 : -1;
581 }
582 
/*
 * Check that a process can be cloned directly into @cgroup: a child is
 * created there, exits immediately with EXIT_SUCCESS, and is reaped.
 *
 * Returns 0 if the clone succeeded, -1 if the cgroup directory cannot be
 * opened or the clone fails.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int dirfd = dirfd_open_opath(cgroup);

	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);
	close_prot_errno(dirfd);

	if (child < 0)
		return -1;
	if (child == 0)
		exit(EXIT_SUCCESS);

	/*
	 * The outcome of reaping is deliberately ignored; only the success of
	 * the initial clone matters here.
	 */
	(void)clone_reap(child, WEXITED);

	return 0;
}
607 
/*
 * Create an inotify instance that watches "<cgroup>/<filename>" for
 * modifications (IN_MODIFY).
 *
 * Returns the inotify file descriptor on success, or -1 if the instance
 * cannot be created, the path cannot be allocated, or the watch cannot be
 * added. The caller owns the descriptor and must close() it.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char *path;
	int fd, wd;

	fd = inotify_init1(0);
	if (fd == -1)
		return -1;

	path = cg_control(cgroup, filename);
	if (!path) {
		close(fd);
		return -1;
	}

	wd = inotify_add_watch(fd, path, IN_MODIFY);
	/* The path is only needed to register the watch; do not leak it. */
	free(path);
	if (wd == -1) {
		close(fd);
		return -1;
	}

	return fd;
}
624 
/*
 * Set up an inotify watch on the cgroup.events file of @cgroup.
 *
 * Returns the inotify file descriptor, or -1 on failure.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	const char *events_file = "cgroup.events";

	return __prepare_for_wait(cgroup, events_file);
}
629 
/*
 * Set up an inotify watch on the memory.events file of @cgroup.
 *
 * Returns the inotify file descriptor, or -1 on failure.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	const char *events_file = "memory.events";

	return __prepare_for_wait(cgroup, events_file);
}
634 
/*
 * Block until @fd becomes readable.
 *
 * poll() is called with a 10 second timeout and simply called again when it
 * times out, is interrupted by a signal, or reports events other than
 * POLLIN, so the function only returns on readability or a real poll error.
 *
 * Returns 0 once POLLIN is reported, -1 if poll() fails.
 */
int cg_wait_for(int fd)
{
	struct pollfd pfd = {
		.fd = fd,
		.events = POLLIN,
	};
	int rc;

	for (;;) {
		rc = poll(&pfd, 1, 10000);

		if (rc > 0 && (pfd.revents & POLLIN))
			return 0;

		if (rc == -1 && errno != EINTR)
			return rc;
	}
}
661