xref: /linux/tools/testing/selftests/cgroup/lib/cgroup_util.c (revision a552c81ff4a16738ca5a44a177d552eb38d552ce)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 
3 #define _GNU_SOURCE
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/limits.h>
8 #include <poll.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/inotify.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <unistd.h>
18 
19 #include "cgroup_util.h"
20 #include "../../clone3/clone3_selftests.h"
21 
22 bool cg_test_v1_named;
23 
/*
 * Read the beginning of the file at @path into @buf as a NUL-terminated
 * string.
 *
 * A single read() of at most @max_len - 1 bytes is issued, so @buf must have
 * room for @max_len bytes including the terminator.
 *
 * Returns the number of bytes read on success, or -errno on failure
 * (-EINVAL when @max_len is 0, as not even the terminator would fit).
 */
ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd, err;

	/* With max_len == 0 the "max_len - 1" below would wrap to SIZE_MAX. */
	if (!max_len)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	/* Capture errno right away: close() below may overwrite it. */
	err = errno;

	if (len >= 0)
		buf[len] = 0;

	close(fd);
	return len < 0 ? -err : len;
}
42 
/*
 * Append @len bytes from @buf to the existing file at @path with a single
 * write() call. The file is opened O_WRONLY | O_APPEND and is not created
 * if it is missing.
 *
 * Returns the number of bytes written on success, or -errno on failure.
 */
ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	int fd, err;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	len = write(fd, buf, len);
	/* Capture errno right away: close() below may overwrite it. */
	err = errno;

	close(fd);
	return len < 0 ? -err : len;
}
56 
/*
 * Build the path "<root>/<name>" in a buffer obtained from malloc().
 *
 * Returns the new string, or NULL if the allocation failed.
 */
char *cg_name(const char *root, const char *name)
{
	/* One byte for the '/' separator plus one for the terminator. */
	size_t size = strlen(root) + 1 + strlen(name) + 1;
	char *path;

	path = malloc(size);
	if (!path)
		return NULL;

	snprintf(path, size, "%s/%s", root, name);
	return path;
}
67 
/*
 * Build the path "<root>/<name>_<index>" in a buffer obtained from malloc().
 *
 * The buffer size is measured with snprintf() first, so the result is never
 * truncated regardless of how many characters @index needs.
 *
 * Returns the new string, or NULL if formatting or the allocation failed.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	char *ret;
	int len;

	/* Dry run: returns the length the formatted string would have. */
	len = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	if (len < 0)
		return NULL;

	ret = malloc((size_t)len + 1);
	if (ret)
		snprintf(ret, (size_t)len + 1, "%s/%s_%d", root, name, index);

	return ret;
}
78 
/*
 * Compose the path of a control file, "<cgroup>/<control>", in a buffer
 * obtained from malloc().
 *
 * Returns the new string, or NULL if the allocation failed.
 */
char *cg_control(const char *cgroup, const char *control)
{
	size_t cgroup_len = strlen(cgroup);
	size_t control_len = strlen(control);
	/* Room for both parts, the '/' between them and the terminator. */
	size_t total = cgroup_len + control_len + 2;
	char *file = malloc(total);

	if (file != NULL)
		snprintf(file, total, "%s/%s", cgroup, control);

	return file;
}
89 
/*
 * Read the control file @control of @cgroup into @buf, which holds @len
 * bytes. The path is "<cgroup>/<control>" and the read itself is done by
 * read_text().
 *
 * Returns 0 on success, or the negative value reported by read_text().
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];
	ssize_t nread;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);

	nread = read_text(file, buf, len);
	if (nread < 0)
		return nread;

	return 0;
}
101 
/*
 * Compare the contents of @cgroup/@control against @expected.
 *
 * The read buffer is sized after @expected (strlen + 1, minimum 2 bytes), so
 * whatever the file holds beyond that many characters does not take part in
 * the comparison.
 *
 * Returns the strcmp() result (0 on a match), or -1 if @expected is NULL,
 * the buffer cannot be allocated, or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	size_t buflen;
	char *content;
	int cmp = -1;

	/* A NULL reference string can never match. */
	if (!expected)
		return -1;

	/*
	 * An empty @expected still needs a two byte buffer, otherwise
	 * cg_read() would be asked to read 0 bytes.
	 */
	buflen = strlen(expected) + 1;
	if (buflen < 2)
		buflen = 2;

	content = malloc(buflen);
	if (!content)
		return -1;

	if (!cg_read(cgroup, control, content, buflen))
		cmp = strcmp(expected, content);

	free(content);
	return cmp;
}
129 
/*
 * Repeat cg_read_strcmp() on @cgroup/@control until it reports a match with
 * @expected, for at most 100 attempts with a 10ms sleep after every miss.
 *
 * Returns 0 as soon as a match is seen, otherwise the result of the last
 * comparison.
 */
int cg_read_strcmp_wait(const char *cgroup, const char *control,
			    const char *expected)
{
	int attempts_left = 100;
	int ret;

	do {
		ret = cg_read_strcmp(cgroup, control, expected);
		if (ret == 0)
			break;

		usleep(10000);
	} while (--attempts_left > 0);

	return ret;
}
144 
145 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
146 {
147 	char buf[BUF_SIZE];
148 
149 	if (cg_read(cgroup, control, buf, sizeof(buf)))
150 		return -1;
151 
152 	return strstr(buf, needle) ? 0 : -1;
153 }
154 
/*
 * Read @cgroup/@control and convert its contents with atol().
 *
 * Returns the converted value, or -1 if the file cannot be read. A file that
 * legitimately contains -1 is indistinguishable from a read failure.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	int err = cg_read(cgroup, control, text, sizeof(text));

	return err ? -1 : atol(text);
}
164 
/*
 * Read a number from the start (offset 0) of the already open file @fd and
 * convert it with atol(). The file position of @fd is not used, because the
 * read is done with pread().
 *
 * Returns the converted value, or -1 if nothing could be read.
 */
long cg_read_long_fd(int fd)
{
	char buf[128];
	ssize_t len;

	/* Keep one byte free so the data can always be terminated. */
	len = pread(fd, buf, sizeof(buf) - 1, 0);
	if (len <= 0)
		return -1;

	/* pread() does not terminate the data; atol() needs a string. */
	buf[len] = '\0';
	return atol(buf);
}
174 
175 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
176 {
177 	char buf[BUF_SIZE];
178 	char *ptr;
179 
180 	if (cg_read(cgroup, control, buf, sizeof(buf)))
181 		return -1;
182 
183 	ptr = strstr(buf, key);
184 	if (!ptr)
185 		return -1;
186 
187 	return atol(ptr + strlen(key));
188 }
189 
/*
 * Poll the value stored under @key in @cgroup/@control until it equals
 * @expected.
 *
 * The value is fetched with cg_read_key_long() up to @retries times, sleeping
 * @wait_interval_us microseconds after every mismatch. A negative value ends
 * the polling immediately.
 *
 * Returns the last value read; this is -1 when @retries is not positive.
 */
long cg_read_key_long_poll(const char *cgroup, const char *control,
			   const char *key, long expected, int retries,
			   useconds_t wait_interval_us)
{
	long current = -1;
	int attempt = 0;

	while (attempt < retries) {
		current = cg_read_key_long(cgroup, control, key);
		if (current < 0 || current == expected)
			break;

		usleep(wait_interval_us);
		attempt++;
	}

	return current;
}
210 
211 long cg_read_lc(const char *cgroup, const char *control)
212 {
213 	char buf[BUF_SIZE];
214 	const char delim[] = "\n";
215 	char *line;
216 	long cnt = 0;
217 
218 	if (cg_read(cgroup, control, buf, sizeof(buf)))
219 		return -1;
220 
221 	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
222 		cnt++;
223 
224 	return cnt;
225 }
226 
/*
 * Write the string @buf to the control file @control of @cgroup through
 * write_text().
 *
 * Returns 0 when the whole string was written. Otherwise the value reported
 * by write_text() is passed on: a negative errno, or the byte count of a
 * short write.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char file[PATH_MAX];
	ssize_t want, done;

	want = strlen(buf);
	snprintf(file, sizeof(file), "%s/%s", cgroup, control);

	done = write_text(file, buf, want);
	if (done == want)
		return 0;

	return done;
}
237 
/*
 * Open the control file "<cgroup>/<control>" with the given open() @flags.
 *
 * Returns the new file descriptor, or -1 on failure with errno set by
 * open(). The caller closes the descriptor with close().
 */
int cg_open(const char *cgroup, const char *control, int flags)
{
	char file[PATH_MAX];
	int fd;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	fd = open(file, flags);

	return fd;
}
249 
/*
 * Write @value, formatted as a signed decimal number, to the control file
 * @control of @cgroup.
 *
 * Returns 0 on success, a negative value if formatting fails, or whatever
 * cg_write() reports on failure.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	/* @value is a signed long, so %ld (not %lu) is the matching format. */
	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
261 
262 static int cg_find_root(char *root, size_t len, const char *controller,
263 			bool *nsdelegate)
264 {
265 	char buf[10 * BUF_SIZE];
266 	char *fs, *mount, *type, *options;
267 	const char delim[] = "\n\t ";
268 
269 	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
270 		return -1;
271 
272 	/*
273 	 * Example:
274 	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
275 	 */
276 	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
277 		mount = strtok(NULL, delim);
278 		type = strtok(NULL, delim);
279 		options = strtok(NULL, delim);
280 		strtok(NULL, delim);
281 		strtok(NULL, delim);
282 		if (strcmp(type, "cgroup") == 0) {
283 			if (!controller || !strstr(options, controller))
284 				continue;
285 		} else if (strcmp(type, "cgroup2") == 0) {
286 			if (controller &&
287 					cg_read_strstr(mount, "cgroup.controllers", controller))
288 				continue;
289 		} else {
290 			continue;
291 		}
292 		strncpy(root, mount, len);
293 
294 		if (nsdelegate)
295 			*nsdelegate = !!strstr(options, "nsdelegate");
296 		return 0;
297 
298 	}
299 
300 	return -1;
301 }
302 
/*
 * Look up the mount point of a hierarchy providing @controller and store it
 * in @root (a buffer of @len bytes). This is cg_find_root() without the
 * nsdelegate report.
 *
 * Returns 0 on success, -1 if no such mount was found.
 */
int cg_find_controller_root(char *root, size_t len, const char *controller)
{
	int ret;

	/* NULL: the caller has no use for the nsdelegate flag. */
	ret = cg_find_root(root, len, controller, NULL);

	return ret;
}
307 
/*
 * Look up the mount point of the cgroup2 hierarchy and store it in @root (a
 * buffer of @len bytes). This is cg_find_root() without a controller filter;
 * @nsdelegate is handed through unchanged and may be NULL.
 *
 * Returns 0 on success, -1 if no such mount was found.
 */
int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
{
	int ret;

	/* NULL controller: no controller filter is applied. */
	ret = cg_find_root(root, len, NULL, nsdelegate);

	return ret;
}
312 
/*
 * Create the cgroup directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result: 0 on success, -1 on failure with errno set.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
317 
318 int cg_wait_for_proc_count(const char *cgroup, int count)
319 {
320 	char buf[10 * BUF_SIZE] = {0};
321 	int attempts;
322 	char *ptr;
323 
324 	for (attempts = 10; attempts >= 0; attempts--) {
325 		int nr = 0;
326 
327 		if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
328 			break;
329 
330 		for (ptr = buf; *ptr; ptr++)
331 			if (*ptr == '\n')
332 				nr++;
333 
334 		if (nr >= count)
335 			return 0;
336 
337 		usleep(100000);
338 	}
339 
340 	return -1;
341 }
342 
343 int cg_killall(const char *cgroup)
344 {
345 	char buf[BUF_SIZE];
346 	char *ptr = buf;
347 
348 	/* If cgroup.kill exists use it. */
349 	if (!cg_write(cgroup, "cgroup.kill", "1"))
350 		return 0;
351 
352 	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
353 		return -1;
354 
355 	while (ptr < buf + sizeof(buf)) {
356 		int pid = strtol(ptr, &ptr, 10);
357 
358 		if (pid == 0)
359 			break;
360 		if (*ptr)
361 			ptr++;
362 		else
363 			break;
364 		if (kill(pid, SIGKILL))
365 			return -1;
366 	}
367 
368 	return 0;
369 }
370 
/*
 * Remove the cgroup directory @cgroup.
 *
 * A NULL @cgroup is a no-op. While rmdir() fails with EBUSY, the cgroup's
 * processes are killed with cg_killall() and the removal is retried after a
 * 100us sleep; the number of retries is not bounded. A directory that does
 * not exist (ENOENT) counts as success.
 *
 * Returns 0 on success, otherwise the failing rmdir() result (-1).
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	if (!cgroup)
		return 0;

	for (;;) {
		ret = rmdir(cgroup);
		if (ret == 0 || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	if (ret && errno == ENOENT)
		ret = 0;

	return ret;
}
390 
/*
 * Write @pid, formatted as a decimal number, to the cgroup.procs file of
 * @cgroup.
 *
 * Returns the cg_write() result: 0 on success, non-zero on failure.
 */
int cg_enter(const char *cgroup, int pid)
{
	char text[64];
	int ret;

	snprintf(text, sizeof(text), "%d", pid);
	ret = cg_write(cgroup, "cgroup.procs", text);

	return ret;
}
398 
/*
 * Write "0" to the cgroup.procs file of @cgroup, which is the request this
 * helper uses to move the calling process into @cgroup.
 *
 * Returns the cg_write() result: 0 on success, non-zero on failure.
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
403 
404 int cg_enter_current_thread(const char *cgroup)
405 {
406 	return cg_write(cgroup, CG_THREADS_FILE, "0");
407 }
408 
/*
 * Run @fn(@cgroup, @arg) in a forked child that first moves itself into
 * @cgroup, and wait for the child to finish.
 *
 * The child exits with EXIT_FAILURE if it cannot enter the cgroup, otherwise
 * with the value returned by @fn.
 *
 * Returns the child's exit status if it exited normally, the negative fork()
 * result if no child could be created, or -1 if waiting failed or the child
 * did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only valid when waitpid() succeeds: retry when a signal
	 * interrupts the wait, give up on any other error.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
433 
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd, using clone3() with CLONE_INTO_CGROUP.
 *
 * Returns what sys_clone3() returned (0 in the child, the child's pid in the
 * parent, negative on failure). When support is missing - the build has no
 * CLONE_ARGS_SIZE_VER2, or the call fails with ENOSYS or E2BIG - errno is set
 * to ENOSYS and -ENOSYS is returned so that callers can fall back.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t pid = sys_clone3(&args, sizeof(args));

	/*
	 * Anything other than a missing feature is reported as is:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 */
	if (pid >= 0 || (errno != ENOSYS && errno != E2BIG))
		return pid;
#endif
	/* Both "not supported" flavours are folded into ENOSYS. */
	errno = ENOSYS;
	return -ENOSYS;
}
461 
/*
 * Wait with waitid() for a state change of child @pid. @options selects the
 * changes of interest (WEXITED, WSTOPPED, WCONTINUED); __WALL and
 * __WNOTHREAD are always added. Interrupted waits (EINTR) are restarted.
 *
 * Returns, for the first requested kind that matches the reported
 * si_status: WEXITSTATUS() for an exit, WSTOPSIG() for a stop, 0 for a
 * continue. Returns -1 if waitid() fails or nothing matches.
 *
 * NOTE(review): si_status is passed to the wait-status macros unmodified.
 * As far as waitid(2) documents it, si_status carries an exit code or signal
 * number rather than an encoded wait status - confirm this is intended.
 */
int clone_reap(pid_t pid, int options)
{
	siginfo_t info = {
		.si_signo = 0,
	};
	int status;
	int ret;

	do {
		ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0)
		return -1;

	status = info.si_status;

	if ((options & WEXITED) && WIFEXITED(status))
		return WEXITSTATUS(status);

	if ((options & WSTOPPED) && WIFSTOPPED(status))
		return WSTOPSIG(status);

	if ((options & WCONTINUED) && WIFCONTINUED(status))
		return 0;

	return -1;
}
494 
/*
 * Open the directory @dir as an O_PATH descriptor, with O_DIRECTORY,
 * O_NOFOLLOW and O_CLOEXEC set as well.
 *
 * Returns the new file descriptor, or -1 on failure with errno set.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
499 
/*
 * Close @fd, if it is non-negative, while preserving errno across the
 * close() call.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe in an unbraced if/else), and @fd is evaluated only once.
 */
#define close_prot_errno(fd)                                                   \
	do {                                                                   \
		int _fd_ = (fd);                                               \
		if (_fd_ >= 0) {                                               \
			int _e_ = errno;                                       \
			close(_fd_);                                           \
			errno = _e_;                                           \
		}                                                              \
	} while (0)
506 
/*
 * Start @fn(@cgroup, @arg) in a child that is created directly inside
 * @cgroup via clone_into_cgroup(), without waiting for it.
 *
 * The child exits with the value returned by @fn and never returns from this
 * function.
 *
 * Returns the child's pid in the parent, -1 if @cgroup cannot be opened, or
 * the negative clone_into_cgroup() result (errno is left as that call set
 * it).
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int cgfd;

	cgfd = dirfd_open_opath(cgroup);
	if (cgfd < 0)
		return -1;

	child = clone_into_cgroup(cgfd);
	/* The descriptor is only needed for the clone itself. */
	close_prot_errno(cgfd);

	if (child == 0)
		exit(fn(cgroup, arg));

	return child;
}
525 
/*
 * Start @fn(@cgroup, @arg) in a child process running inside @cgroup and
 * return without waiting for it.
 *
 * The child is first created with clone_into_cgroup_run_nowait(). If that
 * fails with errno == ENOSYS, a plain fork() is used instead and the child
 * writes its own pid to cgroup.procs (exiting with EXIT_FAILURE if that
 * write fails) before calling @fn.
 *
 * Returns the child's pid in the parent, -1 on a clone failure other than
 * ENOSYS, or the negative fork() result.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char self[64];
	int child;

	child = clone_into_cgroup_run_nowait(cgroup, fn, arg);
	if (child > 0)
		return child;

	/* Anything but "not supported" is a genuine test failure. */
	if (child < 0 && errno != ENOSYS)
		return -1;

	/* Fallback: fork and let the child migrate itself. */
	child = fork();
	if (child == 0) {
		snprintf(self, sizeof(self), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", self))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	return child;
}
552 
553 int proc_mount_contains(const char *option)
554 {
555 	char buf[4 * BUF_SIZE];
556 	ssize_t read;
557 
558 	read = read_text("/proc/mounts", buf, sizeof(buf));
559 	if (read < 0)
560 		return read;
561 
562 	return strstr(buf, option) != NULL;
563 }
564 
565 int cgroup_feature(const char *feature)
566 {
567 	char buf[BUF_SIZE];
568 	ssize_t read;
569 
570 	read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf));
571 	if (read < 0)
572 		return read;
573 
574 	return strstr(buf, feature) != NULL;
575 }
576 
/*
 * Read the procfs file @item of a process into @buf (@size bytes).
 *
 * With a non-zero @pid the file is /proc/<pid>/<item>. With @pid == 0 it is
 * /proc/thread-self/<item> when @thread is true and /proc/self/<item>
 * otherwise.
 *
 * Returns the number of bytes read, or -1 on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];
	ssize_t nread;

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	nread = read_text(path, buf, size);
	if (nread < 0)
		return -1;

	return nread;
}
591 
592 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
593 {
594 	char buf[BUF_SIZE];
595 
596 	if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
597 		return -1;
598 
599 	return strstr(buf, needle) ? 0 : -1;
600 }
601 
/*
 * Check that a child can be created directly inside @cgroup: clone into it,
 * have the child exit with EXIT_SUCCESS right away, and reap it.
 *
 * Returns 0 if the clone succeeded, or -1 if @cgroup cannot be opened or the
 * clone fails. The outcome of reaping the child is deliberately ignored.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int cgfd;

	cgfd = dirfd_open_opath(cgroup);
	if (cgfd < 0)
		return -1;

	child = clone_into_cgroup(cgfd);
	close_prot_errno(cgfd);

	if (child < 0)
		return -1;

	/* The child has nothing to do beyond having been created. */
	if (child == 0)
		exit(EXIT_SUCCESS);

	/* Only the clone matters; a failing reap is not an error here. */
	(void)clone_reap(child, WEXITED);

	return 0;
}
626 
/*
 * Create an inotify instance that watches "<cgroup>/<filename>" for
 * IN_MODIFY events.
 *
 * Returns the inotify file descriptor, or -1 if the path cannot be built,
 * the instance cannot be created, or the watch cannot be added.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char *path;
	int fd, ret;

	/* cg_control() allocates the path; it must be freed on every path. */
	path = cg_control(cgroup, filename);
	if (!path)
		return -1;

	fd = inotify_init1(0);
	if (fd == -1) {
		free(path);
		return -1;
	}

	ret = inotify_add_watch(fd, path, IN_MODIFY);
	free(path);
	if (ret == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
643 
/*
 * Set up an inotify watch on the cgroup.events file of @cgroup.
 *
 * Returns the inotify file descriptor, or -1 on failure.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "cgroup.events";

	return __prepare_for_wait(cgroup, events_file);
}
648 
/*
 * Set up an inotify watch on the memory.events file of @cgroup.
 *
 * Returns the inotify file descriptor, or -1 on failure.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "memory.events";

	return __prepare_for_wait(cgroup, events_file);
}
653 
/*
 * Block until @fd becomes readable.
 *
 * poll() is called with a 10 second timeout; timeouts and EINTR simply start
 * another round, so the wait has no overall deadline.
 *
 * Returns 0 once POLLIN is reported. Returns -1 if @fd is negative, if
 * poll() fails for a reason other than EINTR, or if poll() reports only
 * error conditions (POLLERR, POLLHUP or POLLNVAL) for @fd.
 */
int cg_wait_for(int fd)
{
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};
	int ret;

	/* poll() ignores negative descriptors, so this would never end. */
	if (fd < 0)
		return -1;

	for (;;) {
		ret = poll(&fds, 1, 10000);
		if (ret == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		/* Timeout: nothing happened yet, keep waiting. */
		if (ret == 0)
			continue;

		if (fds.revents & POLLIN)
			return 0;

		/*
		 * Only error bits are set. poll() would report them again
		 * immediately, so retrying would spin without sleeping.
		 */
		return -1;
	}
}
680