xref: /linux/tools/testing/selftests/cgroup/lib/cgroup_util.c (revision e32e6f02168f2ad7991eb5d160d312d2001520c8)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 
3 #define _GNU_SOURCE
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/limits.h>
8 #include <poll.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/inotify.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <unistd.h>
18 
19 #include "cgroup_util.h"
20 #include "../../clone3/clone3_selftests.h"
21 
/*
 * Global test-mode flag; it is only defined here and not read anywhere in
 * this file.
 * NOTE(review): presumably set by tests that run against a named cgroup v1
 * hierarchy - confirm against the users of this flag.
 */
bool cg_test_v1_named;
23 
/*
 * Read up to @max_len - 1 bytes from the file at @path into @buf and
 * NUL-terminate the result.
 *
 * Returns the number of bytes read on success, or -errno on failure.
 * A @max_len of 0 leaves no room for the terminator and yields -EINVAL.
 */
ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd, err;

	/* max_len - 1 below would otherwise wrap around to SIZE_MAX. */
	if (!max_len)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	/* Capture errno now: close() below is allowed to overwrite it. */
	err = errno;

	if (len >= 0)
		buf[len] = 0;

	close(fd);
	return len < 0 ? -err : len;
}
42 
/*
 * Append @len bytes from @buf to the existing file at @path (the file is
 * opened with O_WRONLY | O_APPEND and is never created).
 *
 * Returns the number of bytes written on success, or -errno on failure.
 */
ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int fd, err;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	written = write(fd, buf, len);
	/* Capture errno now: close() below is allowed to overwrite it. */
	err = errno;

	close(fd);
	return written < 0 ? -err : written;
}
56 
/*
 * Build the path "<root>/<name>" in a freshly allocated string.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	size_t len = strlen(root) + strlen(name) + 2;	/* '/' + NUL */
	char *ret = malloc(len);

	/* Never hand a NULL buffer to snprintf(). */
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
66 
/*
 * Build the path "<root>/<name>_<index>" in a freshly allocated string.
 *
 * The buffer is sized from the formatted length, so every int value of
 * @index (including negative ones) is rendered in full.
 *
 * Returns the new string, which the caller must free(), or NULL on
 * formatting or allocation failure.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	/* A NULL/0 snprintf() only measures the formatted length. */
	int need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	size_t len;
	char *ret;

	if (need < 0)
		return NULL;

	len = (size_t)need + 1;
	ret = malloc(len);
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s_%d", root, name, index);

	return ret;
}
76 
/*
 * Build the path "<cgroup>/<control>" in a freshly allocated string.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	size_t len = strlen(cgroup) + strlen(control) + 2;	/* '/' + NUL */
	char *ret = malloc(len);

	/* Never hand a NULL buffer to snprintf(). */
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
86 
/*
 * Read the file "<cgroup>/<control>" into @buf (at most @len - 1 bytes,
 * NUL-terminated by read_text()).
 *
 * Returns 0 on success, or -errno on failure.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];
	ssize_t nread;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	nread = read_text(file, buf, len);

	/* Callers only care about success; the byte count is dropped. */
	if (nread < 0)
		return nread;

	return 0;
}
98 
/*
 * Compare the start of "<cgroup>/<control>" with @expected.
 *
 * Only strlen(@expected) bytes are read from the file (one byte when
 * @expected is empty), so longer file contents that begin with @expected
 * still compare equal.
 *
 * Returns the strcmp() result (0 on match), or -1 if @expected is NULL,
 * the allocation fails, or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	size_t cap;
	char *data;
	int diff;

	if (!expected)
		return -1;

	/* The buffer must hold at least one byte plus the terminator. */
	if (expected[0] != '\0')
		cap = strlen(expected) + 1;
	else
		cap = 2;

	data = malloc(cap);
	if (!data)
		return -1;

	if (cg_read(cgroup, control, data, cap))
		diff = -1;
	else
		diff = strcmp(expected, data);

	free(data);
	return diff;
}
126 
/*
 * Poll cg_read_strcmp() until it reports a match, trying up to 100 times
 * with a 10 ms pause after every failed attempt.
 *
 * Returns 0 on match, otherwise the result of the last comparison.
 */
int cg_read_strcmp_wait(const char *cgroup, const char *control,
			    const char *expected)
{
	int attempt = 0;
	int diff;

	do {
		diff = cg_read_strcmp(cgroup, control, expected);
		if (diff == 0)
			break;
		usleep(10000);
	} while (++attempt < 100);

	return diff;
}
141 
142 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
143 {
144 	char buf[PAGE_SIZE];
145 
146 	if (cg_read(cgroup, control, buf, sizeof(buf)))
147 		return -1;
148 
149 	return strstr(buf, needle) ? 0 : -1;
150 }
151 
/*
 * Read "<cgroup>/<control>" and parse its leading decimal number.
 *
 * Returns the atol() value of the contents, or -1 if the read fails.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	int err;

	err = cg_read(cgroup, control, text, sizeof(text));
	if (err)
		return -1;

	return atol(text);
}
161 
/*
 * Read from offset 0 of the already-open descriptor @fd and parse the
 * leading decimal number.
 *
 * Returns the atol() value of the contents, or -1 if nothing could be read.
 */
long cg_read_long_fd(int fd)
{
	char buf[128];
	ssize_t len;

	/* Leave room for the terminator: pread() does not add one. */
	len = pread(fd, buf, sizeof(buf) - 1, 0);
	if (len <= 0)
		return -1;

	buf[len] = '\0';
	return atol(buf);
}
171 
172 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
173 {
174 	char buf[PAGE_SIZE];
175 	char *ptr;
176 
177 	if (cg_read(cgroup, control, buf, sizeof(buf)))
178 		return -1;
179 
180 	ptr = strstr(buf, key);
181 	if (!ptr)
182 		return -1;
183 
184 	return atol(ptr + strlen(key));
185 }
186 
/*
 * Repeatedly read the value of @key from "<cgroup>/<control>" until it
 * equals @expected, giving up after @retries attempts and sleeping
 * @wait_interval_us microseconds after each mismatch.
 *
 * Returns the last value read; a negative value from cg_read_key_long()
 * ends the polling immediately. Returns -1 if @retries is not positive.
 */
long cg_read_key_long_poll(const char *cgroup, const char *control,
			   const char *key, long expected, int retries,
			   useconds_t wait_interval_us)
{
	long val = -1;
	int remaining;

	for (remaining = retries; remaining > 0; remaining--) {
		val = cg_read_key_long(cgroup, control, key);

		/* Stop on a read error as well as on the awaited value. */
		if (val < 0 || val == expected)
			return val;

		usleep(wait_interval_us);
	}

	return val;
}
207 
208 long cg_read_lc(const char *cgroup, const char *control)
209 {
210 	char buf[PAGE_SIZE];
211 	const char delim[] = "\n";
212 	char *line;
213 	long cnt = 0;
214 
215 	if (cg_read(cgroup, control, buf, sizeof(buf)))
216 		return -1;
217 
218 	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
219 		cnt++;
220 
221 	return cnt;
222 }
223 
/*
 * Write the NUL-terminated string @buf to "<cgroup>/<control>".
 *
 * Returns 0 on success, or -errno on failure. A short write, where fewer
 * bytes than strlen(@buf) were accepted, is reported as -EIO so that the
 * result is never a positive count or a false success.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char path[PATH_MAX];
	ssize_t len = strlen(buf), ret;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	ret = write_text(path, buf, len);
	if (ret < 0)
		return ret;

	return ret == len ? 0 : -EIO;
}
234 
/*
 * Open the file "<cgroup>/<control>" with the given open(2) @flags.
 *
 * Returns the new file descriptor on success, or -1 on failure.
 * The caller closes the descriptor with close() as usual.
 */
int cg_open(const char *cgroup, const char *control, int flags)
{
	char file[PATH_MAX];
	int fd;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	fd = open(file, flags);

	return fd;
}
246 
/*
 * Write @value, formatted as a signed decimal number, to
 * "<cgroup>/<control>".
 *
 * Returns the cg_write() result (0 on success), or a negative value if
 * the number could not be formatted.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	/* %ld matches the signed parameter type; snprintf bounds the write. */
	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
258 
259 static int cg_find_root(char *root, size_t len, const char *controller,
260 			bool *nsdelegate)
261 {
262 	char buf[10 * PAGE_SIZE];
263 	char *fs, *mount, *type, *options;
264 	const char delim[] = "\n\t ";
265 
266 	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
267 		return -1;
268 
269 	/*
270 	 * Example:
271 	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
272 	 */
273 	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
274 		mount = strtok(NULL, delim);
275 		type = strtok(NULL, delim);
276 		options = strtok(NULL, delim);
277 		strtok(NULL, delim);
278 		strtok(NULL, delim);
279 		if (strcmp(type, "cgroup") == 0) {
280 			if (!controller || !strstr(options, controller))
281 				continue;
282 		} else if (strcmp(type, "cgroup2") == 0) {
283 			if (controller &&
284 					cg_read_strstr(mount, "cgroup.controllers", controller))
285 				continue;
286 		} else {
287 			continue;
288 		}
289 		strncpy(root, mount, len);
290 
291 		if (nsdelegate)
292 			*nsdelegate = !!strstr(options, "nsdelegate");
293 		return 0;
294 
295 	}
296 
297 	return -1;
298 }
299 
/*
 * Locate the mount point of the hierarchy that provides @controller and
 * store it in @root (@len bytes). The nsdelegate mount option is not
 * reported.
 *
 * Returns the cg_find_root() result: 0 on success, -1 on failure.
 */
int cg_find_controller_root(char *root, size_t len, const char *controller)
{
	bool *no_nsdelegate = NULL;

	return cg_find_root(root, len, controller, no_nsdelegate);
}
304 
/*
 * Locate the cgroup2 mount point, store it in @root (@len bytes) and,
 * when @nsdelegate is non-NULL, report whether the mount carries the
 * "nsdelegate" option.
 *
 * Returns the cg_find_root() result: 0 on success, -1 on failure.
 */
int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
{
	const char *any_controller = NULL;

	return cg_find_root(root, len, any_controller, nsdelegate);
}
309 
/*
 * Create the cgroup directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result: 0 on success, -1 with errno set on failure.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
314 
315 int cg_wait_for_proc_count(const char *cgroup, int count)
316 {
317 	char buf[10 * PAGE_SIZE] = {0};
318 	int attempts;
319 	char *ptr;
320 
321 	for (attempts = 10; attempts >= 0; attempts--) {
322 		int nr = 0;
323 
324 		if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
325 			break;
326 
327 		for (ptr = buf; *ptr; ptr++)
328 			if (*ptr == '\n')
329 				nr++;
330 
331 		if (nr >= count)
332 			return 0;
333 
334 		usleep(100000);
335 	}
336 
337 	return -1;
338 }
339 
340 int cg_killall(const char *cgroup)
341 {
342 	char buf[PAGE_SIZE];
343 	char *ptr = buf;
344 
345 	/* If cgroup.kill exists use it. */
346 	if (!cg_write(cgroup, "cgroup.kill", "1"))
347 		return 0;
348 
349 	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
350 		return -1;
351 
352 	while (ptr < buf + sizeof(buf)) {
353 		int pid = strtol(ptr, &ptr, 10);
354 
355 		if (pid == 0)
356 			break;
357 		if (*ptr)
358 			ptr++;
359 		else
360 			break;
361 		if (kill(pid, SIGKILL))
362 			return -1;
363 	}
364 
365 	return 0;
366 }
367 
/*
 * Remove the cgroup directory @cgroup.
 *
 * While rmdir() reports EBUSY, the cgroup's processes are killed with
 * cg_killall() and the removal is retried after a 100 us pause, without
 * any retry limit. A NULL @cgroup and an already-missing directory both
 * count as success.
 *
 * Returns 0 on success, or the failing rmdir() result (-1) otherwise.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	if (!cgroup)
		return 0;

	for (;;) {
		ret = rmdir(cgroup);
		if (!ret || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	if (ret && errno == ENOENT)
		ret = 0;

	return ret;
}
387 
/*
 * Move the process @pid into @cgroup by writing its PID to cgroup.procs.
 *
 * Returns the cg_write() result: 0 on success, non-zero on failure.
 */
int cg_enter(const char *cgroup, int pid)
{
	char text[64];
	int ret;

	snprintf(text, sizeof(text), "%d", pid);
	ret = cg_write(cgroup, "cgroup.procs", text);

	return ret;
}
395 
/*
 * Write "0" to @cgroup's cgroup.procs file.
 * NOTE(review): "0" presumably denotes the calling process - confirm
 * against the cgroup documentation.
 *
 * Returns the cg_write() result: 0 on success, non-zero on failure.
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
400 
401 int cg_enter_current_thread(const char *cgroup)
402 {
403 	return cg_write(cgroup, CG_THREADS_FILE, "0");
404 }
405 
/*
 * Fork a child, move it into @cgroup, run @fn(@cgroup, @arg) in it and
 * wait for it to finish.
 *
 * The child exits with EXIT_FAILURE if it cannot join the cgroup,
 * otherwise with the return value of @fn.
 *
 * Returns the child's exit status, the negative fork() result if the
 * fork fails, or -1 if waiting fails or the child did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only valid once waitpid() succeeds, so restart the
	 * wait when a signal interrupts it and fail on any other error.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
430 
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd, using clone3() with CLONE_INTO_CGROUP.
 *
 * Returns the sys_clone3() result when the call succeeds or fails for a
 * genuine reason. When the feature is unavailable - either not compiled
 * in (CLONE_ARGS_SIZE_VER2 undefined) or rejected at run time - errno is
 * set to ENOSYS and -ENOSYS is returned.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t pid = sys_clone3(&args, sizeof(args));

	/*
	 * Two errors only mean the feature is missing and are folded into
	 * the ENOSYS path below:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 * Everything else is handed back to the caller untouched.
	 */
	if (pid >= 0 || (errno != ENOSYS && errno != E2BIG))
		return pid;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
458 
/*
 * Wait with waitid() for a state change of child @pid selected by
 * @options (WEXITED, WSTOPPED and/or WCONTINUED).
 *
 * waitid() reports the kind of event in si_code and the exit code or
 * signal number in si_status; si_status is not a wait()-style status
 * word, so the WIFEXITED()/WEXITSTATUS() family must not be applied to it.
 *
 * Returns the child's exit code for a normal exit, the stop signal for a
 * stopped child, 0 for a continued child, and -1 if waitid() fails or no
 * requested event was reported.
 */
int clone_reap(pid_t pid, int options)
{
	int ret;
	siginfo_t info = {
		.si_signo = 0,
	};

	/* Restart the wait whenever a signal interrupts it. */
	do {
		ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0)
		return -1;

	switch (info.si_code) {
	case CLD_EXITED:
		if (options & WEXITED)
			return info.si_status;
		break;
	case CLD_STOPPED:
	case CLD_TRAPPED:
		if (options & WSTOPPED)
			return info.si_status;
		break;
	case CLD_CONTINUED:
		if (options & WCONTINUED)
			return 0;
		break;
	default:
		/* Killed/dumped children or no event at all (WNOHANG). */
		break;
	}

	return -1;
}
491 
/*
 * Open the directory @dir as an O_PATH descriptor, with O_CLOEXEC set
 * and without following a trailing symlink.
 *
 * Returns the descriptor on success, or -1 with errno set on failure.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
496 
/*
 * Close @fd if it is non-negative, preserving errno across the close()
 * so that an error recorded by an earlier call is not lost.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and stays safe inside unbraced if/else. @fd is evaluated more than
 * once, so it must not have side effects.
 */
#define close_prot_errno(fd)                                                   \
	do {                                                                   \
		if ((fd) >= 0) {                                               \
			int _e_ = errno;                                       \
			close(fd);                                             \
			errno = _e_;                                           \
		}                                                              \
	} while (0)
503 
/*
 * Clone a child directly into @cgroup and have it run @fn(@cgroup, @arg),
 * without waiting for it. The child exits with the return value of @fn.
 *
 * Returns the clone_into_cgroup() result to the parent (the child's PID
 * on success, negative on failure), or -1 if @cgroup cannot be opened.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int dfd;

	dfd = dirfd_open_opath(cgroup);
	if (dfd < 0)
		return -1;

	child = clone_into_cgroup(dfd);
	/* Parent and child both drop the directory fd; errno is kept. */
	close_prot_errno(dfd);

	if (child != 0)
		return child;

	/* Child: never returns to the caller. */
	exit(fn(cgroup, arg));
}
522 
/*
 * Start a child that runs @fn(@cgroup, @arg) inside @cgroup and return
 * without waiting for it.
 *
 * Cloning straight into the cgroup is tried first. If that reports
 * ENOSYS, the child is created with fork() and moves itself into the
 * cgroup by writing its PID to cgroup.procs (exiting with EXIT_FAILURE
 * if that write fails).
 *
 * Returns the child's PID on success, or -1 on failure.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char self[64];
	int pid;

	pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);
	if (pid > 0)
		return pid;

	/* Genuine test failure. */
	if (pid < 0 && errno != ENOSYS)
		return -1;

	/* Fall back to fork() plus an explicit migration. */
	pid = fork();
	if (pid != 0)
		return pid;

	snprintf(self, sizeof(self), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", self))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
549 
550 int proc_mount_contains(const char *option)
551 {
552 	char buf[4 * PAGE_SIZE];
553 	ssize_t read;
554 
555 	read = read_text("/proc/mounts", buf, sizeof(buf));
556 	if (read < 0)
557 		return read;
558 
559 	return strstr(buf, option) != NULL;
560 }
561 
562 int cgroup_feature(const char *feature)
563 {
564 	char buf[PAGE_SIZE];
565 	ssize_t read;
566 
567 	read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf));
568 	if (read < 0)
569 		return read;
570 
571 	return strstr(buf, feature) != NULL;
572 }
573 
/*
 * Read the procfs entry @item into @buf (@size bytes, NUL-terminated).
 *
 * For a non-zero @pid the file is /proc/<pid>/<item>. For @pid == 0 it
 * is /proc/thread-self/<item> when @thread is true, and
 * /proc/self/<item> otherwise.
 *
 * Returns the number of bytes read, or -1 on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];
	ssize_t nread;

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	nread = read_text(path, buf, size);
	if (nread < 0)
		return -1;

	return nread;
}
588 
589 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
590 {
591 	char buf[PAGE_SIZE];
592 
593 	if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
594 		return -1;
595 
596 	return strstr(buf, needle) ? 0 : -1;
597 }
598 
/*
 * Clone a child directly into @cgroup, let it exit immediately with
 * EXIT_SUCCESS, and reap it.
 *
 * Returns 0 if the clone succeeded, or -1 if @cgroup cannot be opened or
 * the clone fails.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int dfd;

	dfd = dirfd_open_opath(cgroup);
	if (dfd < 0)
		return -1;

	child = clone_into_cgroup(dfd);
	/* Parent and child both drop the directory fd; errno is kept. */
	close_prot_errno(dfd);

	if (child < 0)
		return -1;
	else if (child == 0)
		exit(EXIT_SUCCESS);

	/*
	 * The reap result is deliberately ignored: only the success of the
	 * initial clone matters here.
	 */
	(void)clone_reap(child, WEXITED);

	return 0;
}
623 
/*
 * Create an inotify instance watching "<cgroup>/<filename>" for
 * IN_MODIFY events.
 *
 * The path is built in a local buffer, so nothing is allocated and
 * nothing can leak.
 *
 * Returns the inotify descriptor (to be closed by the caller), or -1 if
 * the instance or the watch cannot be created.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char path[PATH_MAX];
	int fd;

	fd = inotify_init1(0);
	if (fd == -1)
		return fd;

	snprintf(path, sizeof(path), "%s/%s", cgroup, filename);

	if (inotify_add_watch(fd, path, IN_MODIFY) == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
640 
/*
 * Set up an inotify watch for modifications of @cgroup's cgroup.events
 * file.
 *
 * Returns the inotify descriptor, or -1 on failure.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "cgroup.events";

	return __prepare_for_wait(cgroup, events_file);
}
645 
/*
 * Set up an inotify watch for modifications of @cgroup's memory.events
 * file.
 *
 * Returns the inotify descriptor, or -1 on failure.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "memory.events";

	return __prepare_for_wait(cgroup, events_file);
}
650 
/*
 * Block until @fd becomes readable.
 *
 * poll() is issued with a 10 second timeout and restarted after a
 * timeout or an interrupting signal, so the wait itself is unbounded.
 * If poll() reports an event without POLLIN (POLLERR, POLLHUP or
 * POLLNVAL), the descriptor can never become readable and the wait is
 * abandoned instead of spinning on it.
 *
 * Returns 0 once @fd is readable, or -1 on a poll() error or an error
 * condition on @fd.
 */
int cg_wait_for(int fd)
{
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};
	int ret;

	for (;;) {
		ret = poll(&fds, 1, 10000);

		if (ret == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		/* Timeout: nothing happened yet, keep waiting. */
		if (ret == 0)
			continue;

		if (fds.revents & POLLIN)
			return 0;

		/* Only error/hang-up flags are set; retrying would busy-loop. */
		return -1;
	}
}
677