xref: /linux/tools/testing/selftests/cgroup/lib/cgroup_util.c (revision 300a0cfe9f375b2843bcb331bcfa7503475ef5dd)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 
3 #define _GNU_SOURCE
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/limits.h>
8 #include <poll.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/inotify.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <unistd.h>
18 
19 #include "cgroup_util.h"
20 #include "../../clone3/clone3_selftests.h"
21 
/*
 * Global flag. It is only defined here; none of the helpers visible in this
 * file read or write it.
 * NOTE(review): presumably set by tests that run against a named cgroup v1
 * hierarchy - confirm against the users of cgroup_util.h.
 */
bool cg_test_v1_named;
23 
/*
 * Read up to max_len - 1 bytes from the start of @path into @buf and
 * NUL-terminate the result.
 *
 * Returns the number of bytes read on success, or -errno on failure.
 * A zero-sized buffer is rejected with -EINVAL because it has no room for
 * the terminator.
 */
ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd, err = 0;

	/* max_len - 1 would wrap around to SIZE_MAX below. */
	if (!max_len)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	if (len < 0)
		err = errno;	/* capture before close() can overwrite it */
	else
		buf[len] = 0;

	close(fd);
	return len < 0 ? -err : len;
}
42 
/*
 * Append @len bytes from @buf to the already existing file @path.
 *
 * Returns the number of bytes written on success, or -errno on failure.
 */
ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int fd, err = 0;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	written = write(fd, buf, len);
	if (written < 0)
		err = errno;	/* save before close() can clobber it */

	close(fd);
	return written < 0 ? -err : written;
}
56 
/*
 * Build the path "<root>/<name>" in a newly allocated string.
 *
 * Returns the string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	/* Do not hand a NULL buffer to snprintf(). */
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
66 
/*
 * Build the path "<root>/<name>_<index>" in a newly allocated string.
 *
 * The buffer is sized from the formatted length, so every int value of
 * @index (including negative ones) fits without truncation.
 *
 * Returns the string, which the caller must free(), or NULL if formatting or
 * the allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	/* With a NULL buffer snprintf() only reports the length needed. */
	int need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	size_t len;
	char *ret;

	if (need < 0)
		return NULL;

	len = (size_t)need + 1;
	ret = malloc(len);
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s_%d", root, name, index);

	return ret;
}
76 
/*
 * Build the path "<cgroup>/<control>" of a control file in a newly allocated
 * string.
 *
 * Returns the string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	/* Do not hand a NULL buffer to snprintf(). */
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
86 
/*
 * Read the file "<cgroup>/<control>" into @buf, a buffer of @len bytes, by
 * way of read_text().
 *
 * Returns 0 on success, or the negative read_text() result on failure.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];
	ssize_t nread;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	nread = read_text(file, buf, len);
	if (nread < 0)
		return nread;

	return 0;
}
98 
/*
 * Compare the contents of "<cgroup>/<control>" with @expected.
 *
 * The read buffer is strlen(expected) + 1 bytes, so at most strlen(expected)
 * bytes of the file take part in the comparison.
 *
 * Returns the strcmp() result of @expected against the data read, or -1 when
 * @expected is NULL, the allocation fails or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	char *data;
	size_t cap;
	int cmp = -1;

	/* There is nothing to compare against without a reference string. */
	if (!expected)
		return -1;

	cap = strlen(expected) + 1;
	data = malloc(cap);
	if (!data)
		return -1;

	if (!cg_read(cgroup, control, data, cap))
		cmp = strcmp(expected, data);

	free(data);
	return cmp;
}
125 
126 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
127 {
128 	char buf[PAGE_SIZE];
129 
130 	if (cg_read(cgroup, control, buf, sizeof(buf)))
131 		return -1;
132 
133 	return strstr(buf, needle) ? 0 : -1;
134 }
135 
/*
 * Read "<cgroup>/<control>" and convert its contents with atol().
 *
 * Returns the converted value, or -1 if the file cannot be read.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	long value = -1;

	if (!cg_read(cgroup, control, text, sizeof(text)))
		value = atol(text);

	return value;
}
145 
/*
 * Read from offset 0 of the already open descriptor @fd and convert the
 * data with atol().
 *
 * Returns the converted value, or -1 if nothing could be read.
 */
long cg_read_long_fd(int fd)
{
	char buf[128];
	ssize_t len;

	/* Keep one byte free for the terminator atol() relies on. */
	len = pread(fd, buf, sizeof(buf) - 1, 0);
	if (len <= 0)
		return -1;
	buf[len] = '\0';

	return atol(buf);
}
155 
156 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
157 {
158 	char buf[PAGE_SIZE];
159 	char *ptr;
160 
161 	if (cg_read(cgroup, control, buf, sizeof(buf)))
162 		return -1;
163 
164 	ptr = strstr(buf, key);
165 	if (!ptr)
166 		return -1;
167 
168 	return atol(ptr + strlen(key));
169 }
170 
171 long cg_read_lc(const char *cgroup, const char *control)
172 {
173 	char buf[PAGE_SIZE];
174 	const char delim[] = "\n";
175 	char *line;
176 	long cnt = 0;
177 
178 	if (cg_read(cgroup, control, buf, sizeof(buf)))
179 		return -1;
180 
181 	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
182 		cnt++;
183 
184 	return cnt;
185 }
186 
/*
 * Write the NUL-terminated string @buf to "<cgroup>/<control>" by way of
 * write_text().
 *
 * Returns 0 when the whole string was written; otherwise the write_text()
 * result is passed through (-errno on failure).
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char file[PATH_MAX];
	ssize_t want = strlen(buf);
	ssize_t done;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	done = write_text(file, buf, want);
	if (done == want)
		return 0;

	return done;
}
197 
/*
 * Open "<cgroup>/<control>" with the given open() @flags.
 *
 * Returns the fd on success, or -1 on failure.
 * (the fd should be closed with close() as usual)
 */
int cg_open(const char *cgroup, const char *control, int flags)
{
	char file[PATH_MAX];
	int fd;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	fd = open(file, flags);

	return fd;
}
209 
/*
 * Write @value, formatted as a signed decimal number, to
 * "<cgroup>/<control>".
 *
 * Returns 0 on success, a negative value if formatting fails, or the
 * cg_write() result otherwise.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	/* @value is a signed long: %ld keeps negative values negative. */
	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
221 
222 static int cg_find_root(char *root, size_t len, const char *controller,
223 			bool *nsdelegate)
224 {
225 	char buf[10 * PAGE_SIZE];
226 	char *fs, *mount, *type, *options;
227 	const char delim[] = "\n\t ";
228 
229 	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
230 		return -1;
231 
232 	/*
233 	 * Example:
234 	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
235 	 */
236 	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
237 		mount = strtok(NULL, delim);
238 		type = strtok(NULL, delim);
239 		options = strtok(NULL, delim);
240 		strtok(NULL, delim);
241 		strtok(NULL, delim);
242 		if (strcmp(type, "cgroup") == 0) {
243 			if (!controller || !strstr(options, controller))
244 				continue;
245 		} else if (strcmp(type, "cgroup2") == 0) {
246 			if (controller &&
247 					cg_read_strstr(mount, "cgroup.controllers", controller))
248 				continue;
249 		} else {
250 			continue;
251 		}
252 		strncpy(root, mount, len);
253 
254 		if (nsdelegate)
255 			*nsdelegate = !!strstr(options, "nsdelegate");
256 		return 0;
257 
258 	}
259 
260 	return -1;
261 }
262 
/*
 * Look up the cgroup mount that provides @controller and store its mount
 * point in @root (@len bytes); the nsdelegate state is not reported.
 *
 * Returns the cg_find_root() result: 0 on success, -1 on failure.
 */
int cg_find_controller_root(char *root, size_t len, const char *controller)
{
	int ret = cg_find_root(root, len, controller, NULL);

	return ret;
}
267 
/*
 * Look up a cgroup mount without asking for a particular controller and
 * store its mount point in @root (@len bytes). @nsdelegate is handed through
 * to cg_find_root() unchanged.
 *
 * Returns the cg_find_root() result: 0 on success, -1 on failure.
 */
int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
{
	int ret = cg_find_root(root, len, NULL, nsdelegate);

	return ret;
}
272 
/*
 * Create the directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
277 
278 int cg_wait_for_proc_count(const char *cgroup, int count)
279 {
280 	char buf[10 * PAGE_SIZE] = {0};
281 	int attempts;
282 	char *ptr;
283 
284 	for (attempts = 10; attempts >= 0; attempts--) {
285 		int nr = 0;
286 
287 		if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
288 			break;
289 
290 		for (ptr = buf; *ptr; ptr++)
291 			if (*ptr == '\n')
292 				nr++;
293 
294 		if (nr >= count)
295 			return 0;
296 
297 		usleep(100000);
298 	}
299 
300 	return -1;
301 }
302 
303 int cg_killall(const char *cgroup)
304 {
305 	char buf[PAGE_SIZE];
306 	char *ptr = buf;
307 
308 	/* If cgroup.kill exists use it. */
309 	if (!cg_write(cgroup, "cgroup.kill", "1"))
310 		return 0;
311 
312 	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
313 		return -1;
314 
315 	while (ptr < buf + sizeof(buf)) {
316 		int pid = strtol(ptr, &ptr, 10);
317 
318 		if (pid == 0)
319 			break;
320 		if (*ptr)
321 			ptr++;
322 		else
323 			break;
324 		if (kill(pid, SIGKILL))
325 			return -1;
326 	}
327 
328 	return 0;
329 }
330 
/*
 * Remove the directory @cgroup. For as long as rmdir() fails with EBUSY,
 * cg_killall() is run on the cgroup and the removal is retried after a
 * 100us sleep.
 *
 * Returns 0 when @cgroup is NULL, when the directory was removed or when it
 * does not exist; otherwise the failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	if (!cgroup)
		return 0;

	for (;;) {
		ret = rmdir(cgroup);
		if (!ret || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	/* A directory that is already gone counts as success. */
	if (ret && errno == ENOENT)
		ret = 0;

	return ret;
}
350 
/*
 * Write the decimal form of @pid to @cgroup's cgroup.procs file.
 *
 * Returns the cg_write() result.
 */
int cg_enter(const char *cgroup, int pid)
{
	char pidstr[64];
	int ret;

	snprintf(pidstr, sizeof(pidstr), "%d", pid);
	ret = cg_write(cgroup, "cgroup.procs", pidstr);

	return ret;
}
358 
/*
 * Write "0" to @cgroup's cgroup.procs file.
 *
 * Returns the cg_write() result.
 */
int cg_enter_current(const char *cgroup)
{
	int ret = cg_write(cgroup, "cgroup.procs", "0");

	return ret;
}
363 
364 int cg_enter_current_thread(const char *cgroup)
365 {
366 	return cg_write(cgroup, CG_THREADS_FILE, "0");
367 }
368 
/*
 * Fork a child that writes its own PID to @cgroup's cgroup.procs, runs
 * @fn(@cgroup, @arg) and exits with the callback's return value. The parent
 * waits for the child to finish.
 *
 * Returns the child's exit status if it exited normally (EXIT_FAILURE if it
 * could not write cgroup.procs), the fork() result if fork() fails, or -1 if
 * waiting fails or the child did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only meaningful once waitpid() has succeeded: retry when
	 * a signal interrupts the wait and give up on any other error rather
	 * than decoding an unset status.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
393 
/*
 * Create a child with sys_clone3() and CLONE_INTO_CGROUP, passing @cgroup_fd
 * as the target cgroup.
 *
 * Returns the sys_clone3() result, except that a missing clone3()
 * (ENOSYS), a missing CLONE_INTO_CGROUP (E2BIG) and a build without
 * CLONE_ARGS_SIZE_VER2 are all reported as -ENOSYS with errno set to ENOSYS.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t child = sys_clone3(&args, sizeof(struct __clone_args));

	/*
	 * Anything but the two "not available" errors is handed back as is:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 */
	if (child >= 0 || (errno != ENOSYS && errno != E2BIG))
		return child;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
421 
/*
 * Wait for a state change of child @pid with waitid(), restarting the call
 * whenever it is interrupted by a signal. @options selects the states to wait
 * for; __WALL and __WNOTHREAD are always added.
 *
 * Returns WEXITSTATUS(), WSTOPSIG() or 0 for a matching exited, stopped or
 * continued state respectively, and -1 if waitid() fails or no requested
 * state matches.
 *
 * NOTE(review): info.si_status is decoded with the W*() wait-status macros;
 * confirm this is intended, since waitid() reports the state through
 * si_code/si_status rather than as a wait status word.
 */
int clone_reap(pid_t pid, int options)
{
	siginfo_t info = {
		.si_signo = 0,
	};
	int ret;

	do {
		ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0)
		return -1;

	if ((options & WEXITED) && WIFEXITED(info.si_status))
		return WEXITSTATUS(info.si_status);

	if ((options & WSTOPPED) && WIFSTOPPED(info.si_status))
		return WSTOPSIG(info.si_status);

	if ((options & WCONTINUED) && WIFCONTINUED(info.si_status))
		return 0;

	return -1;
}
454 
/*
 * Open @dir with O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC.
 *
 * Returns the open() result.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
459 
/*
 * Close @fd, if it is not negative, and restore the errno value seen before
 * the close().
 *
 * Wrapped in do { } while (0) so that the macro expands to one statement and
 * can be used as the unbraced body of an if/else.
 */
#define close_prot_errno(fd)                                                   \
	do {                                                                   \
		if ((fd) >= 0) {                                               \
			int _e_ = errno;                                       \
			close(fd);                                             \
			errno = _e_;                                           \
		}                                                              \
	} while (0)
466 
/*
 * Create a child directly inside @cgroup with clone_into_cgroup() and have it
 * run @fn(@cgroup, @arg); the child exits with the callback's return value.
 * The parent does not wait for the child.
 *
 * Returns, in the parent, the clone_into_cgroup() result, or -1 if @cgroup
 * cannot be opened.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	/* The fd is no longer needed; errno of the clone is kept. */
	close_prot_errno(cg_fd);

	if (!child)
		exit(fn(cgroup, arg));

	return child;
}
485 
/*
 * Start a child that runs @fn(@cgroup, @arg) inside @cgroup, without waiting
 * for it. clone_into_cgroup_run_nowait() is tried first; if it fails with
 * ENOSYS, the child is created with fork() and writes its own PID to
 * cgroup.procs before running @fn.
 *
 * Returns the child's PID, -1 if the clone attempt fails with any other
 * error, or the fork() result if the fallback cannot create the child.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char pidstr[64];
	int child;

	child = clone_into_cgroup_run_nowait(cgroup, fn, arg);
	if (child > 0)
		return child;

	/* Genuine test failure. */
	if (child < 0 && errno != ENOSYS)
		return -1;

	child = fork();
	if (child != 0)
		return child;

	/* Only the forked child gets here. */
	snprintf(pidstr, sizeof(pidstr), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", pidstr))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
512 
513 int proc_mount_contains(const char *option)
514 {
515 	char buf[4 * PAGE_SIZE];
516 	ssize_t read;
517 
518 	read = read_text("/proc/mounts", buf, sizeof(buf));
519 	if (read < 0)
520 		return read;
521 
522 	return strstr(buf, option) != NULL;
523 }
524 
/*
 * Read /proc/<pid>/<item> into @buf (@size bytes). When @pid is 0 the path is
 * /proc/thread-self/<item> if @thread is set and /proc/self/<item> otherwise.
 *
 * Returns the read_text() length on success, or -1 on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char file[PATH_MAX];
	ssize_t len;

	if (pid) {
		snprintf(file, sizeof(file), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(file, sizeof(file), "/proc/%s/%s", self, item);
	}

	len = read_text(file, buf, size);
	if (len < 0)
		return -1;

	return len;
}
539 
540 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
541 {
542 	char buf[PAGE_SIZE];
543 
544 	if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
545 		return -1;
546 
547 	return strstr(buf, needle) ? 0 : -1;
548 }
549 
/*
 * Create a child directly inside @cgroup with clone_into_cgroup(); the child
 * exits at once with EXIT_SUCCESS and the parent reaps it.
 *
 * Returns 0 if the clone succeeded, -1 if @cgroup cannot be opened or the
 * clone fails.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	close_prot_errno(cg_fd);

	if (child < 0)
		return -1;

	/* The child has nothing to do. */
	if (!child)
		exit(EXIT_SUCCESS);

	/*
	 * Whether the reap works does not matter here; only the outcome of
	 * the initial clone is reported.
	 */
	(void)clone_reap(child, WEXITED);
	return 0;
}
574 
/*
 * Create an inotify instance that watches "<cgroup>/<filename>" for
 * IN_MODIFY events.
 *
 * Returns the inotify fd on success, or -1 if the instance, the path string
 * or the watch cannot be set up.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char *path;
	int fd, ret;

	fd = inotify_init1(0);
	if (fd == -1)
		return fd;

	path = cg_control(cgroup, filename);
	if (!path) {
		close(fd);
		return -1;
	}

	ret = inotify_add_watch(fd, path, IN_MODIFY);
	/* The path string is only needed for the call itself. */
	free(path);
	if (ret == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
591 
/*
 * Set up an inotify watch on @cgroup's cgroup.events file.
 *
 * Returns the __prepare_for_wait() result: the inotify fd, or -1 on failure.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	int fd = __prepare_for_wait(cgroup, "cgroup.events");

	return fd;
}
596 
/*
 * Set up an inotify watch on @cgroup's memory.events file.
 *
 * Returns the __prepare_for_wait() result: the inotify fd, or -1 on failure.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	int fd = __prepare_for_wait(cgroup, "memory.events");

	return fd;
}
601 
/*
 * Block until @fd is reported readable (POLLIN) by poll(). Each poll() uses
 * a 10 second timeout; a timeout, an interrupted call or an event other than
 * POLLIN simply leads to another poll().
 *
 * Returns 0 once POLLIN is seen, or -1 if poll() fails with anything other
 * than EINTR.
 */
int cg_wait_for(int fd)
{
	struct pollfd pfd = {
		.fd = fd,
		.events = POLLIN,
	};
	int ret;

	for (;;) {
		ret = poll(&pfd, 1, 10000);

		if (ret > 0 && (pfd.revents & POLLIN))
			return 0;

		if (ret == -1 && errno != EINTR)
			return ret;
	}
}
628