xref: /linux/tools/testing/selftests/cgroup/lib/cgroup_util.c (revision 25489a4f556414445d342951615178368ee45cde)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 
3 #define _GNU_SOURCE
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/limits.h>
8 #include <poll.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/inotify.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <unistd.h>
18 
19 #include "cgroup_util.h"
20 #include "../../clone3/clone3_selftests.h"
21 
/*
 * Read up to max_len - 1 bytes from the start of @path into @buf and
 * NUL-terminate the result.
 *
 * Returns the number of bytes read on success, or -errno on failure.
 * A zero-sized buffer has no room for the terminator and yields -EINVAL.
 */
ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd, err = 0;

	/* max_len - 1 would wrap to SIZE_MAX and let read() overrun @buf. */
	if (!max_len)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	if (len < 0)
		err = errno;	/* latch before close() can overwrite it */
	else
		buf[len] = 0;

	close(fd);
	return len < 0 ? -err : len;
}
40 
/*
 * Append @len bytes from @buf to the existing file @path (the file is opened
 * with O_WRONLY | O_APPEND and is never created).
 *
 * Returns the byte count reported by write() on success, or -errno on
 * failure.
 */
ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	int fd, err = 0;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	len = write(fd, buf, len);
	if (len < 0)
		err = errno;	/* latch before close() can overwrite it */

	close(fd);
	return len < 0 ? -err : len;
}
54 
/*
 * Build the path "<root>/<name>" in a freshly allocated string.
 *
 * Returns the string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* Two extra bytes: the '/' separator and the terminating NUL. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
64 
/*
 * Build the path "<root>/<name>_<index>" in a freshly allocated string.
 *
 * The buffer is sized from the formatted length, so every int value of
 * @index (including negative ones) is rendered without truncation.
 *
 * Returns the string, which the caller must free(), or NULL if formatting
 * or the allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	/* A NULL/0 snprintf() only measures the output. */
	int need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	size_t len;
	char *ret;

	if (need < 0)
		return NULL;

	len = (size_t)need + 1;
	ret = malloc(len);
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s_%d", root, name, index);

	return ret;
}
74 
/*
 * Build the path "<cgroup>/<control>" of a control file in a freshly
 * allocated string.
 *
 * Returns the string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* Two extra bytes: the '/' separator and the terminating NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
84 
/*
 * Read the control file "<cgroup>/<control>" into @buf (capacity @len) via
 * read_text().
 *
 * Returns 0 on success, or -errno on failure.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];
	ssize_t nread;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	nread = read_text(file, buf, len);

	/* The byte count is not interesting to callers, only success. */
	if (nread < 0)
		return nread;

	return 0;
}
96 
/*
 * Compare the beginning of the control file "<cgroup>/<control>" with
 * @expected.
 *
 * The read buffer holds strlen(expected) + 1 bytes, so the comparison only
 * sees as much of the file as fits in it; a file that merely starts with
 * @expected therefore also compares equal.
 *
 * Returns the strcmp() result (0 on a match), or -1 if @expected is NULL,
 * the buffer cannot be allocated, or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	size_t bufsz;
	char *text;
	int diff;

	if (!expected)
		return -1;

	bufsz = strlen(expected) + 1;
	text = malloc(bufsz);
	if (!text)
		return -1;

	if (cg_read(cgroup, control, text, bufsz) != 0)
		diff = -1;
	else
		diff = strcmp(expected, text);

	free(text);
	return diff;
}
123 
124 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
125 {
126 	char buf[PAGE_SIZE];
127 
128 	if (cg_read(cgroup, control, buf, sizeof(buf)))
129 		return -1;
130 
131 	return strstr(buf, needle) ? 0 : -1;
132 }
133 
/*
 * Read the control file "<cgroup>/<control>" and parse its leading decimal
 * number with atol().
 *
 * Returns the parsed value, or -1 if the file cannot be read (which is
 * indistinguishable from a file that really contains -1).
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	int err = cg_read(cgroup, control, text, sizeof(text));

	return err ? -1 : atol(text);
}
143 
/*
 * Read the text at offset 0 of the already-open descriptor @fd and parse
 * its leading decimal number with atol(). The file offset of @fd is left
 * untouched because pread() is used.
 *
 * Returns the parsed value, or -1 if nothing could be read.
 */
long cg_read_long_fd(int fd)
{
	char buf[128];
	ssize_t len;

	/* Keep one byte back so the data can always be NUL-terminated. */
	len = pread(fd, buf, sizeof(buf) - 1, 0);
	if (len <= 0)
		return -1;

	buf[len] = '\0';

	return atol(buf);
}
153 
154 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
155 {
156 	char buf[PAGE_SIZE];
157 	char *ptr;
158 
159 	if (cg_read(cgroup, control, buf, sizeof(buf)))
160 		return -1;
161 
162 	ptr = strstr(buf, key);
163 	if (!ptr)
164 		return -1;
165 
166 	return atol(ptr + strlen(key));
167 }
168 
169 long cg_read_lc(const char *cgroup, const char *control)
170 {
171 	char buf[PAGE_SIZE];
172 	const char delim[] = "\n";
173 	char *line;
174 	long cnt = 0;
175 
176 	if (cg_read(cgroup, control, buf, sizeof(buf)))
177 		return -1;
178 
179 	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
180 		cnt++;
181 
182 	return cnt;
183 }
184 
/*
 * Write the NUL-terminated string @buf (without its terminator) to the
 * control file "<cgroup>/<control>".
 *
 * Returns 0 if the whole string was written, or -errno on failure. A short
 * write is reported as -EIO so that callers never see a positive value or a
 * false success.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char path[PATH_MAX];
	ssize_t len = strlen(buf), ret;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	ret = write_text(path, buf, len);
	if (ret < 0)
		return ret;

	return ret == len ? 0 : -EIO;
}
195 
/*
 * Open the control file "<cgroup>/<control>" with the given open(2) @flags.
 *
 * Returns the file descriptor on success, or -1 on failure.
 * The caller owns the descriptor and releases it with close().
 */
int cg_open(const char *cgroup, const char *control, int flags)
{
	char file[PATH_MAX];
	int fd;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	fd = open(file, flags);

	return fd;
}
207 
/*
 * Write @value, formatted as a signed decimal number, to the control file
 * "<cgroup>/<control>".
 *
 * Returns 0 on success, a negative value if formatting fails, or whatever
 * error cg_write() reports.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	/* %ld matches the signed parameter; %lu would misprint negatives. */
	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
219 
220 static int cg_find_root(char *root, size_t len, const char *controller,
221 			bool *nsdelegate)
222 {
223 	char buf[10 * PAGE_SIZE];
224 	char *fs, *mount, *type, *options;
225 	const char delim[] = "\n\t ";
226 
227 	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
228 		return -1;
229 
230 	/*
231 	 * Example:
232 	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
233 	 */
234 	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
235 		mount = strtok(NULL, delim);
236 		type = strtok(NULL, delim);
237 		options = strtok(NULL, delim);
238 		strtok(NULL, delim);
239 		strtok(NULL, delim);
240 		if (strcmp(type, "cgroup") == 0) {
241 			if (!controller || !strstr(options, controller))
242 				continue;
243 		} else if (strcmp(type, "cgroup2") == 0) {
244 			if (controller &&
245 					cg_read_strstr(mount, "cgroup.controllers", controller))
246 				continue;
247 		} else {
248 			continue;
249 		}
250 		strncpy(root, mount, len);
251 
252 		if (nsdelegate)
253 			*nsdelegate = !!strstr(options, "nsdelegate");
254 		return 0;
255 
256 	}
257 
258 	return -1;
259 }
260 
/*
 * Find the cgroup mount that provides @controller and copy its mount point
 * into @root (capacity @len). The nsdelegate mount option is not reported.
 *
 * Returns 0 on success, -1 if no such mount is found.
 */
int cg_find_controller_root(char *root, size_t len, const char *controller)
{
	bool *no_nsdelegate = NULL;

	return cg_find_root(root, len, controller, no_nsdelegate);
}
265 
/*
 * Find the cgroup mount without filtering on a controller and copy its mount
 * point into @root (capacity @len). If @nsdelegate is non-NULL it receives
 * whether the mount options contain "nsdelegate".
 *
 * Returns 0 on success, -1 if no such mount is found.
 */
int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
{
	const char *any_controller = NULL;

	return cg_find_root(root, len, any_controller, nsdelegate);
}
270 
/*
 * Create the cgroup directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result: 0 on success, -1 with errno set on failure.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;
	int rc = mkdir(cgroup, mode);

	return rc;
}
275 
276 int cg_wait_for_proc_count(const char *cgroup, int count)
277 {
278 	char buf[10 * PAGE_SIZE] = {0};
279 	int attempts;
280 	char *ptr;
281 
282 	for (attempts = 10; attempts >= 0; attempts--) {
283 		int nr = 0;
284 
285 		if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
286 			break;
287 
288 		for (ptr = buf; *ptr; ptr++)
289 			if (*ptr == '\n')
290 				nr++;
291 
292 		if (nr >= count)
293 			return 0;
294 
295 		usleep(100000);
296 	}
297 
298 	return -1;
299 }
300 
301 int cg_killall(const char *cgroup)
302 {
303 	char buf[PAGE_SIZE];
304 	char *ptr = buf;
305 
306 	/* If cgroup.kill exists use it. */
307 	if (!cg_write(cgroup, "cgroup.kill", "1"))
308 		return 0;
309 
310 	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
311 		return -1;
312 
313 	while (ptr < buf + sizeof(buf)) {
314 		int pid = strtol(ptr, &ptr, 10);
315 
316 		if (pid == 0)
317 			break;
318 		if (*ptr)
319 			ptr++;
320 		else
321 			break;
322 		if (kill(pid, SIGKILL))
323 			return -1;
324 	}
325 
326 	return 0;
327 }
328 
/*
 * Remove the cgroup directory @cgroup.
 *
 * While rmdir() reports EBUSY, the cgroup's processes are killed and the
 * removal is retried after 100 us, with no upper bound on the retries.
 * A NULL @cgroup or an already missing directory counts as success.
 *
 * Returns 0 on success, or the failing rmdir() result (-1) otherwise.
 */
int cg_destroy(const char *cgroup)
{
	int rc;

	if (cgroup == NULL)
		return 0;

	for (;;) {
		rc = rmdir(cgroup);
		if (rc == 0 || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	if (rc != 0 && errno == ENOENT)
		rc = 0;

	return rc;
}
348 
/*
 * Move the process @pid into @cgroup by writing its pid to cgroup.procs.
 *
 * Returns the cg_write() result: 0 on success, non-zero on failure.
 */
int cg_enter(const char *cgroup, int pid)
{
	char pid_text[64];
	int rc;

	snprintf(pid_text, sizeof(pid_text), "%d", pid);
	rc = cg_write(cgroup, "cgroup.procs", pid_text);

	return rc;
}
356 
/*
 * Move the calling process into @cgroup by writing "0" to cgroup.procs.
 *
 * Returns the cg_write() result: 0 on success, non-zero on failure.
 */
int cg_enter_current(const char *cgroup)
{
	int rc = cg_write(cgroup, "cgroup.procs", "0");

	return rc;
}
361 
/*
 * Move the calling thread into @cgroup by writing "0" to cgroup.threads.
 *
 * Returns the cg_write() result: 0 on success, non-zero on failure.
 */
int cg_enter_current_thread(const char *cgroup)
{
	int rc = cg_write(cgroup, "cgroup.threads", "0");

	return rc;
}
366 
/*
 * Run @fn(@cgroup, @arg) in a forked child that first moves itself into
 * @cgroup, and wait for the child to finish.
 *
 * The child exits with EXIT_FAILURE if it cannot enter the cgroup, otherwise
 * with the value returned by @fn.
 *
 * Returns the child's exit status, the negative fork() result if the fork
 * fails, or -1 if waiting fails or the child did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/* retcode is only meaningful once waitpid() has succeeded. */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
391 
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd, using clone3() with CLONE_INTO_CGROUP.
 *
 * Returns the sys_clone3() result when the call is usable. When the build
 * lacks CLONE_ARGS_SIZE_VER2, or clone3() fails with ENOSYS (clone3() not
 * available) or E2BIG (CLONE_INTO_CGROUP not available), errno is set to
 * ENOSYS and -ENOSYS is returned so callers can fall back to fork().
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t child = sys_clone3(&args, sizeof(args));

	/* Anything but "feature missing" is reported to the caller as is. */
	if (child >= 0 || (errno != ENOSYS && errno != E2BIG))
		return child;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
419 
/*
 * Wait for a state change of child @pid with waitid(), restarting the call
 * when it is interrupted by a signal. @options selects the state changes of
 * interest (WEXITED, WSTOPPED, WCONTINUED).
 *
 * waitid() reports what happened in si_code and the matching exit code or
 * signal number in si_status, so those fields are inspected directly
 * instead of the wait(2) status macros.
 *
 * Returns:
 *  - the exit code, if WEXITED was requested and the child exited;
 *  - the stop signal, if WSTOPPED was requested and the child stopped;
 *  - 0, if WCONTINUED was requested and the child was continued;
 *  - -1 if waitid() fails or none of the above applies (for example the
 *    child was killed by a signal).
 */
int clone_reap(pid_t pid, int options)
{
	int ret;
	siginfo_t info = {
		.si_signo = 0,
	};

	do {
		ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0)
		return -1;

	if ((options & WEXITED) && info.si_code == CLD_EXITED)
		return info.si_status;

	if ((options & WSTOPPED) && info.si_code == CLD_STOPPED)
		return info.si_status;

	if ((options & WCONTINUED) && info.si_code == CLD_CONTINUED)
		return 0;

	return -1;
}
452 
/*
 * Open the directory @dir as an O_PATH descriptor. The open fails if @dir is
 * not a directory (O_DIRECTORY) or is a symbolic link (O_NOFOLLOW); the
 * descriptor is closed automatically on exec (O_CLOEXEC).
 *
 * Returns the descriptor on success, -1 with errno set on failure.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
457 
/*
 * Close @fd if it is non-negative, restoring the errno value that was
 * current before the close() so an earlier error code survives the cleanup.
 *
 * Wrapped in do { } while (0) so the macro acts as a single statement, also
 * in an unbraced if/else. @fd is evaluated more than once, so pass a plain
 * variable.
 */
#define close_prot_errno(fd)                                                   \
	do {                                                                   \
		if ((fd) >= 0) {                                               \
			int _e_ = errno;                                       \
			close(fd);                                             \
			errno = _e_;                                           \
		}                                                              \
	} while (0)
464 
/*
 * Start @fn(@cgroup, @arg) in a child that is cloned straight into @cgroup,
 * without waiting for it. The child exits with the value returned by @fn.
 *
 * Returns the child's pid in the parent, -1 if the cgroup directory cannot
 * be opened, or the negative clone_into_cgroup() result (errno is preserved
 * across the descriptor cleanup).
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	close_prot_errno(cg_fd);

	/* Only the child sees 0; it never returns from here. */
	if (!child)
		exit(fn(cgroup, arg));

	return child;
}
483 
/*
 * Start @fn(@cgroup, @arg) in a child process inside @cgroup without waiting
 * for it to finish.
 *
 * Cloning directly into the cgroup is tried first. Only when that reports
 * ENOSYS does the function fall back to fork(), with the child writing its
 * own pid to cgroup.procs; that child exits with EXIT_FAILURE if the write
 * fails, otherwise with the value returned by @fn.
 *
 * Returns the child's pid in the parent, or a negative value on failure.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char self[64];
	int child;

	child = clone_into_cgroup_run_nowait(cgroup, fn, arg);
	if (child > 0)
		return child;

	/* Genuine test failure. */
	if (child < 0 && errno != ENOSYS)
		return -1;

	child = fork();
	if (child != 0)
		return child;

	/* Forked child: join the cgroup, then run the payload. */
	snprintf(self, sizeof(self), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", self))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
510 
511 int proc_mount_contains(const char *option)
512 {
513 	char buf[4 * PAGE_SIZE];
514 	ssize_t read;
515 
516 	read = read_text("/proc/mounts", buf, sizeof(buf));
517 	if (read < 0)
518 		return read;
519 
520 	return strstr(buf, option) != NULL;
521 }
522 
/*
 * Read the procfs file @item of a process into @buf (capacity @size).
 *
 * A non-zero @pid selects "/proc/<pid>/<item>". With @pid == 0 the caller's
 * own entry is used: "/proc/thread-self/<item>" if @thread is true,
 * "/proc/self/<item>" otherwise.
 *
 * Returns the number of bytes read, or -1 on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char file[PATH_MAX];
	ssize_t nread;

	if (pid) {
		snprintf(file, sizeof(file), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(file, sizeof(file), "/proc/%s/%s", self, item);
	}

	nread = read_text(file, buf, size);
	if (nread < 0)
		return -1;

	return nread;
}
537 
538 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
539 {
540 	char buf[PAGE_SIZE];
541 
542 	if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
543 		return -1;
544 
545 	return strstr(buf, needle) ? 0 : -1;
546 }
547 
/*
 * Verify that a process can be cloned into @cgroup: clone a child that
 * exits immediately with EXIT_SUCCESS, then reap it.
 *
 * Returns 0 if the clone succeeded, -1 if the cgroup directory cannot be
 * opened or the clone fails.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	close_prot_errno(cg_fd);

	if (child < 0)
		return -1;

	if (child == 0)
		exit(EXIT_SUCCESS);

	/*
	 * The outcome of the reap is deliberately ignored; only the success
	 * of the initial clone matters here.
	 */
	(void)clone_reap(child, WEXITED);

	return 0;
}
572 
/*
 * Create an inotify instance that watches "<cgroup>/<filename>" for
 * modifications (IN_MODIFY).
 *
 * The path is formatted into a local buffer, so nothing is heap-allocated
 * and nothing can leak.
 *
 * Returns the inotify descriptor on success (the caller closes it), or -1
 * if the path does not fit in PATH_MAX, the inotify instance cannot be
 * created, or the watch cannot be added.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char path[PATH_MAX];
	int fd, n;

	n = snprintf(path, sizeof(path), "%s/%s", cgroup, filename);
	if (n < 0 || (size_t)n >= sizeof(path))
		return -1;

	fd = inotify_init1(0);
	if (fd == -1)
		return fd;

	if (inotify_add_watch(fd, path, IN_MODIFY) == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
589 
/*
 * Set up an inotify watch on the "cgroup.events" file of @cgroup.
 *
 * Returns the inotify descriptor on success, -1 on failure.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	int fd = __prepare_for_wait(cgroup, "cgroup.events");

	return fd;
}
594 
/*
 * Set up an inotify watch on the "memory.events" file of @cgroup.
 *
 * Returns the inotify descriptor on success, -1 on failure.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	int fd = __prepare_for_wait(cgroup, "memory.events");

	return fd;
}
599 
/*
 * Block until @fd becomes readable.
 *
 * poll() is called with a 10 second timeout and simply retried when it
 * times out or is interrupted by a signal, so the wait itself is unbounded.
 *
 * Returns 0 once POLLIN is reported. Returns -1 if @fd is negative, poll()
 * fails, or the descriptor reports an error condition (POLLERR, POLLHUP or
 * POLLNVAL) without data, since retrying could then never succeed.
 */
int cg_wait_for(int fd)
{
	int ret = -1;
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};

	/* poll() ignores negative descriptors and would only ever time out. */
	if (fd < 0)
		return -1;

	while (true) {
		ret = poll(&fds, 1, 10000);

		if (ret == -1) {
			if (errno == EINTR)
				continue;

			break;
		}

		/* Timeout: keep waiting. */
		if (ret == 0)
			continue;

		if (fds.revents & POLLIN) {
			ret = 0;
			break;
		}

		/* Woken up without data: the descriptor is unusable. */
		if (fds.revents & (POLLERR | POLLHUP | POLLNVAL)) {
			ret = -1;
			break;
		}
	}

	return ret;
}
626