1 /* SPDX-License-Identifier: GPL-2.0 */
2
3 #define _GNU_SOURCE
4
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/limits.h>
8 #include <poll.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/inotify.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <unistd.h>
18
19 #include "cgroup_util.h"
20 #include "../../clone3/clone3_selftests.h"
21
22 bool cg_test_v1_named;
23
/*
 * Read up to @max_len - 1 bytes from the start of @path into @buf and
 * NUL-terminate the result.
 *
 * Returns the number of bytes read on success, or -errno on failure.
 * A zero-sized buffer is rejected with -EINVAL because there is no room
 * for the terminator.
 */
ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd, err = 0;

	/* max_len - 1 would wrap around to SIZE_MAX and overrun buf. */
	if (!max_len)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	if (len < 0)
		err = errno;	/* capture before close() can overwrite it */
	else
		buf[len] = 0;

	close(fd);
	return len < 0 ? -err : len;
}
42
/*
 * Append @len bytes from @buf to the already existing file @path (the
 * file is opened without O_CREAT).
 *
 * Returns the number of bytes written on success, or -errno on failure.
 */
ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int fd, err = 0;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	written = write(fd, buf, len);
	if (written < 0)
		err = errno;	/* capture before close() can overwrite it */

	close(fd);
	return written < 0 ? -err : written;
}
56
/*
 * Build the path "<root>/<name>".
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * if the allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* +2: the '/' separator and the terminating NUL. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
66
/*
 * Build the path "<root>/<name>_<index>".
 *
 * The buffer is sized from the formatted length, so every int value of
 * @index fits without truncation.
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * if formatting or the allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	/* A NULL/0 snprintf() only reports how many characters are needed. */
	int need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	size_t len;
	char *ret;

	if (need < 0)
		return NULL;

	len = (size_t)need + 1;
	ret = malloc(len);
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s_%d", root, name, index);

	return ret;
}
76
/*
 * Build the path "<cgroup>/<control>" of a control file.
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * if the allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* +2: the '/' separator and the terminating NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
86
/*
 * Read the control file "<cgroup>/<control>" into @buf, which holds
 * @len bytes, by way of read_text().
 *
 * Returns 0 on success, or -errno on failure.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];
	ssize_t nread;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);

	nread = read_text(file, buf, len);
	if (nread < 0)
		return nread;

	return 0;
}
98
/*
 * Compare the contents of "<cgroup>/<control>" with @expected.
 *
 * The read buffer holds strlen(expected) + 1 bytes, so no more than
 * strlen(expected) bytes of the file take part in the comparison.
 *
 * Returns the strcmp() result (0 when equal), or -1 when @expected is
 * NULL, the allocation fails or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	size_t size;
	char *buf;
	int ret = -1;

	/* A NULL reference string can never match. */
	if (!expected)
		return -1;

	size = strlen(expected) + 1;
	buf = malloc(size);
	if (!buf)
		return -1;

	if (!cg_read(cgroup, control, buf, size))
		ret = strcmp(expected, buf);

	free(buf);
	return ret;
}
125
/*
 * Repeat cg_read_strcmp() up to 100 times, sleeping 10ms after every
 * mismatch, until the control file matches @expected.
 *
 * Returns 0 as soon as a comparison succeeds, otherwise the result of
 * the last attempt.
 */
int cg_read_strcmp_wait(const char *cgroup, const char *control,
			const char *expected)
{
	int attempts_left = 100;
	int ret;

	do {
		ret = cg_read_strcmp(cgroup, control, expected);
		if (ret == 0)
			return 0;

		usleep(10000);
	} while (--attempts_left > 0);

	return ret;
}
140
cg_read_strstr(const char * cgroup,const char * control,const char * needle)141 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
142 {
143 char buf[PAGE_SIZE];
144
145 if (cg_read(cgroup, control, buf, sizeof(buf)))
146 return -1;
147
148 return strstr(buf, needle) ? 0 : -1;
149 }
150
/*
 * Read the control file "<cgroup>/<control>" and convert its leading
 * number with atol().
 *
 * Returns the parsed value, or -1 if the file cannot be read.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	int err = cg_read(cgroup, control, text, sizeof(text));

	return err ? -1 : atol(text);
}
160
/*
 * Read a number from offset 0 of the already opened @fd, leaving the
 * file position untouched (pread()).
 *
 * Returns the value parsed by atol(), or -1 if nothing could be read.
 */
long cg_read_long_fd(int fd)
{
	char buf[128];
	ssize_t len;

	/* Keep one byte spare: atol() needs a NUL-terminated string. */
	len = pread(fd, buf, sizeof(buf) - 1, 0);
	if (len <= 0)
		return -1;

	buf[len] = '\0';

	return atol(buf);
}
170
cg_read_key_long(const char * cgroup,const char * control,const char * key)171 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
172 {
173 char buf[PAGE_SIZE];
174 char *ptr;
175
176 if (cg_read(cgroup, control, buf, sizeof(buf)))
177 return -1;
178
179 ptr = strstr(buf, key);
180 if (!ptr)
181 return -1;
182
183 return atol(ptr + strlen(key));
184 }
185
/*
 * Poll cg_read_key_long() up to @retries times, sleeping
 * @wait_interval_us microseconds after each value that differs from
 * @expected.
 *
 * Returns the last value read: @expected once it shows up, a negative
 * value as soon as a read yields one, or -1 if @retries is not positive.
 */
long cg_read_key_long_poll(const char *cgroup, const char *control,
			   const char *key, long expected, int retries,
			   useconds_t wait_interval_us)
{
	long val = -1;
	int remaining;

	for (remaining = retries; remaining > 0; remaining--) {
		val = cg_read_key_long(cgroup, control, key);

		/* Stop on an error as well as on the awaited value. */
		if (val < 0 || val == expected)
			return val;

		usleep(wait_interval_us);
	}

	return val;
}
206
cg_read_lc(const char * cgroup,const char * control)207 long cg_read_lc(const char *cgroup, const char *control)
208 {
209 char buf[PAGE_SIZE];
210 const char delim[] = "\n";
211 char *line;
212 long cnt = 0;
213
214 if (cg_read(cgroup, control, buf, sizeof(buf)))
215 return -1;
216
217 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
218 cnt++;
219
220 return cnt;
221 }
222
/*
 * Write the NUL-terminated string @buf to the control file
 * "<cgroup>/<control>".
 *
 * Returns 0 on success, or -errno on failure. A short write is reported
 * as -EIO, so the result is never a positive byte count.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char path[PATH_MAX];
	ssize_t len = strlen(buf), ret;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);

	ret = write_text(path, buf, len);
	if (ret < 0)
		return ret;

	return ret == len ? 0 : -EIO;
}
233
/*
 * Open the control file "<cgroup>/<control>" with the given open(2)
 * @flags.
 *
 * Returns the file descriptor on success, or -1 on failure. The caller
 * releases the descriptor with close().
 */
int cg_open(const char *cgroup, const char *control, int flags)
{
	char file[PATH_MAX];
	int fd;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	fd = open(file, flags);

	return fd;
}
245
/*
 * Format @value as a decimal number and write it to the control file
 * "<cgroup>/<control>".
 *
 * @value is a signed long, so it is printed with "%ld"; a negative
 * value therefore reaches the file as a negative number.
 *
 * Returns 0 on success, the negative snprintf() result if formatting
 * fails, or the cg_write() error otherwise.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
257
cg_find_root(char * root,size_t len,const char * controller,bool * nsdelegate)258 static int cg_find_root(char *root, size_t len, const char *controller,
259 bool *nsdelegate)
260 {
261 char buf[10 * PAGE_SIZE];
262 char *fs, *mount, *type, *options;
263 const char delim[] = "\n\t ";
264
265 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
266 return -1;
267
268 /*
269 * Example:
270 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
271 */
272 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
273 mount = strtok(NULL, delim);
274 type = strtok(NULL, delim);
275 options = strtok(NULL, delim);
276 strtok(NULL, delim);
277 strtok(NULL, delim);
278 if (strcmp(type, "cgroup") == 0) {
279 if (!controller || !strstr(options, controller))
280 continue;
281 } else if (strcmp(type, "cgroup2") == 0) {
282 if (controller &&
283 cg_read_strstr(mount, "cgroup.controllers", controller))
284 continue;
285 } else {
286 continue;
287 }
288 strncpy(root, mount, len);
289
290 if (nsdelegate)
291 *nsdelegate = !!strstr(options, "nsdelegate");
292 return 0;
293
294 }
295
296 return -1;
297 }
298
/*
 * Find the mount point of a cgroup hierarchy providing @controller and
 * store it in @root (@len bytes). Forwards to cg_find_root() without
 * asking for the nsdelegate state.
 *
 * Returns 0 on success, or -1 on failure.
 */
int cg_find_controller_root(char *root, size_t len, const char *controller)
{
	int err = cg_find_root(root, len, controller, NULL);

	return err;
}
303
/*
 * Find the mount point of a cgroup hierarchy without asking for a
 * particular controller and store it in @root (@len bytes). Forwards to
 * cg_find_root() with a NULL controller; @nsdelegate is passed through
 * unchanged.
 *
 * Returns 0 on success, or -1 on failure.
 */
int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
{
	int err = cg_find_root(root, len, NULL, nsdelegate);

	return err;
}
308
/*
 * Create the directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result: 0 on success, -1 with errno set on
 * failure.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
313
cg_wait_for_proc_count(const char * cgroup,int count)314 int cg_wait_for_proc_count(const char *cgroup, int count)
315 {
316 char buf[10 * PAGE_SIZE] = {0};
317 int attempts;
318 char *ptr;
319
320 for (attempts = 10; attempts >= 0; attempts--) {
321 int nr = 0;
322
323 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
324 break;
325
326 for (ptr = buf; *ptr; ptr++)
327 if (*ptr == '\n')
328 nr++;
329
330 if (nr >= count)
331 return 0;
332
333 usleep(100000);
334 }
335
336 return -1;
337 }
338
cg_killall(const char * cgroup)339 int cg_killall(const char *cgroup)
340 {
341 char buf[PAGE_SIZE];
342 char *ptr = buf;
343
344 /* If cgroup.kill exists use it. */
345 if (!cg_write(cgroup, "cgroup.kill", "1"))
346 return 0;
347
348 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
349 return -1;
350
351 while (ptr < buf + sizeof(buf)) {
352 int pid = strtol(ptr, &ptr, 10);
353
354 if (pid == 0)
355 break;
356 if (*ptr)
357 ptr++;
358 else
359 break;
360 if (kill(pid, SIGKILL))
361 return -1;
362 }
363
364 return 0;
365 }
366
/*
 * Remove the directory @cgroup.
 *
 * While rmdir() fails with EBUSY, cg_killall() is called on the cgroup
 * and the removal is retried after 100us, without an upper bound on the
 * number of retries. A directory that does not exist counts as removed.
 *
 * Returns 0 on success or when @cgroup is NULL, otherwise the failing
 * rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	if (!cgroup)
		return 0;

	for (;;) {
		ret = rmdir(cgroup);
		if (ret == 0 || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	if (ret && errno == ENOENT)
		ret = 0;

	return ret;
}
386
/*
 * Write @pid in decimal to cgroup.procs of @cgroup.
 *
 * Returns the cg_write() result: 0 on success, non-zero on failure.
 */
int cg_enter(const char *cgroup, int pid)
{
	char pid_str[64];

	snprintf(pid_str, sizeof(pid_str), "%d", pid);

	return cg_write(cgroup, "cgroup.procs", pid_str);
}
394
/*
 * Write "0" to cgroup.procs of @cgroup.
 * NOTE(review): "0" presumably stands for the writing process itself;
 * confirm against the cgroup documentation.
 *
 * Returns the cg_write() result: 0 on success, non-zero on failure.
 */
int cg_enter_current(const char *cgroup)
{
	int err = cg_write(cgroup, "cgroup.procs", "0");

	return err;
}
399
cg_enter_current_thread(const char * cgroup)400 int cg_enter_current_thread(const char *cgroup)
401 {
402 return cg_write(cgroup, CG_THREADS_FILE, "0");
403 }
404
/*
 * Run @fn(@cgroup, @arg) in a forked child after the child has written
 * its own pid to cgroup.procs of @cgroup, and wait for the child.
 *
 * The child exits with EXIT_FAILURE if it cannot enter the cgroup,
 * otherwise with the value returned by @fn.
 *
 * Returns the child's exit status if it exited normally, the negative
 * fork() result if no child could be created, or -1 if waiting fails
 * or the child did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only valid once waitpid() has succeeded: retry when a
	 * signal interrupts the wait and give up on any other error.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
429
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd, using clone3() with CLONE_INTO_CGROUP.
 *
 * Returns the sys_clone3() result when the call is available. When
 * CLONE_ARGS_SIZE_VER2 is not defined at build time, or the call fails
 * with ENOSYS or E2BIG, errno is set to ENOSYS and -ENOSYS is returned.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t pid = sys_clone3(&args, sizeof(struct __clone_args));

	if (pid >= 0)
		return pid;

	/*
	 * Verify that this is a genuine test failure:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 */
	if (errno != ENOSYS && errno != E2BIG)
		return pid;
#endif
	/* Report both kinds of missing support as ENOSYS. */
	errno = ENOSYS;
	return -ENOSYS;
}
457
/*
 * Wait for a state change of child @pid with waitid(), restarting the
 * call when it is interrupted by a signal. @options selects the states
 * of interest (WEXITED, WSTOPPED, WCONTINUED); __WALL and __WNOTHREAD
 * are always added.
 *
 * The reported si_status is decoded with the wait-status macros:
 * WEXITSTATUS() for WEXITED, WSTOPSIG() for WSTOPPED and 0 for
 * WCONTINUED, each only when the matching WIF*() test holds.
 * NOTE(review): those macros are applied to siginfo's si_status rather
 * than to a wait()-style status word; confirm this is intended.
 *
 * Returns the decoded value, or -1 if waitid() fails or no requested
 * state matches.
 */
int clone_reap(pid_t pid, int options)
{
	siginfo_t info = {
		.si_signo = 0,
	};
	int ret;

	do {
		ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0)
		return -1;

	if ((options & WEXITED) && WIFEXITED(info.si_status))
		return WEXITSTATUS(info.si_status);

	if ((options & WSTOPPED) && WIFSTOPPED(info.si_status))
		return WSTOPSIG(info.si_status);

	if ((options & WCONTINUED) && WIFCONTINUED(info.si_status))
		return 0;

	return -1;
}
490
/*
 * Open the directory @dir as an O_PATH descriptor, refusing to follow a
 * symlink in the final path component and marking the descriptor
 * close-on-exec.
 *
 * Returns the file descriptor, or -1 with errno set on failure.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
495
/*
 * Close @fd if it is non-negative, leaving errno exactly as it was
 * before the call.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement (safe in an unbraced if/else), and @fd is evaluated
 * only once.
 */
#define close_prot_errno(fd)				\
	do {						\
		int _fd_ = (fd);			\
		if (_fd_ >= 0) {			\
			int _e_ = errno;		\
			close(_fd_);			\
			errno = _e_;			\
		}					\
	} while (0)
502
/*
 * Clone a child directly into @cgroup and have it run @fn(@cgroup,
 * @arg); the child exits with the value @fn returns. The parent does
 * not wait for it.
 *
 * Returns, in the parent, the clone_into_cgroup() result (child pid or
 * a negative value), or -1 if @cgroup cannot be opened.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	/* The descriptor is needed for the clone only; errno is preserved. */
	close_prot_errno(cg_fd);

	if (child != 0)
		return child;

	/* Child: never returns to the caller. */
	exit(fn(cgroup, arg));
}
521
/*
 * Start a child that runs @fn(@cgroup, @arg) inside @cgroup and return
 * without waiting for it.
 *
 * Cloning straight into the cgroup is tried first. If that reports
 * ENOSYS, a plain fork() is used and the child writes its own pid to
 * cgroup.procs before calling @fn; it exits with EXIT_FAILURE if that
 * write fails, otherwise with the value @fn returns.
 *
 * Returns the child's pid, -1 if the clone attempt fails for a reason
 * other than ENOSYS, or the negative fork() result.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char self[64];
	int pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);

	if (pid > 0)
		return pid;

	/* Genuine test failure. */
	if (pid < 0 && errno != ENOSYS)
		return -1;

	pid = fork();
	if (pid != 0)
		return pid;

	/* Child: enter the cgroup by hand, then run the payload. */
	snprintf(self, sizeof(self), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", self) != 0)
		exit(EXIT_FAILURE);

	exit(fn(cgroup, arg));
}
548
proc_mount_contains(const char * option)549 int proc_mount_contains(const char *option)
550 {
551 char buf[4 * PAGE_SIZE];
552 ssize_t read;
553
554 read = read_text("/proc/mounts", buf, sizeof(buf));
555 if (read < 0)
556 return read;
557
558 return strstr(buf, option) != NULL;
559 }
560
cgroup_feature(const char * feature)561 int cgroup_feature(const char *feature)
562 {
563 char buf[PAGE_SIZE];
564 ssize_t read;
565
566 read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf));
567 if (read < 0)
568 return read;
569
570 return strstr(buf, feature) != NULL;
571 }
572
/*
 * Read the procfs entry @item of process @pid into @buf (@size bytes).
 * A @pid of 0 selects /proc/thread-self when @thread is true and
 * /proc/self otherwise.
 *
 * Returns the number of bytes read, or -1 on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];
	ssize_t nread;

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	nread = read_text(path, buf, size);
	if (nread < 0)
		return -1;

	return nread;
}
587
proc_read_strstr(int pid,bool thread,const char * item,const char * needle)588 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
589 {
590 char buf[PAGE_SIZE];
591
592 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
593 return -1;
594
595 return strstr(buf, needle) ? 0 : -1;
596 }
597
/*
 * Clone a child directly into @cgroup, let it exit immediately with
 * EXIT_SUCCESS and reap it.
 *
 * Returns 0 if the clone succeeded, or -1 if @cgroup cannot be opened
 * or the clone fails.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	close_prot_errno(cg_fd);

	if (child == 0)
		exit(EXIT_SUCCESS);

	if (child < 0)
		return -1;

	/*
	 * The outcome of the reap is deliberately ignored: only the success
	 * of the initial clone matters here.
	 */
	(void)clone_reap(child, WEXITED);

	return 0;
}
622
/*
 * Create an inotify instance that watches "<cgroup>/<filename>" for
 * IN_MODIFY events.
 *
 * The path string obtained from cg_control() is heap-allocated and is
 * released here once the watch has been registered.
 *
 * Returns the inotify file descriptor, or -1 on failure.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char *path;
	int fd, ret;

	path = cg_control(cgroup, filename);
	if (!path)
		return -1;

	fd = inotify_init1(0);
	if (fd == -1) {
		free(path);
		return -1;
	}

	ret = inotify_add_watch(fd, path, IN_MODIFY);
	free(path);
	if (ret == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
639
/*
 * Set up an inotify watch on the cgroup.events file of @cgroup.
 *
 * Returns the inotify file descriptor, or -1 on failure.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	int fd = __prepare_for_wait(cgroup, "cgroup.events");

	return fd;
}
644
/*
 * Set up an inotify watch on the memory.events file of @cgroup.
 *
 * Returns the inotify file descriptor, or -1 on failure.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	int fd = __prepare_for_wait(cgroup, "memory.events");

	return fd;
}
649
/*
 * Block until @fd becomes readable.
 *
 * poll() is called with a 10 second timeout and simply repeated after a
 * timeout or an interrupting signal, so the wait itself has no upper
 * bound.
 *
 * Returns 0 once @fd is readable, or -1 if poll() fails or reports an
 * error condition (POLLERR, POLLHUP, POLLNVAL) without readable data.
 * Such a condition does not clear by itself, so retrying would spin
 * forever.
 */
int cg_wait_for(int fd)
{
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};
	int ret;

	for (;;) {
		ret = poll(&fds, 1, 10000);
		if (ret == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		/* Timed out: keep waiting. */
		if (ret == 0)
			continue;

		if (fds.revents & POLLIN)
			return 0;

		if (fds.revents & (POLLERR | POLLHUP | POLLNVAL))
			return -1;
	}
}
676