1 /* SPDX-License-Identifier: GPL-2.0 */
2
3 #define _GNU_SOURCE
4
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/limits.h>
8 #include <poll.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/inotify.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <unistd.h>
18
19 #include "cgroup_util.h"
20 #include "../../clone3/clone3_selftests.h"
21
/*
 * Global flag defined here; none of the helpers visible in this file read
 * or write it.
 * NOTE(review): presumably set by tests that run against a named cgroup v1
 * hierarchy - confirm against the users of cgroup_util.h.
 */
bool cg_test_v1_named;
23
/*
 * Read up to max_len - 1 bytes from the start of @path into @buf and
 * NUL-terminate the result.
 *
 * Returns the number of bytes read on success, or -errno on failure.
 * A zero @max_len leaves no room for the terminator and yields -EINVAL.
 */
ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd, err;

	/* max_len - 1 below would otherwise wrap around to SIZE_MAX. */
	if (!max_len)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	/* Capture errno right away: close() below may overwrite it. */
	err = errno;

	if (len >= 0)
		buf[len] = 0;

	close(fd);
	return len < 0 ? -err : len;
}
42
/*
 * Write @len bytes from @buf to the existing file at @path, which is opened
 * with O_WRONLY | O_APPEND (it is never created).
 *
 * Returns the number of bytes written on success, or -errno on failure.
 */
ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	int fd, err;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	len = write(fd, buf, len);
	/* Capture errno right away: close() below may overwrite it. */
	err = errno;
	close(fd);
	return len < 0 ? -err : len;
}
56
/*
 * Build the path "<root>/<name>" in a newly allocated string.
 *
 * Returns the string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* One byte for the '/' separator, one for the terminating NUL. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
66
/*
 * Build the path "<root>/<name>_<index>" in a newly allocated string.
 *
 * Returns the string, which the caller must free(), or NULL if formatting
 * or the allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	char *ret;
	int len;

	/*
	 * Let snprintf() compute the exact length so that every index value,
	 * including large or negative ones, fits without truncation.
	 */
	len = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	if (len < 0)
		return NULL;

	ret = malloc((size_t)len + 1);
	if (!ret)
		return NULL;

	snprintf(ret, (size_t)len + 1, "%s/%s_%d", root, name, index);

	return ret;
}
76
/*
 * Build the path "<cgroup>/<control>" of a control file in a newly
 * allocated string.
 *
 * Returns the string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* One byte for the '/' separator, one for the terminating NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
86
/*
 * Read the control file "<cgroup>/<control>" into @buf through read_text(),
 * passing @len as the buffer size.
 *
 * Returns 0 on success, or -errno on failure.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];
	ssize_t nread;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	nread = read_text(file, buf, len);
	if (nread < 0)
		return nread;

	return 0;
}
98
/*
 * Compare the start of control file "<cgroup>/<control>" with @expected.
 *
 * The buffer handed to cg_read() is strlen(expected) + 1 bytes large (2 bytes
 * when @expected is empty), so file content beyond that is never compared.
 *
 * Returns the strcmp() result of @expected against the data read (0 on a
 * match), or -1 if @expected is NULL, the allocation fails or the read fails.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	char *data;
	size_t cap;
	int cmp = -1;

	/* A NULL string can never match. */
	if (!expected)
		return -1;

	/* needs a capacity > 1, otherwise cg_read() reads 0 bytes */
	cap = strlen(expected) + 1;
	if (cap < 2)
		cap = 2;

	data = malloc(cap);
	if (!data)
		return -1;

	if (!cg_read(cgroup, control, data, cap))
		cmp = strcmp(expected, data);

	free(data);
	return cmp;
}
126
/*
 * Call cg_read_strcmp() repeatedly until it reports a match, making up to
 * 100 attempts and sleeping 10 ms after every failed one.
 *
 * Returns 0 on a match, otherwise the result of the last attempt.
 */
int cg_read_strcmp_wait(const char *cgroup, const char *control,
			const char *expected)
{
	int attempts_left = 100;
	int ret;

	do {
		ret = cg_read_strcmp(cgroup, control, expected);
		if (ret == 0)
			break;
		usleep(10000);
	} while (--attempts_left > 0);

	return ret;
}
141
cg_read_strstr(const char * cgroup,const char * control,const char * needle)142 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
143 {
144 char buf[PAGE_SIZE];
145
146 if (cg_read(cgroup, control, buf, sizeof(buf)))
147 return -1;
148
149 return strstr(buf, needle) ? 0 : -1;
150 }
151
/*
 * Read control file "<cgroup>/<control>" and parse its content with atol().
 *
 * Returns the parsed value, or -1 if the read fails.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	long value = -1;

	if (cg_read(cgroup, control, text, sizeof(text)) == 0)
		value = atol(text);

	return value;
}
161
/*
 * Parse a decimal number from the start of the file behind @fd. The data is
 * fetched with pread() at offset 0.
 *
 * Returns the parsed value, or -1 if nothing could be read.
 */
long cg_read_long_fd(int fd)
{
	char buf[128];
	ssize_t len;

	/* Leave room for the terminator: pread() does not add one. */
	len = pread(fd, buf, sizeof(buf) - 1, 0);
	if (len <= 0)
		return -1;

	/* atol() needs a NUL-terminated string. */
	buf[len] = '\0';
	return atol(buf);
}
171
cg_read_key_long(const char * cgroup,const char * control,const char * key)172 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
173 {
174 char buf[PAGE_SIZE];
175 char *ptr;
176
177 if (cg_read(cgroup, control, buf, sizeof(buf)))
178 return -1;
179
180 ptr = strstr(buf, key);
181 if (!ptr)
182 return -1;
183
184 return atol(ptr + strlen(key));
185 }
186
/*
 * Call cg_read_key_long() up to @retries times until it yields @expected,
 * sleeping @wait_interval_us microseconds after each non-matching read.
 *
 * Returns the last value read: @expected on success, a negative value as
 * soon as a read reports one, or the final non-matching value once the
 * retries are used up. Returns -1 if @retries is not positive.
 */
long cg_read_key_long_poll(const char *cgroup, const char *control,
			   const char *key, long expected, int retries,
			   useconds_t wait_interval_us)
{
	long val = -1;
	int attempt = 0;

	while (attempt < retries) {
		val = cg_read_key_long(cgroup, control, key);
		if (val < 0 || val == expected)
			return val;

		usleep(wait_interval_us);
		attempt++;
	}

	return val;
}
207
cg_read_lc(const char * cgroup,const char * control)208 long cg_read_lc(const char *cgroup, const char *control)
209 {
210 char buf[PAGE_SIZE];
211 const char delim[] = "\n";
212 char *line;
213 long cnt = 0;
214
215 if (cg_read(cgroup, control, buf, sizeof(buf)))
216 return -1;
217
218 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
219 cnt++;
220
221 return cnt;
222 }
223
/*
 * Write the string @buf to control file "<cgroup>/<control>" through
 * write_text().
 *
 * Returns 0 when the whole string was written, or -errno on failure.
 * A short write is reported as the (positive) number of bytes written.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char file[PATH_MAX];
	ssize_t want = strlen(buf);
	ssize_t done;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	done = write_text(file, buf, want);
	if (done == want)
		return 0;

	return done;
}
234
/*
 * Open control file "<cgroup>/<control>" with the given open(2) @flags.
 *
 * Returns the file descriptor on success, or -1 on failure.
 * (the descriptor should be closed with close() as usual)
 */
int cg_open(const char *cgroup, const char *control, int flags)
{
	char file[PATH_MAX];
	int fd;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	fd = open(file, flags);

	return fd;
}
246
/*
 * Write @value, formatted as a decimal number, to control file
 * "<cgroup>/<control>".
 *
 * Returns the result of cg_write() (0 on success), or a negative value if
 * formatting fails.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	/* value is a signed long: format it with %ld, not %lu. */
	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
258
cg_find_root(char * root,size_t len,const char * controller,bool * nsdelegate)259 static int cg_find_root(char *root, size_t len, const char *controller,
260 bool *nsdelegate)
261 {
262 char buf[10 * PAGE_SIZE];
263 char *fs, *mount, *type, *options;
264 const char delim[] = "\n\t ";
265
266 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
267 return -1;
268
269 /*
270 * Example:
271 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
272 */
273 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
274 mount = strtok(NULL, delim);
275 type = strtok(NULL, delim);
276 options = strtok(NULL, delim);
277 strtok(NULL, delim);
278 strtok(NULL, delim);
279 if (strcmp(type, "cgroup") == 0) {
280 if (!controller || !strstr(options, controller))
281 continue;
282 } else if (strcmp(type, "cgroup2") == 0) {
283 if (controller &&
284 cg_read_strstr(mount, "cgroup.controllers", controller))
285 continue;
286 } else {
287 continue;
288 }
289 strncpy(root, mount, len);
290
291 if (nsdelegate)
292 *nsdelegate = !!strstr(options, "nsdelegate");
293 return 0;
294
295 }
296
297 return -1;
298 }
299
/*
 * Find the mount point of the cgroup hierarchy that provides @controller and
 * copy it to @root (a buffer of @len bytes). The nsdelegate mount option is
 * not reported.
 *
 * Returns the result of cg_find_root(): 0 on success, -1 on failure.
 */
int cg_find_controller_root(char *root, size_t len, const char *controller)
{
	bool *no_nsdelegate = NULL;

	return cg_find_root(root, len, controller, no_nsdelegate);
}
304
/*
 * Find the mount point of the first cgroup2 mount, without requiring any
 * particular controller, and copy it to @root (a buffer of @len bytes).
 * @nsdelegate, if non-NULL, receives whether the mount options contain
 * "nsdelegate".
 *
 * Returns the result of cg_find_root(): 0 on success, -1 on failure.
 */
int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
{
	const char *any_controller = NULL;

	return cg_find_root(root, len, any_controller, nsdelegate);
}
309
/*
 * Create the cgroup directory @cgroup with mode 0755.
 *
 * Returns the result of mkdir(): 0 on success, -1 with errno set on failure.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
314
cg_wait_for_proc_count(const char * cgroup,int count)315 int cg_wait_for_proc_count(const char *cgroup, int count)
316 {
317 char buf[10 * PAGE_SIZE] = {0};
318 int attempts;
319 char *ptr;
320
321 for (attempts = 10; attempts >= 0; attempts--) {
322 int nr = 0;
323
324 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
325 break;
326
327 for (ptr = buf; *ptr; ptr++)
328 if (*ptr == '\n')
329 nr++;
330
331 if (nr >= count)
332 return 0;
333
334 usleep(100000);
335 }
336
337 return -1;
338 }
339
cg_killall(const char * cgroup)340 int cg_killall(const char *cgroup)
341 {
342 char buf[PAGE_SIZE];
343 char *ptr = buf;
344
345 /* If cgroup.kill exists use it. */
346 if (!cg_write(cgroup, "cgroup.kill", "1"))
347 return 0;
348
349 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
350 return -1;
351
352 while (ptr < buf + sizeof(buf)) {
353 int pid = strtol(ptr, &ptr, 10);
354
355 if (pid == 0)
356 break;
357 if (*ptr)
358 ptr++;
359 else
360 break;
361 if (kill(pid, SIGKILL))
362 return -1;
363 }
364
365 return 0;
366 }
367
/*
 * Remove the cgroup directory @cgroup.
 *
 * While rmdir() fails with EBUSY, cg_killall() is called, followed by a
 * 100 us sleep and another attempt; there is no retry limit. A NULL @cgroup
 * and a directory that does not exist (ENOENT) both count as success.
 *
 * Returns 0 on success, or the failing rmdir() result (-1) otherwise.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	if (!cgroup)
		return 0;

	for (;;) {
		ret = rmdir(cgroup);
		if (!ret || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	if (ret && errno == ENOENT)
		ret = 0;

	return ret;
}
387
/*
 * Move process @pid into @cgroup by writing its decimal PID to cgroup.procs.
 *
 * Returns the result of cg_write() (0 on success).
 */
int cg_enter(const char *cgroup, int pid)
{
	char pid_str[64];
	int ret;

	snprintf(pid_str, sizeof(pid_str), "%d", pid);
	ret = cg_write(cgroup, "cgroup.procs", pid_str);

	return ret;
}
395
/*
 * Write "0" to cgroup.procs of @cgroup.
 * NOTE(review): "0" presumably designates the writing process itself -
 * confirm against the cgroup documentation.
 *
 * Returns the result of cg_write() (0 on success).
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
400
cg_enter_current_thread(const char * cgroup)401 int cg_enter_current_thread(const char *cgroup)
402 {
403 return cg_write(cgroup, CG_THREADS_FILE, "0");
404 }
405
/*
 * Run @fn(@cgroup, @arg) in a forked child that first moves itself into
 * @cgroup, and wait for the child to finish.
 *
 * The child exits with EXIT_FAILURE if it can't enter the cgroup, otherwise
 * with the return value of @fn.
 *
 * Returns the child's exit status if it exited normally, the negative
 * fork() result if forking failed, or -1 if waiting failed or the child
 * did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0) {
		return pid;
	} else if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only valid when waitpid() succeeds: retry if a signal
	 * interrupted the wait and give up on any other error.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
430
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd, using sys_clone3() with CLONE_INTO_CGROUP.
 *
 * Returns the sys_clone3() result when the call succeeds or fails with any
 * error other than ENOSYS/E2BIG. In those two cases, and when
 * CLONE_ARGS_SIZE_VER2 is not defined at build time, errno is set to ENOSYS
 * and -ENOSYS is returned.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t pid = sys_clone3(&args, sizeof(struct __clone_args));

	if (pid >= 0)
		return pid;

	/*
	 * Only these two errors mean "not supported"; anything else is a
	 * genuine test failure:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 */
	if (errno != ENOSYS && errno != E2BIG)
		return pid;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
458
/*
 * Wait for a state change of child @pid with waitid(), restarting the call
 * when it is interrupted by a signal. @options selects the state changes of
 * interest (WEXITED, WSTOPPED, WCONTINUED); __WALL and __WNOTHREAD are
 * always added.
 *
 * Returns WEXITSTATUS()/WSTOPSIG() of info.si_status for a requested exit or
 * stop, 0 for a requested continue, and -1 if waitid() fails or none of the
 * requested checks matches.
 *
 * NOTE(review): the W*() macros are applied to siginfo's si_status here;
 * confirm against waitid(2) that this field has the encoding they expect.
 */
int clone_reap(pid_t pid, int options)
{
	siginfo_t info = {
		.si_signo = 0,
	};
	int ret;

	do {
		ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0)
		return -1;

	if ((options & WEXITED) && WIFEXITED(info.si_status))
		return WEXITSTATUS(info.si_status);

	if ((options & WSTOPPED) && WIFSTOPPED(info.si_status))
		return WSTOPSIG(info.si_status);

	if ((options & WCONTINUED) && WIFCONTINUED(info.si_status))
		return 0;

	return -1;
}
491
/*
 * Open directory @dir as an O_PATH descriptor, with O_DIRECTORY, O_NOFOLLOW
 * and O_CLOEXEC set.
 *
 * Returns the file descriptor, or -1 with errno set on failure.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
496
/*
 * Close @fd, if it is non-negative, while preserving the current errno.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe in unbraced if/else bodies. @fd is evaluated twice
 * when it is non-negative, so pass an expression without side effects.
 */
#define close_prot_errno(fd)						\
	do {								\
		if ((fd) >= 0) {					\
			int _e_ = errno;				\
			close(fd);					\
			errno = _e_;					\
		}							\
	} while (0)
503
/*
 * Start a child directly inside @cgroup via clone_into_cgroup() and have it
 * run @fn(@cgroup, @arg); the child exits with the return value of @fn.
 * The parent does not wait for the child.
 *
 * Returns, in the parent, the clone_into_cgroup() result (the child's PID on
 * success, negative on failure), or -1 if @cgroup can't be opened.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int dirfd = dirfd_open_opath(cgroup);

	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);
	/* Keep the errno of the clone attempt intact for the caller. */
	close_prot_errno(dirfd);

	/* Parent, or failed clone: hand the result back. */
	if (child != 0)
		return child;

	exit(fn(cgroup, arg));
}
522
/*
 * Run @fn(@cgroup, @arg) in a child process inside @cgroup without waiting
 * for it to finish.
 *
 * clone_into_cgroup_run_nowait() is tried first. If it fails with ENOSYS,
 * the function falls back to fork(), with the child writing its own PID to
 * cgroup.procs before calling @fn; the child exits with EXIT_FAILURE if that
 * write fails, otherwise with the return value of @fn.
 *
 * Returns the child's PID on success, -1 if the clone attempt fails with any
 * other error, or the (negative) fork() result if the fallback fork fails.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char pid_str[64];
	int pid;

	pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);
	if (pid > 0)
		return pid;

	/* Genuine test failure. */
	if (pid < 0 && errno != ENOSYS)
		return -1;

	pid = fork();
	if (pid != 0)
		return pid;

	/* Child: enter the cgroup, then run the payload. */
	snprintf(pid_str, sizeof(pid_str), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", pid_str))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
549
proc_mount_contains(const char * option)550 int proc_mount_contains(const char *option)
551 {
552 char buf[4 * PAGE_SIZE];
553 ssize_t read;
554
555 read = read_text("/proc/mounts", buf, sizeof(buf));
556 if (read < 0)
557 return read;
558
559 return strstr(buf, option) != NULL;
560 }
561
cgroup_feature(const char * feature)562 int cgroup_feature(const char *feature)
563 {
564 char buf[PAGE_SIZE];
565 ssize_t read;
566
567 read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf));
568 if (read < 0)
569 return read;
570
571 return strstr(buf, feature) != NULL;
572 }
573
/*
 * Read the procfs file @item of a process into @buf (of @size bytes) through
 * read_text().
 *
 * A non-zero @pid selects /proc/<pid>/<item>. With @pid == 0 the file is
 * /proc/thread-self/<item> when @thread is set and /proc/self/<item>
 * otherwise.
 *
 * Returns the number of bytes read, or -1 on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];
	ssize_t len;

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	len = read_text(path, buf, size);
	if (len < 0)
		return -1;

	return len;
}
588
proc_read_strstr(int pid,bool thread,const char * item,const char * needle)589 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
590 {
591 char buf[PAGE_SIZE];
592
593 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
594 return -1;
595
596 return strstr(buf, needle) ? 0 : -1;
597 }
598
/*
 * Check that a process can be cloned directly into @cgroup: the child exits
 * immediately with EXIT_SUCCESS and the parent reaps it.
 *
 * Returns 0 if the clone succeeded, or -1 if @cgroup can't be opened or the
 * clone failed.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int dirfd = dirfd_open_opath(cgroup);

	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);
	close_prot_errno(dirfd);

	/* Child: nothing to do. */
	if (child == 0)
		exit(EXIT_SUCCESS);

	if (child < 0)
		return -1;

	/*
	 * The outcome of reaping is deliberately ignored; only the success
	 * of the initial clone matters.
	 */
	(void)clone_reap(child, WEXITED);
	return 0;
}
623
/*
 * Create an inotify instance that watches the control file
 * "<cgroup>/<filename>" for IN_MODIFY events.
 *
 * Returns the inotify file descriptor (to be closed by the caller), or -1 on
 * failure.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char *path;
	int fd, ret;

	fd = inotify_init1(0);
	if (fd == -1)
		return fd;

	path = cg_control(cgroup, filename);
	if (!path) {
		close(fd);
		return -1;
	}

	ret = inotify_add_watch(fd, path, IN_MODIFY);
	/* cg_control() allocates the path; it is only needed for the call. */
	free(path);
	if (ret == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
640
/*
 * Set up an inotify watch on the cgroup.events file of @cgroup.
 *
 * Returns the __prepare_for_wait() result: an inotify file descriptor, or -1
 * on failure.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "cgroup.events";

	return __prepare_for_wait(cgroup, events_file);
}
645
/*
 * Set up an inotify watch on the memory.events file of @cgroup.
 *
 * Returns the __prepare_for_wait() result: an inotify file descriptor, or -1
 * on failure.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "memory.events";

	return __prepare_for_wait(cgroup, events_file);
}
650
/*
 * Block until @fd becomes readable, polling in 10 second slices. Timeouts
 * and signal interruptions restart the wait.
 *
 * Returns 0 once POLLIN is reported, or -1 if poll() fails or reports only
 * an error/hang-up/invalid-descriptor condition for @fd.
 */
int cg_wait_for(int fd)
{
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};
	int ret;

	for (;;) {
		ret = poll(&fds, 1, 10000);

		if (ret == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		/* Timed out: keep waiting. */
		if (ret == 0)
			continue;

		if (fds.revents & POLLIN)
			return 0;

		/*
		 * Only POLLERR/POLLHUP/POLLNVAL are set. These conditions
		 * make poll() return immediately, so waiting again would
		 * spin forever.
		 */
		return -1;
	}
}
677