// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
#define _GNU_SOURCE
#include <test_progs.h>
#include <bpf/btf.h>
#include "cap_helpers.h"
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/unistd.h>
#include <linux/mount.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/un.h>
#include "priv_map.skel.h"
#include "priv_prog.skel.h"
#include "dummy_st_ops_success.skel.h"
#include "token_lsm.skel.h"

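/* The new mount API syscalls (fsopen, fsconfig, fsmount, fspick,
 * move_mount) are invoked via syscall(2) below, since libc wrappers for
 * them may be missing on older libc versions.
 */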
static inline int sys_mount(const char *dev_name, const char *dir_name,
			    const char *type, unsigned long flags,
			    const void *data)
{
	return syscall(__NR_mount, dev_name, dir_name, type, flags, data);
}

static inline int sys_fsopen(const char *fsname, unsigned flags)
{
	return syscall(__NR_fsopen, fsname, flags);
}

static inline int sys_fspick(int dfd, const char *path, unsigned flags)
{
	return syscall(__NR_fspick, dfd, path, flags);
}

static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
{
	return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
}

static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
{
	return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
}

static inline int sys_move_mount(int from_dfd, const char *from_path,
				 int to_dfd, const char *to_path,
				 unsigned flags)
{
	return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags);
}
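/* CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, and CAP_SYS_ADMIN together gate all
 * privileged BPF operations, so dropping all four emulates a completely
 * unprivileged process, leaving the BPF token as the only source of
 * permissions.
 */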
static int drop_priv_caps(__u64 *old_caps)
{
	return cap_disable_effective((1ULL << CAP_BPF) |
				     (1ULL << CAP_PERFMON) |
				     (1ULL << CAP_NET_ADMIN) |
				     (1ULL << CAP_SYS_ADMIN), old_caps);
}

static int restore_priv_caps(__u64 old_caps)
{
	return cap_enable_effective(old_caps, NULL);
}

static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str)
{
	char buf[32];
	int err;

	if (!mask_str) {
		if (mask == ~0ULL) {
			mask_str = "any";
		} else {
			snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask);
			mask_str = buf;
		}
	}

	err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key,
			   mask_str, 0);
	if (err < 0)
		err = -errno;
	return err;
}
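
/* For illustration only: the options configured via set_delegate_mask()
 * are the same delegation mount options a sufficiently privileged mounter
 * could pass directly, e.g. (hypothetical invocation):
 *
 *   mount -t bpf -o delegate_cmds=any,delegate_maps=0x4 bpffs /sys/fs/bpf
 *
 * Masks can be spelled either numerically or symbolically ("any",
 * "map_create", "stack", ...), which is why both a numeric mask and an
 * optional mask_str are accepted above.
 */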

#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0)

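/* Delegation options for a to-be-created BPF FS instance: each __u64 is a
 * bitmask of delegated BPF commands, map types, program types, or attach
 * types; when set, the corresponding *_str field takes precedence and is
 * passed to the kernel verbatim (e.g. "any" or a symbolic type name).
 */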
struct bpffs_opts {
	__u64 cmds;
	__u64 maps;
	__u64 progs;
	__u64 attachs;
	const char *cmds_str;
	const char *maps_str;
	const char *progs_str;
	const char *attachs_str;
};

static int create_bpffs_fd(void)
{
	int fs_fd;

	/* create VFS context */
	fs_fd = sys_fsopen("bpf", 0);
	ASSERT_GE(fs_fd, 0, "fs_fd");

	return fs_fd;
}

static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
{
	int mnt_fd, err;

	/* set up token delegation mount options */
	err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
	if (!ASSERT_OK(err, "fs_cfg_cmds"))
		return err;
	err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str);
	if (!ASSERT_OK(err, "fs_cfg_maps"))
		return err;
	err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str);
	if (!ASSERT_OK(err, "fs_cfg_progs"))
		return err;
	err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str);
	if (!ASSERT_OK(err, "fs_cfg_attachs"))
		return err;

	/* instantiate FS object */
	err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
	if (err < 0)
		return -errno;

	/* create O_PATH fd for detached mount */
	mnt_fd = sys_fsmount(fs_fd, 0, 0);
	if (mnt_fd < 0)
		return -errno;

	return mnt_fd;
}
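
/* Note that the fd returned by materialize_bpffs_fd() refers to a detached
 * mount: it is not attached to the mount tree here, but passed around as
 * an fd; only the implicit-token subtests later move_mount() it to an
 * actual path.
 */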

/* send FD over Unix domain (AF_UNIX) socket */
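/* A single dummy byte is transferred alongside the SCM_RIGHTS control
 * message, as ancillary data is in practice only reliably delivered when
 * it accompanies at least one byte of regular payload.
 */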
static int sendfd(int sockfd, int fd)
{
	struct msghdr msg = {};
	struct cmsghdr *cmsg;
	int fds[1] = { fd }, err;
	char iobuf[1];
	struct iovec io = {
		.iov_base = iobuf,
		.iov_len = sizeof(iobuf),
	};
	union {
		char buf[CMSG_SPACE(sizeof(fds))];
		struct cmsghdr align;
	} u;

	msg.msg_iov = &io;
	msg.msg_iovlen = 1;
	msg.msg_control = u.buf;
	msg.msg_controllen = sizeof(u.buf);
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
	memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));

	err = sendmsg(sockfd, &msg, 0);
	if (err < 0)
		err = -errno;
	if (!ASSERT_EQ(err, 1, "sendmsg"))
		return -EINVAL;

	return 0;
}

/* receive FD over Unix domain (AF_UNIX) socket */
static int recvfd(int sockfd, int *fd)
{
	struct msghdr msg = {};
	struct cmsghdr *cmsg;
	int fds[1], err;
	char iobuf[1];
	struct iovec io = {
		.iov_base = iobuf,
		.iov_len = sizeof(iobuf),
	};
	union {
		char buf[CMSG_SPACE(sizeof(fds))];
		struct cmsghdr align;
	} u;

	msg.msg_iov = &io;
	msg.msg_iovlen = 1;
	msg.msg_control = u.buf;
	msg.msg_controllen = sizeof(u.buf);

	err = recvmsg(sockfd, &msg, 0);
	if (err < 0)
		err = -errno;
	if (!ASSERT_EQ(err, 1, "recvmsg"))
		return -EINVAL;

	cmsg = CMSG_FIRSTHDR(&msg);
	if (!ASSERT_OK_PTR(cmsg, "cmsg_null") ||
	    !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") ||
	    !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") ||
	    !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type"))
		return -EINVAL;

	memcpy(fds, CMSG_DATA(cmsg), sizeof(fds));
	*fd = fds[0];

	return 0;
}

static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	ssize_t ret;

	do {
		ret = write(fd, buf, count);
	} while (ret < 0 && errno == EINTR);

	return ret;
}

static int write_file(const char *path, const void *buf, size_t count)
{
	int fd;
	ssize_t ret;

	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
	if (fd < 0)
		return -1;

	ret = write_nointr(fd, buf, count);
	close(fd);
	if (ret < 0 || (size_t)ret != count)
		return -1;

	return 0;
}

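/* Set up a user namespace in which the current (unprivileged) user is
 * mapped to root. Per user_namespaces(7), "deny" must be written to
 * /proc/self/setgroups before gid_map can be written without CAP_SETGID,
 * and each map line has the form "<ns-id> <parent-id> <range>".
 */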
static int create_and_enter_userns(void)
{
	uid_t uid;
	gid_t gid;
	char map[100];

	uid = getuid();
	gid = getgid();

	if (unshare(CLONE_NEWUSER))
		return -1;

	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
	    errno != ENOENT)
		return -1;

	snprintf(map, sizeof(map), "0 %d 1", uid);
	if (write_file("/proc/self/uid_map", map, strlen(map)))
		return -1;

	snprintf(map, sizeof(map), "0 %d 1", gid);
	if (write_file("/proc/self/gid_map", map, strlen(map)))
		return -1;

	if (setgid(0))
		return -1;

	if (setuid(0))
		return -1;

	return 0;
}

typedef int (*child_callback_fn)(int bpffs_fd, struct token_lsm *lsm_skel);

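/* Child side of the test choreography: the child creates the BPF FS
 * context (fsopen() happens in the child so that the resulting FS instance
 * is owned by the child's user namespace), hands it to the privileged
 * parent for delegation setup and fsmount(), receives the detached mount
 * fd back, and then exercises BPF token creation plus the subtest callback.
 */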
static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback)
{
	int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1;
	struct token_lsm *lsm_skel = NULL;

	/* load and attach LSM "policy" before we go into unpriv userns */
	lsm_skel = token_lsm__open_and_load();
	if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel_load")) {
		err = -EINVAL;
		goto cleanup;
	}
	lsm_skel->bss->my_pid = getpid();
	err = token_lsm__attach(lsm_skel);
	if (!ASSERT_OK(err, "lsm_skel_attach"))
		goto cleanup;

	/* setup userns with root mappings */
	err = create_and_enter_userns();
	if (!ASSERT_OK(err, "create_and_enter_userns"))
		goto cleanup;

	/* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */
	err = unshare(CLONE_NEWNS);
	if (!ASSERT_OK(err, "create_mountns"))
		goto cleanup;

	err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
	if (!ASSERT_OK(err, "remount_root"))
		goto cleanup;

	fs_fd = create_bpffs_fd();
	if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* ensure unprivileged child cannot set delegation options */
	err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm");
	err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_maps_eperm");
	err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_progs_eperm");
	err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm");

	/* pass BPF FS context object to parent */
	err = sendfd(sock_fd, fs_fd);
	if (!ASSERT_OK(err, "send_fs_fd"))
		goto cleanup;
	zclose(fs_fd);

	/* avoid mucking around with mount namespaces and mounting at
	 * well-known path, just get detach-mounted BPF FS fd back from parent
	 */
	err = recvfd(sock_fd, &mnt_fd);
	if (!ASSERT_OK(err, "recv_mnt_fd"))
		goto cleanup;

	/* try to fspick() BPF FS and try to add some delegation options */
	fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH);
	if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* ensure unprivileged child cannot reconfigure to set delegation options */
	err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	zclose(fs_fd);

	bpffs_fd = openat(mnt_fd, ".", O_RDONLY);
	if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* create BPF token FD and pass it to parent for some extra checks */
	token_fd = bpf_token_create(bpffs_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "child_token_create")) {
		err = -EINVAL;
		goto cleanup;
	}
	err = sendfd(sock_fd, token_fd);
	if (!ASSERT_OK(err, "send_token_fd"))
		goto cleanup;
	zclose(token_fd);

	/* run custom test logic against the specially set up BPF FS instance */
	err = callback(bpffs_fd, lsm_skel);
	if (!ASSERT_OK(err, "test_callback"))
		goto cleanup;

	err = 0;
cleanup:
	zclose(sock_fd);
	zclose(mnt_fd);
	zclose(fs_fd);
	zclose(bpffs_fd);
	zclose(token_fd);

	if (lsm_skel)
		lsm_skel->bss->my_pid = 0;
	token_lsm__destroy(lsm_skel);

	exit(-err);
}

static int wait_for_pid(pid_t pid)
{
	int status, ret;

again:
	ret = waitpid(pid, &status, 0);
	if (ret == -1) {
		if (errno == EINTR)
			goto again;

		return -1;
	}

	if (!WIFEXITED(status))
		return -1;

	return WEXITSTATUS(status);
}

static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
{
	int fs_fd = -1, mnt_fd = -1, token_fd = -1, err;

	err = recvfd(sock_fd, &fs_fd);
	if (!ASSERT_OK(err, "recv_bpffs_fd"))
		goto cleanup;

	mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
	if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
		err = -EINVAL;
		goto cleanup;
	}
	zclose(fs_fd);

	/* pass detached BPF FS mount fd back to the child */
	err = sendfd(sock_fd, mnt_fd);
	if (!ASSERT_OK(err, "send_mnt_fd"))
		goto cleanup;
	zclose(mnt_fd);

	/* receive BPF token FD back from child for some extra tests */
	err = recvfd(sock_fd, &token_fd);
	if (!ASSERT_OK(err, "recv_token_fd"))
		goto cleanup;

	err = wait_for_pid(child_pid);
	ASSERT_OK(err, "waitpid_child");

cleanup:
	zclose(sock_fd);
	zclose(fs_fd);
	zclose(mnt_fd);
	zclose(token_fd);

	if (child_pid > 0)
		(void)kill(child_pid, SIGKILL);
}

static void subtest_userns(struct bpffs_opts *bpffs_opts,
			   child_callback_fn child_cb)
{
	int sock_fds[2] = { -1, -1 };
	int child_pid = 0, err;

	err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds);
	if (!ASSERT_OK(err, "socketpair"))
		goto cleanup;

	child_pid = fork();
	if (!ASSERT_GE(child_pid, 0, "fork"))
		goto cleanup;

	if (child_pid == 0) {
		zclose(sock_fds[0]);
		return child(sock_fds[1], bpffs_opts, child_cb);
	} else {
		zclose(sock_fds[1]);
		return parent(child_pid, bpffs_opts, sock_fds[0]);
	}

cleanup:
	zclose(sock_fds[0]);
	zclose(sock_fds[1]);
	if (child_pid > 0)
		(void)kill(child_pid, SIGKILL);
}

static int userns_map_create(int mnt_fd, struct token_lsm *lsm_skel)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts);
	int err, token_fd = -1, map_fd = -1;
	__u64 old_caps = 0;

	/* create BPF token from BPF FS mount */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "token_create")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* while inside non-init userns, we need both a BPF token *and*
	 * CAP_BPF inside current userns to create privileged map; let's test
	 * that neither BPF token alone nor namespaced CAP_BPF is sufficient
	 */
	err = drop_priv_caps(&old_caps);
	if (!ASSERT_OK(err, "drop_caps"))
		goto cleanup;

	/* no token, no CAP_BPF -> fail */
	map_opts.map_flags = 0;
	map_opts.token_fd = 0;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* token without CAP_BPF -> fail */
	map_opts.map_flags = BPF_F_TOKEN_FD;
	map_opts.token_fd = token_fd;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
	err = restore_priv_caps(old_caps);
	if (!ASSERT_OK(err, "restore_caps"))
		goto cleanup;

	/* CAP_BPF without token -> fail */
	map_opts.map_flags = 0;
	map_opts.token_fd = 0;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* finally, namespaced CAP_BPF + token -> success */
	map_opts.map_flags = BPF_F_TOKEN_FD;
	map_opts.token_fd = token_fd;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) {
		err = -EINVAL;
		goto cleanup;
	}

	err = 0;
cleanup:
	zclose(token_fd);
	zclose(map_fd);
	return err;
}

static int userns_btf_load(int mnt_fd, struct token_lsm *lsm_skel)
{
	LIBBPF_OPTS(bpf_btf_load_opts, btf_opts);
	int err, token_fd = -1, btf_fd = -1;
	const void *raw_btf_data;
	struct btf *btf = NULL;
	__u32 raw_btf_size;
	__u64 old_caps = 0;

	/* create BPF token from BPF FS mount */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "token_create")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* while inside non-init userns, we need both a BPF token *and*
	 * CAP_BPF inside current userns to load BTF; let's test that
	 * neither BPF token alone nor namespaced CAP_BPF is sufficient
	 */
	err = drop_priv_caps(&old_caps);
	if (!ASSERT_OK(err, "drop_caps"))
		goto cleanup;

	/* set up trivial BTF data to load into the kernel */
	btf = btf__new_empty();
	if (!ASSERT_OK_PTR(btf, "empty_btf")) {
		err = -EINVAL;
		goto cleanup;
	}

	ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");

	raw_btf_data = btf__raw_data(btf, &raw_btf_size);
	if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* no token + no CAP_BPF -> failure */
	btf_opts.btf_flags = 0;
	btf_opts.token_fd = 0;
	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
	if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* token + no CAP_BPF -> failure */
	btf_opts.btf_flags = BPF_F_TOKEN_FD;
	btf_opts.token_fd = token_fd;
	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
	if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
	err = restore_priv_caps(old_caps);
	if (!ASSERT_OK(err, "restore_caps"))
		goto cleanup;

	/* token + CAP_BPF -> success */
	btf_opts.btf_flags = BPF_F_TOKEN_FD;
	btf_opts.token_fd = token_fd;
	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
	if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success")) {
		err = -EINVAL;
		goto cleanup;
	}

	err = 0;
cleanup:
	btf__free(btf);
	zclose(btf_fd);
	zclose(token_fd);
	return err;
}

static int userns_prog_load(int mnt_fd, struct token_lsm *lsm_skel)
{
	LIBBPF_OPTS(bpf_prog_load_opts, prog_opts);
	int err, token_fd = -1, prog_fd = -1;
	struct bpf_insn insns[] = {
		/* bpf_jiffies64() requires CAP_BPF */
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
		/* bpf_get_current_task() requires CAP_PERFMON */
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task),
		/* r0 = 0; exit; */
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	size_t insn_cnt = ARRAY_SIZE(insns);
	__u64 old_caps = 0;

	/* create BPF token from BPF FS mount */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "token_create")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* validate we can successfully load BPF program with token; this
	 * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF)
	 * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have
	 * BPF token wired properly in a bunch of places in the kernel
	 */
	prog_opts.prog_flags = BPF_F_TOKEN_FD;
	prog_opts.token_fd = token_fd;
	prog_opts.expected_attach_type = BPF_XDP;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_GT(prog_fd, 0, "prog_fd")) {
		err = -EPERM;
		goto cleanup;
	}

	/* no token + caps -> failure */
	prog_opts.prog_flags = 0;
	prog_opts.token_fd = 0;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
		err = -EPERM;
		goto cleanup;
	}

	err = drop_priv_caps(&old_caps);
	if (!ASSERT_OK(err, "drop_caps"))
		goto cleanup;

	/* no caps + token -> failure */
	prog_opts.prog_flags = BPF_F_TOKEN_FD;
	prog_opts.token_fd = token_fd;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
		err = -EPERM;
		goto cleanup;
	}

	/* no caps + no token -> definitely a failure */
	prog_opts.prog_flags = 0;
	prog_opts.token_fd = 0;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
		err = -EPERM;
		goto cleanup;
	}

	err = 0;
cleanup:
	zclose(prog_fd);
	zclose(token_fd);
	return err;
}

static int userns_obj_priv_map(int mnt_fd, struct token_lsm *lsm_skel)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	char buf[256];
	struct priv_map *skel;
	int err;

	skel = priv_map__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		priv_map__destroy(skel);
		return -EINVAL;
	}

	/* use bpf_token_path to provide BPF FS path; the detached mount has
	 * no path in the file system, so reference it through /proc/self/fd
	 */
	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
	opts.bpf_token_path = buf;
	skel = priv_map__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
		return -EINVAL;

	err = priv_map__load(skel);
	priv_map__destroy(skel);
	if (!ASSERT_OK(err, "obj_token_path_load"))
		return -EINVAL;

	return 0;
}

static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	char buf[256];
	struct priv_prog *skel;
	int err;

	skel = priv_prog__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		priv_prog__destroy(skel);
		return -EINVAL;
	}

	/* use bpf_token_path to provide BPF FS path */
	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
	opts.bpf_token_path = buf;
	skel = priv_prog__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
		return -EINVAL;
	err = priv_prog__load(skel);
	priv_prog__destroy(skel);
	if (!ASSERT_OK(err, "obj_token_path_load"))
		return -EINVAL;

	/* provide BPF token, but reject bpf_token_capable() with LSM */
	lsm_skel->bss->reject_capable = true;
	lsm_skel->bss->reject_cmd = false;
	skel = priv_prog__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cap_open"))
		return -EINVAL;
	err = priv_prog__load(skel);
	priv_prog__destroy(skel);
	if (!ASSERT_ERR(err, "obj_token_lsm_reject_cap_load"))
		return -EINVAL;

	/* provide BPF token, but reject bpf_token_cmd() with LSM */
	lsm_skel->bss->reject_capable = false;
	lsm_skel->bss->reject_cmd = true;
	skel = priv_prog__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cmd_open"))
		return -EINVAL;
	err = priv_prog__load(skel);
	priv_prog__destroy(skel);
	if (!ASSERT_ERR(err, "obj_token_lsm_reject_cmd_load"))
		return -EINVAL;

	return 0;
}

/* validate_struct_ops_load() is called both with a BPF FS that does and one
 * that doesn't delegate the BPF_BTF_LOAD command; in the latter case
 * struct_ops application should fail, as BTF won't be uploaded into the
 * kernel, even if STRUCT_OPS programs themselves are allowed
 */
static int validate_struct_ops_load(int mnt_fd, bool expect_success)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	char buf[256];
	struct dummy_st_ops_success *skel;
	int err;

	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
	opts.bpf_token_path = buf;
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
		return -EINVAL;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (expect_success) {
		if (!ASSERT_OK(err, "obj_token_path_load"))
			return -EINVAL;
	} else /* expect failure */ {
		if (!ASSERT_ERR(err, "obj_token_path_load"))
			return -EINVAL;
	}

	return 0;
}

static int userns_obj_priv_btf_fail(int mnt_fd, struct token_lsm *lsm_skel)
{
	return validate_struct_ops_load(mnt_fd, false /* should fail */);
}

static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel)
{
	return validate_struct_ops_load(mnt_fd, true /* should succeed */);
}

#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs"
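
/* libbpf can create a BPF token implicitly: by default it probes
 * /sys/fs/bpf, while the LIBBPF_BPF_TOKEN_PATH envvar redirects that probe
 * to a custom BPF FS path; an empty value disables implicit token creation
 * altogether, as exercised below.
 */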

static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	struct dummy_st_ops_success *skel;
	int err;

	/* before we mount BPF FS with token delegation, struct_ops skeleton
	 * should fail to load
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		dummy_st_ops_success__destroy(skel);
		return -EINVAL;
	}

	/* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF
	 * token automatically and implicitly
	 */
	err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH);
	if (!ASSERT_OK(err, "move_mount_bpffs"))
		return -EINVAL;

	/* disable implicit BPF token creation by setting
	 * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail
	 */
	err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/);
	if (!ASSERT_OK(err, "setenv_token_path"))
		return -EINVAL;
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) {
		unsetenv(TOKEN_ENVVAR);
		dummy_st_ops_success__destroy(skel);
		return -EINVAL;
	}
	unsetenv(TOKEN_ENVVAR);

	/* now the same struct_ops skeleton should succeed thanks to libbpf
	 * creating BPF token from /sys/fs/bpf mount point
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
		return -EINVAL;

	dummy_st_ops_success__destroy(skel);

	/* now disable implicit token through empty bpf_token_path, should fail */
	opts.bpf_token_path = "";
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
		return -EINVAL;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
		return -EINVAL;

	return 0;
}

static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	struct dummy_st_ops_success *skel;
	int err;

	/* before we mount BPF FS with token delegation, struct_ops skeleton
	 * should fail to load
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		dummy_st_ops_success__destroy(skel);
		return -EINVAL;
	}

	/* mount custom BPF FS over custom location, so libbpf can't create
	 * BPF token implicitly, unless pointed to it through
	 * LIBBPF_BPF_TOKEN_PATH envvar
	 */
	rmdir(TOKEN_BPFFS_CUSTOM);
	if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom"))
		goto err_out;
	err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH);
	if (!ASSERT_OK(err, "move_mount_bpffs"))
		goto err_out;

	/* even though we have BPF FS with delegation, it's not at default
	 * /sys/fs/bpf location, so we still fail to load until envvar is set up
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) {
		dummy_st_ops_success__destroy(skel);
		goto err_out;
	}

	err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/);
	if (!ASSERT_OK(err, "setenv_token_path"))
		goto err_out;

	/* now the same struct_ops skeleton should succeed thanks to libbpf
	 * creating BPF token from custom mount point
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
		goto err_out;

	dummy_st_ops_success__destroy(skel);

	/* now disable implicit token through empty bpf_token_path, envvar
	 * will be ignored, should fail
	 */
	opts.bpf_token_path = "";
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
		goto err_out;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
		goto err_out;

	rmdir(TOKEN_BPFFS_CUSTOM);
	unsetenv(TOKEN_ENVVAR);
	return 0;
err_out:
	rmdir(TOKEN_BPFFS_CUSTOM);
	unsetenv(TOKEN_ENVVAR);
	return -EINVAL;
}

#define bit(n) (1ULL << (n))
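
/* Build delegation masks from UAPI enum values, e.g. bit(BPF_MAP_CREATE)
 * delegates the BPF_MAP_CREATE command and bit(BPF_MAP_TYPE_QUEUE)
 * delegates queue maps.
 */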

void test_token(void)
{
	if (test__start_subtest("map_token")) {
		struct bpffs_opts opts = {
			.cmds_str = "map_create",
			.maps_str = "stack",
		};

		subtest_userns(&opts, userns_map_create);
	}
	if (test__start_subtest("btf_token")) {
		struct bpffs_opts opts = {
			.cmds = 1ULL << BPF_BTF_LOAD,
		};

		subtest_userns(&opts, userns_btf_load);
	}
	if (test__start_subtest("prog_token")) {
		struct bpffs_opts opts = {
			.cmds_str = "PROG_LOAD",
			.progs_str = "XDP",
			.attachs_str = "xdp",
		};

		subtest_userns(&opts, userns_prog_load);
	}
	if (test__start_subtest("obj_priv_map")) {
		struct bpffs_opts opts = {
			.cmds = bit(BPF_MAP_CREATE),
			.maps = bit(BPF_MAP_TYPE_QUEUE),
		};

		subtest_userns(&opts, userns_obj_priv_map);
	}
	if (test__start_subtest("obj_priv_prog")) {
		struct bpffs_opts opts = {
			.cmds = bit(BPF_PROG_LOAD),
			.progs = bit(BPF_PROG_TYPE_KPROBE),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_prog);
	}
	if (test__start_subtest("obj_priv_btf_fail")) {
		struct bpffs_opts opts = {
			/* disallow BTF loading */
			.cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_btf_fail);
	}
	if (test__start_subtest("obj_priv_btf_success")) {
		struct bpffs_opts opts = {
			/* allow BTF loading */
			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_btf_success);
	}
	if (test__start_subtest("obj_priv_implicit_token")) {
		struct bpffs_opts opts = {
			/* allow BTF loading */
			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_implicit_token);
	}
	if (test__start_subtest("obj_priv_implicit_token_envvar")) {
		struct bpffs_opts opts = {
			/* allow BTF loading */
			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
	}
}