xref: /linux/tools/testing/selftests/bpf/prog_tests/token.c (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
3 #define _GNU_SOURCE
4 #include <test_progs.h>
5 #include <bpf/btf.h>
6 #include "cap_helpers.h"
7 #include <fcntl.h>
8 #include <sched.h>
9 #include <signal.h>
10 #include <unistd.h>
11 #include <linux/filter.h>
12 #include <linux/unistd.h>
13 #include <linux/mount.h>
14 #include <sys/socket.h>
15 #include <sys/stat.h>
16 #include <sys/syscall.h>
17 #include <sys/un.h>
18 #include "priv_map.skel.h"
19 #include "priv_prog.skel.h"
20 #include "dummy_st_ops_success.skel.h"
21 #include "token_lsm.skel.h"
22 
/* Issue the raw mount syscall through syscall(); the result is passed
 * back to the caller unchanged (narrowed to int).
 */
static inline int sys_mount(const char *dev_name, const char *dir_name,
			    const char *type, unsigned long flags,
			    const void *data)
{
	long rc;

	rc = syscall(__NR_mount, dev_name, dir_name, type, flags, data);
	return rc;
}
29 
/* Issue the raw fsopen syscall for filesystem type @fsname; the result
 * of syscall() is returned as-is (narrowed to int).
 */
static inline int sys_fsopen(const char *fsname, unsigned flags)
{
	long rc;

	rc = syscall(__NR_fsopen, fsname, flags);
	return rc;
}
34 
/* Issue the raw fspick syscall on @path relative to @dfd; the result of
 * syscall() is returned as-is (narrowed to int).
 */
static inline int sys_fspick(int dfd, const char *path, unsigned flags)
{
	long rc;

	rc = syscall(__NR_fspick, dfd, path, flags);
	return rc;
}
39 
/* Issue the raw fsconfig syscall against filesystem context @fs_fd; the
 * result of syscall() is returned as-is (narrowed to int).
 */
static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
{
	long rc;

	rc = syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
	return rc;
}
44 
/* Issue the raw fsmount syscall on filesystem context @fs_fd; the result
 * of syscall() is returned as-is (narrowed to int).
 */
static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
{
	long rc;

	rc = syscall(__NR_fsmount, fs_fd, flags, ms_flags);
	return rc;
}
49 
/* Issue the raw move_mount syscall, moving the mount named by
 * (@from_dfd, @from_path) to (@to_dfd, @to_path); the result of
 * syscall() is returned as-is (narrowed to int).
 */
static inline int sys_move_mount(int from_dfd, const char *from_path,
				 int to_dfd, const char *to_path,
				 unsigned flags)
{
	long rc;

	rc = syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags);
	return rc;
}
56 
57 static int drop_priv_caps(__u64 *old_caps)
58 {
59 	return cap_disable_effective((1ULL << CAP_BPF) |
60 				     (1ULL << CAP_PERFMON) |
61 				     (1ULL << CAP_NET_ADMIN) |
62 				     (1ULL << CAP_SYS_ADMIN), old_caps);
63 }
64 
65 static int restore_priv_caps(__u64 old_caps)
66 {
67 	return cap_enable_effective(old_caps, NULL);
68 }
69 
70 static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str)
71 {
72 	char buf[32];
73 	int err;
74 
75 	if (!mask_str) {
76 		if (mask == ~0ULL) {
77 			mask_str = "any";
78 		} else {
79 			snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask);
80 			mask_str = buf;
81 		}
82 	}
83 
84 	err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key,
85 			   mask_str, 0);
86 	if (err < 0)
87 		err = -errno;
88 	return err;
89 }
90 
/* Close @fd if it is valid (>= 0) and then reset it to -1, so calling
 * zclose() again on the same variable is harmless. @fd must be an lvalue
 * and is evaluated more than once, so it must not have side effects.
 */
#define zclose(fd) do {			\
	if ((fd) >= 0)			\
		close(fd);		\
	(fd) = -1;			\
} while (0)
92 
/* Delegation settings applied to a BPF FS instance by
 * materialize_bpffs_fd(). Each setting has a numeric mask and an optional
 * string form; when the string is non-NULL it is used instead of the mask
 * (see set_delegate_mask()).
 */
struct bpffs_opts {
	__u64 cmds;		/* mask for the "delegate_cmds" option */
	__u64 maps;		/* mask for the "delegate_maps" option */
	__u64 progs;		/* mask for the "delegate_progs" option */
	__u64 attachs;		/* mask for the "delegate_attachs" option */
	const char *cmds_str;	/* string override for "delegate_cmds" */
	const char *maps_str;	/* string override for "delegate_maps" */
	const char *progs_str;	/* string override for "delegate_progs" */
	const char *attachs_str; /* string override for "delegate_attachs" */
};
103 
/* Open a new filesystem context for the "bpf" filesystem type.
 *
 * A failure is recorded through ASSERT_GE; the (possibly negative) result
 * of sys_fsopen() is returned to the caller either way.
 */
static int create_bpffs_fd(void)
{
	int ctx_fd = sys_fsopen("bpf", 0);

	ASSERT_GE(ctx_fd, 0, "fs_fd");
	return ctx_fd;
}
114 
115 static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
116 {
117 	int mnt_fd, err;
118 
119 	/* set up token delegation mount options */
120 	err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
121 	if (!ASSERT_OK(err, "fs_cfg_cmds"))
122 		return err;
123 	err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str);
124 	if (!ASSERT_OK(err, "fs_cfg_maps"))
125 		return err;
126 	err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str);
127 	if (!ASSERT_OK(err, "fs_cfg_progs"))
128 		return err;
129 	err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str);
130 	if (!ASSERT_OK(err, "fs_cfg_attachs"))
131 		return err;
132 
133 	/* instantiate FS object */
134 	err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
135 	if (err < 0)
136 		return -errno;
137 
138 	/* create O_PATH fd for detached mount */
139 	mnt_fd = sys_fsmount(fs_fd, 0, 0);
140 	if (err < 0)
141 		return -errno;
142 
143 	return mnt_fd;
144 }
145 
/* Send file descriptor @fd over the Unix domain (AF_UNIX) socket @sockfd
 * as SCM_RIGHTS ancillary data, accompanied by a single payload byte.
 *
 * Returns 0 on success, -EINVAL if sendmsg() did not send exactly 1 byte.
 */
static int sendfd(int sockfd, int fd)
{
	int fds[1] = { fd };
	char iobuf[1];
	union {
		char buf[CMSG_SPACE(sizeof(fds))];
		struct cmsghdr align;
	} ctrl;
	struct iovec io = {
		.iov_base = iobuf,
		.iov_len = sizeof(iobuf),
	};
	struct msghdr msg = {
		.msg_iov = &io,
		.msg_iovlen = 1,
		.msg_control = ctrl.buf,
		.msg_controllen = sizeof(ctrl.buf),
	};
	struct cmsghdr *hdr;
	int sent;

	/* describe the FD in the first (and only) control message */
	hdr = CMSG_FIRSTHDR(&msg);
	hdr->cmsg_level = SOL_SOCKET;
	hdr->cmsg_type = SCM_RIGHTS;
	hdr->cmsg_len = CMSG_LEN(sizeof(fds));
	memcpy(CMSG_DATA(hdr), fds, sizeof(fds));

	sent = sendmsg(sockfd, &msg, 0);
	if (sent < 0)
		sent = -errno;

	return ASSERT_EQ(sent, 1, "sendmsg") ? 0 : -EINVAL;
}
180 
/* Receive one file descriptor over the Unix domain (AF_UNIX) socket
 * @sockfd and store it in @fd.
 *
 * Expects exactly one payload byte plus one SCM_RIGHTS control message
 * carrying a single FD. Returns 0 on success, -EINVAL otherwise.
 */
static int recvfd(int sockfd, int *fd)
{
	int fds[1];
	char iobuf[1];
	union {
		char buf[CMSG_SPACE(sizeof(fds))];
		struct cmsghdr align;
	} ctrl;
	struct iovec io = {
		.iov_base = iobuf,
		.iov_len = sizeof(iobuf),
	};
	struct msghdr msg = {
		.msg_iov = &io,
		.msg_iovlen = 1,
		.msg_control = ctrl.buf,
		.msg_controllen = sizeof(ctrl.buf),
	};
	struct cmsghdr *hdr;
	int got;

	got = recvmsg(sockfd, &msg, 0);
	if (got < 0)
		got = -errno;
	if (!ASSERT_EQ(got, 1, "recvmsg"))
		return -EINVAL;

	/* validate the control message before trusting its payload */
	hdr = CMSG_FIRSTHDR(&msg);
	if (!ASSERT_OK_PTR(hdr, "cmsg_null"))
		return -EINVAL;
	if (!ASSERT_EQ(hdr->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len"))
		return -EINVAL;
	if (!ASSERT_EQ(hdr->cmsg_level, SOL_SOCKET, "cmsg_level"))
		return -EINVAL;
	if (!ASSERT_EQ(hdr->cmsg_type, SCM_RIGHTS, "cmsg_type"))
		return -EINVAL;

	memcpy(fds, CMSG_DATA(hdr), sizeof(fds));
	*fd = fds[0];

	return 0;
}
220 
/* write() wrapper that transparently retries when the call is interrupted
 * (fails with EINTR). Returns the final write() result.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		if (written >= 0 || errno != EINTR)
			return written;
	}
}
231 
232 static int write_file(const char *path, const void *buf, size_t count)
233 {
234 	int fd;
235 	ssize_t ret;
236 
237 	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
238 	if (fd < 0)
239 		return -1;
240 
241 	ret = write_nointr(fd, buf, count);
242 	close(fd);
243 	if (ret < 0 || (size_t)ret != count)
244 		return -1;
245 
246 	return 0;
247 }
248 
249 static int create_and_enter_userns(void)
250 {
251 	uid_t uid;
252 	gid_t gid;
253 	char map[100];
254 
255 	uid = getuid();
256 	gid = getgid();
257 
258 	if (unshare(CLONE_NEWUSER))
259 		return -1;
260 
261 	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
262 	    errno != ENOENT)
263 		return -1;
264 
265 	snprintf(map, sizeof(map), "0 %d 1", uid);
266 	if (write_file("/proc/self/uid_map", map, strlen(map)))
267 		return -1;
268 
269 
270 	snprintf(map, sizeof(map), "0 %d 1", gid);
271 	if (write_file("/proc/self/gid_map", map, strlen(map)))
272 		return -1;
273 
274 	if (setgid(0))
275 		return -1;
276 
277 	if (setuid(0))
278 		return -1;
279 
280 	return 0;
281 }
282 
/* Per-subtest logic run by child(); it receives the FD of the opened BPF
 * FS instance and the loaded LSM skeleton. A non-zero return value makes
 * the child fail its "test_callback" check.
 */
typedef int (*child_callback_fn)(int bpffs_fd, struct token_lsm *lsm_skel);
284 
285 static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback)
286 {
287 	int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1;
288 	struct token_lsm *lsm_skel = NULL;
289 
290 	/* load and attach LSM "policy" before we go into unpriv userns */
291 	lsm_skel = token_lsm__open_and_load();
292 	if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel_load")) {
293 		err = -EINVAL;
294 		goto cleanup;
295 	}
296 	lsm_skel->bss->my_pid = getpid();
297 	err = token_lsm__attach(lsm_skel);
298 	if (!ASSERT_OK(err, "lsm_skel_attach"))
299 		goto cleanup;
300 
301 	/* setup userns with root mappings */
302 	err = create_and_enter_userns();
303 	if (!ASSERT_OK(err, "create_and_enter_userns"))
304 		goto cleanup;
305 
306 	/* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */
307 	err = unshare(CLONE_NEWNS);
308 	if (!ASSERT_OK(err, "create_mountns"))
309 		goto cleanup;
310 
311 	err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
312 	if (!ASSERT_OK(err, "remount_root"))
313 		goto cleanup;
314 
315 	fs_fd = create_bpffs_fd();
316 	if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) {
317 		err = -EINVAL;
318 		goto cleanup;
319 	}
320 
321 	/* ensure unprivileged child cannot set delegation options */
322 	err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL);
323 	ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm");
324 	err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL);
325 	ASSERT_EQ(err, -EPERM, "delegate_maps_eperm");
326 	err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL);
327 	ASSERT_EQ(err, -EPERM, "delegate_progs_eperm");
328 	err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL);
329 	ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm");
330 
331 	/* pass BPF FS context object to parent */
332 	err = sendfd(sock_fd, fs_fd);
333 	if (!ASSERT_OK(err, "send_fs_fd"))
334 		goto cleanup;
335 	zclose(fs_fd);
336 
337 	/* avoid mucking around with mount namespaces and mounting at
338 	 * well-known path, just get detach-mounted BPF FS fd back from parent
339 	 */
340 	err = recvfd(sock_fd, &mnt_fd);
341 	if (!ASSERT_OK(err, "recv_mnt_fd"))
342 		goto cleanup;
343 
344 	/* try to fspick() BPF FS and try to add some delegation options */
345 	fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH);
346 	if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) {
347 		err = -EINVAL;
348 		goto cleanup;
349 	}
350 
351 	/* ensure unprivileged child cannot reconfigure to set delegation options */
352 	err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any");
353 	if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) {
354 		err = -EINVAL;
355 		goto cleanup;
356 	}
357 	err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any");
358 	if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) {
359 		err = -EINVAL;
360 		goto cleanup;
361 	}
362 	err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any");
363 	if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) {
364 		err = -EINVAL;
365 		goto cleanup;
366 	}
367 	err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any");
368 	if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) {
369 		err = -EINVAL;
370 		goto cleanup;
371 	}
372 	zclose(fs_fd);
373 
374 	bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR);
375 	if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) {
376 		err = -EINVAL;
377 		goto cleanup;
378 	}
379 
380 	/* create BPF token FD and pass it to parent for some extra checks */
381 	token_fd = bpf_token_create(bpffs_fd, NULL);
382 	if (!ASSERT_GT(token_fd, 0, "child_token_create")) {
383 		err = -EINVAL;
384 		goto cleanup;
385 	}
386 	err = sendfd(sock_fd, token_fd);
387 	if (!ASSERT_OK(err, "send_token_fd"))
388 		goto cleanup;
389 	zclose(token_fd);
390 
391 	/* do custom test logic with customly set up BPF FS instance */
392 	err = callback(bpffs_fd, lsm_skel);
393 	if (!ASSERT_OK(err, "test_callback"))
394 		goto cleanup;
395 
396 	err = 0;
397 cleanup:
398 	zclose(sock_fd);
399 	zclose(mnt_fd);
400 	zclose(fs_fd);
401 	zclose(bpffs_fd);
402 	zclose(token_fd);
403 
404 	lsm_skel->bss->my_pid = 0;
405 	token_lsm__destroy(lsm_skel);
406 
407 	exit(-err);
408 }
409 
/* Wait for process @pid to terminate, retrying if waitpid() is
 * interrupted (EINTR).
 *
 * Returns the exit status of a normally exited process, or -1 if
 * waitpid() failed or the process did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int status;

	while (waitpid(pid, &status, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
428 
/* Parent side of a token subtest.
 *
 * Receives the BPF FS context FD from the child over @sock_fd, applies
 * the delegation options in @bpffs_opts and mounts it, sends the mount FD
 * back, receives the child's BPF token FD and finally waits for the child
 * to exit, expecting exit status 0.
 */
static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
{
	int fs_fd = -1, mnt_fd = -1, token_fd = -1, err;

	err = recvfd(sock_fd, &fs_fd);
	if (!ASSERT_OK(err, "recv_bpffs_fd"))
		goto cleanup;

	mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
	if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
		err = -EINVAL;
		goto cleanup;
	}
	zclose(fs_fd);

	/* pass detached BPF FS mount FD to child */
	err = sendfd(sock_fd, mnt_fd);
	if (!ASSERT_OK(err, "send_mnt_fd"))
		goto cleanup;
	zclose(mnt_fd);

	/* receive BPF token FD back from child for some extra tests */
	err = recvfd(sock_fd, &token_fd);
	if (!ASSERT_OK(err, "recv_token_fd"))
		goto cleanup;

	err = wait_for_pid(child_pid);
	ASSERT_OK(err, "waitpid_child");
	/* child has been waited for; its PID must not be signalled anymore,
	 * as it may already belong to an unrelated process
	 */
	child_pid = 0;

cleanup:
	zclose(sock_fd);
	zclose(fs_fd);
	zclose(mnt_fd);
	zclose(token_fd);

	/* on early failure the child is still around: terminate and reap it
	 * so no zombie is left behind
	 */
	if (child_pid > 0) {
		(void)kill(child_pid, SIGKILL);
		(void)wait_for_pid(child_pid);
	}
}
467 
468 static void subtest_userns(struct bpffs_opts *bpffs_opts,
469 			   child_callback_fn child_cb)
470 {
471 	int sock_fds[2] = { -1, -1 };
472 	int child_pid = 0, err;
473 
474 	err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds);
475 	if (!ASSERT_OK(err, "socketpair"))
476 		goto cleanup;
477 
478 	child_pid = fork();
479 	if (!ASSERT_GE(child_pid, 0, "fork"))
480 		goto cleanup;
481 
482 	if (child_pid == 0) {
483 		zclose(sock_fds[0]);
484 		return child(sock_fds[1], bpffs_opts, child_cb);
485 
486 	} else {
487 		zclose(sock_fds[1]);
488 		return parent(child_pid, bpffs_opts, sock_fds[0]);
489 	}
490 
491 cleanup:
492 	zclose(sock_fds[0]);
493 	zclose(sock_fds[1]);
494 	if (child_pid > 0)
495 		(void)kill(child_pid, SIGKILL);
496 }
497 
498 static int userns_map_create(int mnt_fd, struct token_lsm *lsm_skel)
499 {
500 	LIBBPF_OPTS(bpf_map_create_opts, map_opts);
501 	int err, token_fd = -1, map_fd = -1;
502 	__u64 old_caps = 0;
503 
504 	/* create BPF token from BPF FS mount */
505 	token_fd = bpf_token_create(mnt_fd, NULL);
506 	if (!ASSERT_GT(token_fd, 0, "token_create")) {
507 		err = -EINVAL;
508 		goto cleanup;
509 	}
510 
511 	/* while inside non-init userns, we need both a BPF token *and*
512 	 * CAP_BPF inside current userns to create privileged map; let's test
513 	 * that neither BPF token alone nor namespaced CAP_BPF is sufficient
514 	 */
515 	err = drop_priv_caps(&old_caps);
516 	if (!ASSERT_OK(err, "drop_caps"))
517 		goto cleanup;
518 
519 	/* no token, no CAP_BPF -> fail */
520 	map_opts.map_flags = 0;
521 	map_opts.token_fd = 0;
522 	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts);
523 	if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) {
524 		err = -EINVAL;
525 		goto cleanup;
526 	}
527 
528 	/* token without CAP_BPF -> fail */
529 	map_opts.map_flags = BPF_F_TOKEN_FD;
530 	map_opts.token_fd = token_fd;
531 	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts);
532 	if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) {
533 		err = -EINVAL;
534 		goto cleanup;
535 	}
536 
537 	/* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
538 	err = restore_priv_caps(old_caps);
539 	if (!ASSERT_OK(err, "restore_caps"))
540 		goto cleanup;
541 
542 	/* CAP_BPF without token -> fail */
543 	map_opts.map_flags = 0;
544 	map_opts.token_fd = 0;
545 	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts);
546 	if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) {
547 		err = -EINVAL;
548 		goto cleanup;
549 	}
550 
551 	/* finally, namespaced CAP_BPF + token -> success */
552 	map_opts.map_flags = BPF_F_TOKEN_FD;
553 	map_opts.token_fd = token_fd;
554 	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts);
555 	if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) {
556 		err = -EINVAL;
557 		goto cleanup;
558 	}
559 
560 cleanup:
561 	zclose(token_fd);
562 	zclose(map_fd);
563 	return err;
564 }
565 
566 static int userns_btf_load(int mnt_fd, struct token_lsm *lsm_skel)
567 {
568 	LIBBPF_OPTS(bpf_btf_load_opts, btf_opts);
569 	int err, token_fd = -1, btf_fd = -1;
570 	const void *raw_btf_data;
571 	struct btf *btf = NULL;
572 	__u32 raw_btf_size;
573 	__u64 old_caps = 0;
574 
575 	/* create BPF token from BPF FS mount */
576 	token_fd = bpf_token_create(mnt_fd, NULL);
577 	if (!ASSERT_GT(token_fd, 0, "token_create")) {
578 		err = -EINVAL;
579 		goto cleanup;
580 	}
581 
582 	/* while inside non-init userns, we need both a BPF token *and*
583 	 * CAP_BPF inside current userns to create privileged map; let's test
584 	 * that neither BPF token alone nor namespaced CAP_BPF is sufficient
585 	 */
586 	err = drop_priv_caps(&old_caps);
587 	if (!ASSERT_OK(err, "drop_caps"))
588 		goto cleanup;
589 
590 	/* setup a trivial BTF data to load to the kernel */
591 	btf = btf__new_empty();
592 	if (!ASSERT_OK_PTR(btf, "empty_btf"))
593 		goto cleanup;
594 
595 	ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");
596 
597 	raw_btf_data = btf__raw_data(btf, &raw_btf_size);
598 	if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data"))
599 		goto cleanup;
600 
601 	/* no token + no CAP_BPF -> failure */
602 	btf_opts.btf_flags = 0;
603 	btf_opts.token_fd = 0;
604 	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
605 	if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail"))
606 		goto cleanup;
607 
608 	/* token + no CAP_BPF -> failure */
609 	btf_opts.btf_flags = BPF_F_TOKEN_FD;
610 	btf_opts.token_fd = token_fd;
611 	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
612 	if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail"))
613 		goto cleanup;
614 
615 	/* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
616 	err = restore_priv_caps(old_caps);
617 	if (!ASSERT_OK(err, "restore_caps"))
618 		goto cleanup;
619 
620 	/* token + CAP_BPF -> success */
621 	btf_opts.btf_flags = BPF_F_TOKEN_FD;
622 	btf_opts.token_fd = token_fd;
623 	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
624 	if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success"))
625 		goto cleanup;
626 
627 	err = 0;
628 cleanup:
629 	btf__free(btf);
630 	zclose(btf_fd);
631 	zclose(token_fd);
632 	return err;
633 }
634 
635 static int userns_prog_load(int mnt_fd, struct token_lsm *lsm_skel)
636 {
637 	LIBBPF_OPTS(bpf_prog_load_opts, prog_opts);
638 	int err, token_fd = -1, prog_fd = -1;
639 	struct bpf_insn insns[] = {
640 		/* bpf_jiffies64() requires CAP_BPF */
641 		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
642 		/* bpf_get_current_task() requires CAP_PERFMON */
643 		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task),
644 		/* r0 = 0; exit; */
645 		BPF_MOV64_IMM(BPF_REG_0, 0),
646 		BPF_EXIT_INSN(),
647 	};
648 	size_t insn_cnt = ARRAY_SIZE(insns);
649 	__u64 old_caps = 0;
650 
651 	/* create BPF token from BPF FS mount */
652 	token_fd = bpf_token_create(mnt_fd, NULL);
653 	if (!ASSERT_GT(token_fd, 0, "token_create")) {
654 		err = -EINVAL;
655 		goto cleanup;
656 	}
657 
658 	/* validate we can successfully load BPF program with token; this
659 	 * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF)
660 	 * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have
661 	 * BPF token wired properly in a bunch of places in the kernel
662 	 */
663 	prog_opts.prog_flags = BPF_F_TOKEN_FD;
664 	prog_opts.token_fd = token_fd;
665 	prog_opts.expected_attach_type = BPF_XDP;
666 	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
667 				insns, insn_cnt, &prog_opts);
668 	if (!ASSERT_GT(prog_fd, 0, "prog_fd")) {
669 		err = -EPERM;
670 		goto cleanup;
671 	}
672 
673 	/* no token + caps -> failure */
674 	prog_opts.prog_flags = 0;
675 	prog_opts.token_fd = 0;
676 	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
677 				insns, insn_cnt, &prog_opts);
678 	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
679 		err = -EPERM;
680 		goto cleanup;
681 	}
682 
683 	err = drop_priv_caps(&old_caps);
684 	if (!ASSERT_OK(err, "drop_caps"))
685 		goto cleanup;
686 
687 	/* no caps + token -> failure */
688 	prog_opts.prog_flags = BPF_F_TOKEN_FD;
689 	prog_opts.token_fd = token_fd;
690 	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
691 				insns, insn_cnt, &prog_opts);
692 	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
693 		err = -EPERM;
694 		goto cleanup;
695 	}
696 
697 	/* no caps + no token -> definitely a failure */
698 	prog_opts.prog_flags = 0;
699 	prog_opts.token_fd = 0;
700 	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
701 				insns, insn_cnt, &prog_opts);
702 	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
703 		err = -EPERM;
704 		goto cleanup;
705 	}
706 
707 	err = 0;
708 cleanup:
709 	zclose(prog_fd);
710 	zclose(token_fd);
711 	return err;
712 }
713 
714 static int userns_obj_priv_map(int mnt_fd, struct token_lsm *lsm_skel)
715 {
716 	LIBBPF_OPTS(bpf_object_open_opts, opts);
717 	char buf[256];
718 	struct priv_map *skel;
719 	int err;
720 
721 	skel = priv_map__open_and_load();
722 	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
723 		priv_map__destroy(skel);
724 		return -EINVAL;
725 	}
726 
727 	/* use bpf_token_path to provide BPF FS path */
728 	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
729 	opts.bpf_token_path = buf;
730 	skel = priv_map__open_opts(&opts);
731 	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
732 		return -EINVAL;
733 
734 	err = priv_map__load(skel);
735 	priv_map__destroy(skel);
736 	if (!ASSERT_OK(err, "obj_token_path_load"))
737 		return -EINVAL;
738 
739 	return 0;
740 }
741 
742 static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel)
743 {
744 	LIBBPF_OPTS(bpf_object_open_opts, opts);
745 	char buf[256];
746 	struct priv_prog *skel;
747 	int err;
748 
749 	skel = priv_prog__open_and_load();
750 	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
751 		priv_prog__destroy(skel);
752 		return -EINVAL;
753 	}
754 
755 	/* use bpf_token_path to provide BPF FS path */
756 	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
757 	opts.bpf_token_path = buf;
758 	skel = priv_prog__open_opts(&opts);
759 	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
760 		return -EINVAL;
761 	err = priv_prog__load(skel);
762 	priv_prog__destroy(skel);
763 	if (!ASSERT_OK(err, "obj_token_path_load"))
764 		return -EINVAL;
765 
766 	/* provide BPF token, but reject bpf_token_capable() with LSM */
767 	lsm_skel->bss->reject_capable = true;
768 	lsm_skel->bss->reject_cmd = false;
769 	skel = priv_prog__open_opts(&opts);
770 	if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cap_open"))
771 		return -EINVAL;
772 	err = priv_prog__load(skel);
773 	priv_prog__destroy(skel);
774 	if (!ASSERT_ERR(err, "obj_token_lsm_reject_cap_load"))
775 		return -EINVAL;
776 
777 	/* provide BPF token, but reject bpf_token_cmd() with LSM */
778 	lsm_skel->bss->reject_capable = false;
779 	lsm_skel->bss->reject_cmd = true;
780 	skel = priv_prog__open_opts(&opts);
781 	if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cmd_open"))
782 		return -EINVAL;
783 	err = priv_prog__load(skel);
784 	priv_prog__destroy(skel);
785 	if (!ASSERT_ERR(err, "obj_token_lsm_reject_cmd_load"))
786 		return -EINVAL;
787 
788 	return 0;
789 }
790 
791 /* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
792  * which should cause struct_ops application to fail, as BTF won't be uploaded
793  * into the kernel, even if STRUCT_OPS programs themselves are allowed
794  */
795 static int validate_struct_ops_load(int mnt_fd, bool expect_success)
796 {
797 	LIBBPF_OPTS(bpf_object_open_opts, opts);
798 	char buf[256];
799 	struct dummy_st_ops_success *skel;
800 	int err;
801 
802 	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
803 	opts.bpf_token_path = buf;
804 	skel = dummy_st_ops_success__open_opts(&opts);
805 	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
806 		return -EINVAL;
807 
808 	err = dummy_st_ops_success__load(skel);
809 	dummy_st_ops_success__destroy(skel);
810 	if (expect_success) {
811 		if (!ASSERT_OK(err, "obj_token_path_load"))
812 			return -EINVAL;
813 	} else /* expect failure */ {
814 		if (!ASSERT_ERR(err, "obj_token_path_load"))
815 			return -EINVAL;
816 	}
817 
818 	return 0;
819 }
820 
/* Child callback: struct_ops skeleton load is expected to fail. */
static int userns_obj_priv_btf_fail(int mnt_fd, struct token_lsm *lsm_skel)
{
	const bool expect_success = false; /* should fail */

	return validate_struct_ops_load(mnt_fd, expect_success);
}
825 
/* Child callback: struct_ops skeleton load is expected to succeed. */
static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel)
{
	const bool expect_success = true; /* should succeed */

	return validate_struct_ops_load(mnt_fd, expect_success);
}
830 
/* Directory used as the custom BPF FS mount point: the value of the
 * BPF_SELFTESTS_BPF_TOKEN_DIR environment variable if it is set,
 * "/tmp/bpf-token-fs" otherwise.
 */
static const char *token_bpffs_custom_dir(void)
{
	const char *dir = getenv("BPF_SELFTESTS_BPF_TOKEN_DIR");

	return dir ? dir : "/tmp/bpf-token-fs";
}
835 
/* name of the environment variable the implicit-token tests set and unset
 * to control where libbpf looks for a BPF token
 */
#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
837 
838 static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel)
839 {
840 	LIBBPF_OPTS(bpf_object_open_opts, opts);
841 	struct dummy_st_ops_success *skel;
842 	int err;
843 
844 	/* before we mount BPF FS with token delegation, struct_ops skeleton
845 	 * should fail to load
846 	 */
847 	skel = dummy_st_ops_success__open_and_load();
848 	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
849 		dummy_st_ops_success__destroy(skel);
850 		return -EINVAL;
851 	}
852 
853 	/* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF
854 	 * token automatically and implicitly
855 	 */
856 	err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH);
857 	if (!ASSERT_OK(err, "move_mount_bpffs"))
858 		return -EINVAL;
859 
860 	/* disable implicit BPF token creation by setting
861 	 * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail
862 	 */
863 	err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/);
864 	if (!ASSERT_OK(err, "setenv_token_path"))
865 		return -EINVAL;
866 	skel = dummy_st_ops_success__open_and_load();
867 	if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) {
868 		unsetenv(TOKEN_ENVVAR);
869 		dummy_st_ops_success__destroy(skel);
870 		return -EINVAL;
871 	}
872 	unsetenv(TOKEN_ENVVAR);
873 
874 	/* now the same struct_ops skeleton should succeed thanks to libbpf
875 	 * creating BPF token from /sys/fs/bpf mount point
876 	 */
877 	skel = dummy_st_ops_success__open_and_load();
878 	if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
879 		return -EINVAL;
880 
881 	dummy_st_ops_success__destroy(skel);
882 
883 	/* now disable implicit token through empty bpf_token_path, should fail */
884 	opts.bpf_token_path = "";
885 	skel = dummy_st_ops_success__open_opts(&opts);
886 	if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
887 		return -EINVAL;
888 
889 	err = dummy_st_ops_success__load(skel);
890 	dummy_st_ops_success__destroy(skel);
891 	if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
892 		return -EINVAL;
893 
894 	return 0;
895 }
896 
897 static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel)
898 {
899 	const char *custom_dir = token_bpffs_custom_dir();
900 	LIBBPF_OPTS(bpf_object_open_opts, opts);
901 	struct dummy_st_ops_success *skel;
902 	int err;
903 
904 	/* before we mount BPF FS with token delegation, struct_ops skeleton
905 	 * should fail to load
906 	 */
907 	skel = dummy_st_ops_success__open_and_load();
908 	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
909 		dummy_st_ops_success__destroy(skel);
910 		return -EINVAL;
911 	}
912 
913 	/* mount custom BPF FS over custom location, so libbpf can't create
914 	 * BPF token implicitly, unless pointed to it through
915 	 * LIBBPF_BPF_TOKEN_PATH envvar
916 	 */
917 	rmdir(custom_dir);
918 	if (!ASSERT_OK(mkdir(custom_dir, 0777), "mkdir_bpffs_custom"))
919 		goto err_out;
920 	err = sys_move_mount(mnt_fd, "", AT_FDCWD, custom_dir, MOVE_MOUNT_F_EMPTY_PATH);
921 	if (!ASSERT_OK(err, "move_mount_bpffs"))
922 		goto err_out;
923 
924 	/* even though we have BPF FS with delegation, it's not at default
925 	 * /sys/fs/bpf location, so we still fail to load until envvar is set up
926 	 */
927 	skel = dummy_st_ops_success__open_and_load();
928 	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) {
929 		dummy_st_ops_success__destroy(skel);
930 		goto err_out;
931 	}
932 
933 	err = setenv(TOKEN_ENVVAR, custom_dir, 1 /*overwrite*/);
934 	if (!ASSERT_OK(err, "setenv_token_path"))
935 		goto err_out;
936 
937 	/* now the same struct_ops skeleton should succeed thanks to libbpf
938 	 * creating BPF token from custom mount point
939 	 */
940 	skel = dummy_st_ops_success__open_and_load();
941 	if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
942 		goto err_out;
943 
944 	dummy_st_ops_success__destroy(skel);
945 
946 	/* now disable implicit token through empty bpf_token_path, envvar
947 	 * will be ignored, should fail
948 	 */
949 	opts.bpf_token_path = "";
950 	skel = dummy_st_ops_success__open_opts(&opts);
951 	if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
952 		goto err_out;
953 
954 	err = dummy_st_ops_success__load(skel);
955 	dummy_st_ops_success__destroy(skel);
956 	if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
957 		goto err_out;
958 
959 	rmdir(custom_dir);
960 	unsetenv(TOKEN_ENVVAR);
961 	return 0;
962 err_out:
963 	rmdir(custom_dir);
964 	unsetenv(TOKEN_ENVVAR);
965 	return -EINVAL;
966 }
967 
/* 64-bit mask with only bit number @pos set */
#define bit(pos) (1ULL << (pos))
969 
970 void test_token(void)
971 {
972 	if (test__start_subtest("map_token")) {
973 		struct bpffs_opts opts = {
974 			.cmds_str = "map_create",
975 			.maps_str = "stack",
976 		};
977 
978 		subtest_userns(&opts, userns_map_create);
979 	}
980 	if (test__start_subtest("btf_token")) {
981 		struct bpffs_opts opts = {
982 			.cmds = 1ULL << BPF_BTF_LOAD,
983 		};
984 
985 		subtest_userns(&opts, userns_btf_load);
986 	}
987 	if (test__start_subtest("prog_token")) {
988 		struct bpffs_opts opts = {
989 			.cmds_str = "PROG_LOAD",
990 			.progs_str = "XDP",
991 			.attachs_str = "xdp",
992 		};
993 
994 		subtest_userns(&opts, userns_prog_load);
995 	}
996 	if (test__start_subtest("obj_priv_map")) {
997 		struct bpffs_opts opts = {
998 			.cmds = bit(BPF_MAP_CREATE),
999 			.maps = bit(BPF_MAP_TYPE_QUEUE),
1000 		};
1001 
1002 		subtest_userns(&opts, userns_obj_priv_map);
1003 	}
1004 	if (test__start_subtest("obj_priv_prog")) {
1005 		struct bpffs_opts opts = {
1006 			.cmds = bit(BPF_PROG_LOAD),
1007 			.progs = bit(BPF_PROG_TYPE_KPROBE),
1008 			.attachs = ~0ULL,
1009 		};
1010 
1011 		subtest_userns(&opts, userns_obj_priv_prog);
1012 	}
1013 	if (test__start_subtest("obj_priv_btf_fail")) {
1014 		struct bpffs_opts opts = {
1015 			/* disallow BTF loading */
1016 			.cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
1017 			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
1018 			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
1019 			.attachs = ~0ULL,
1020 		};
1021 
1022 		subtest_userns(&opts, userns_obj_priv_btf_fail);
1023 	}
1024 	if (test__start_subtest("obj_priv_btf_success")) {
1025 		struct bpffs_opts opts = {
1026 			/* allow BTF loading */
1027 			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
1028 			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
1029 			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
1030 			.attachs = ~0ULL,
1031 		};
1032 
1033 		subtest_userns(&opts, userns_obj_priv_btf_success);
1034 	}
1035 	if (test__start_subtest("obj_priv_implicit_token")) {
1036 		struct bpffs_opts opts = {
1037 			/* allow BTF loading */
1038 			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
1039 			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
1040 			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
1041 			.attachs = ~0ULL,
1042 		};
1043 
1044 		subtest_userns(&opts, userns_obj_priv_implicit_token);
1045 	}
1046 	if (test__start_subtest("obj_priv_implicit_token_envvar")) {
1047 		struct bpffs_opts opts = {
1048 			/* allow BTF loading */
1049 			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
1050 			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
1051 			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
1052 			.attachs = ~0ULL,
1053 		};
1054 
1055 		subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
1056 	}
1057 }
1058