// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
#define _GNU_SOURCE
#include <test_progs.h>
#include <bpf/btf.h>
#include "cap_helpers.h"
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/unistd.h>
#include <linux/mount.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/un.h>
#include "priv_map.skel.h"
#include "priv_prog.skel.h"
#include "dummy_st_ops_success.skel.h"

static inline int sys_mount(const char *dev_name, const char *dir_name,
			    const char *type, unsigned long flags,
			    const void *data)
{
	return syscall(__NR_mount, dev_name, dir_name, type, flags, data);
}

static inline int sys_fsopen(const char *fsname, unsigned flags)
{
	return syscall(__NR_fsopen, fsname, flags);
}

static inline int sys_fspick(int dfd, const char *path, unsigned flags)
{
	return syscall(__NR_fspick, dfd, path, flags);
}

static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
{
	return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
}

static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
{
	return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
}

static inline int sys_move_mount(int from_dfd, const char *from_path,
				 int to_dfd, const char *to_path,
				 unsigned flags)
{
	return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags);
}
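
/* The raw syscall wrappers above expose the new mount API (fsopen,
 * fsconfig, fsmount, fspick, move_mount), which may lack glibc wrappers
 * on older systems. As a rough sketch of how they compose in this test
 * (error handling elided):
 *
 *	fs_fd = sys_fsopen("bpf", 0);             // new, empty FS context
 *	sys_fsconfig(fs_fd, FSCONFIG_SET_STRING,
 *		     "delegate_cmds", "any", 0);  // delegation options
 *	sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
 *	mnt_fd = sys_fsmount(fs_fd, 0, 0);        // detached mount fd
 *	sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf",
 *		       MOVE_MOUNT_F_EMPTY_PATH);  // attach it (optional)
 */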

static int drop_priv_caps(__u64 *old_caps)
{
	return cap_disable_effective((1ULL << CAP_BPF) |
				     (1ULL << CAP_PERFMON) |
				     (1ULL << CAP_NET_ADMIN) |
				     (1ULL << CAP_SYS_ADMIN), old_caps);
}

static int restore_priv_caps(__u64 old_caps)
{
	return cap_enable_effective(old_caps, NULL);
}

static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str)
{
	char buf[32];
	int err;

	if (!mask_str) {
		if (mask == ~0ULL) {
			mask_str = "any";
		} else {
			snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask);
			mask_str = buf;
		}
	}

	err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, mask_str, 0);
	if (err < 0)
		err = -errno;
	return err;
}
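
/* Note: delegate_* values are accepted either as a hex bitmask (as
 * produced above) or as a symbolic name, with "any" meaning "everything";
 * both spellings are exercised by the subtests below. E.g., these two
 * calls are intended to be equivalent:
 *
 *	set_delegate_mask(fs_fd, "delegate_cmds", 1ULL << BPF_MAP_CREATE, NULL);
 *	set_delegate_mask(fs_fd, "delegate_cmds", 0, "map_create");
 */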

#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0)

struct bpffs_opts {
	__u64 cmds;
	__u64 maps;
	__u64 progs;
	__u64 attachs;
	const char *cmds_str;
	const char *maps_str;
	const char *progs_str;
	const char *attachs_str;
};
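
/* For each delegate_* knob, set_delegate_mask() uses the *_str form when
 * it is set and the numeric mask otherwise. E.g., a subtest delegating
 * only BTF loading could be configured as:
 *
 *	struct bpffs_opts opts = { .cmds = 1ULL << BPF_BTF_LOAD };
 *
 * or, assuming "btf_load" is the recognized symbolic name, as:
 *
 *	struct bpffs_opts opts = { .cmds_str = "btf_load" };
 */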

static int create_bpffs_fd(void)
{
	int fs_fd;

	/* create VFS context */
	fs_fd = sys_fsopen("bpf", 0);
	ASSERT_GE(fs_fd, 0, "fs_fd");

	return fs_fd;
}

static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
{
	int mnt_fd, err;

	/* set up token delegation mount options */
	err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
	if (!ASSERT_OK(err, "fs_cfg_cmds"))
		return err;
	err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str);
	if (!ASSERT_OK(err, "fs_cfg_maps"))
		return err;
	err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str);
	if (!ASSERT_OK(err, "fs_cfg_progs"))
		return err;
	err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str);
	if (!ASSERT_OK(err, "fs_cfg_attachs"))
		return err;

	/* instantiate FS object */
	err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
	if (err < 0)
		return -errno;

	/* create O_PATH fd for detached mount */
	mnt_fd = sys_fsmount(fs_fd, 0, 0);
	if (mnt_fd < 0)
		return -errno;

	return mnt_fd;
}
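
/* The FD returned by materialize_bpffs_fd() references a detached mount:
 * fully functional, but not attached anywhere in the mount tree. Subtests
 * below either attach it with sys_move_mount() or address it through the
 * magic /proc symlink without ever mounting it, roughly:
 *
 *	char path[64];
 *
 *	snprintf(path, sizeof(path), "/proc/self/fd/%d", mnt_fd);
 *	// path now names the BPF FS instance, e.g., for bpf_token_path
 */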

/* send FD over Unix domain (AF_UNIX) socket */
static int sendfd(int sockfd, int fd)
{
	struct msghdr msg = {};
	struct cmsghdr *cmsg;
	int fds[1] = { fd }, err;
	char iobuf[1];
	struct iovec io = {
		.iov_base = iobuf,
		.iov_len = sizeof(iobuf),
	};
	union {
		char buf[CMSG_SPACE(sizeof(fds))];
		struct cmsghdr align;
	} u;

	msg.msg_iov = &io;
	msg.msg_iovlen = 1;
	msg.msg_control = u.buf;
	msg.msg_controllen = sizeof(u.buf);
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
	memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));

	err = sendmsg(sockfd, &msg, 0);
	if (err < 0)
		err = -errno;
	if (!ASSERT_EQ(err, 1, "sendmsg"))
		return -EINVAL;

	return 0;
}

/* receive FD over Unix domain (AF_UNIX) socket */
static int recvfd(int sockfd, int *fd)
{
	struct msghdr msg = {};
	struct cmsghdr *cmsg;
	int fds[1], err;
	char iobuf[1];
	struct iovec io = {
		.iov_base = iobuf,
		.iov_len = sizeof(iobuf),
	};
	union {
		char buf[CMSG_SPACE(sizeof(fds))];
		struct cmsghdr align;
	} u;

	msg.msg_iov = &io;
	msg.msg_iovlen = 1;
	msg.msg_control = u.buf;
	msg.msg_controllen = sizeof(u.buf);

	err = recvmsg(sockfd, &msg, 0);
	if (err < 0)
		err = -errno;
	if (!ASSERT_EQ(err, 1, "recvmsg"))
		return -EINVAL;

	cmsg = CMSG_FIRSTHDR(&msg);
	if (!ASSERT_OK_PTR(cmsg, "cmsg_null") ||
	    !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") ||
	    !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") ||
	    !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type"))
		return -EINVAL;

	memcpy(fds, CMSG_DATA(cmsg), sizeof(fds));
	*fd = fds[0];

	return 0;
}
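
/* sendfd()/recvfd() implement the classic SCM_RIGHTS FD-passing pattern
 * over an AF_UNIX socket; the 1-byte payload exists only because a stream
 * socket won't deliver ancillary data on a zero-byte send. Sketch of the
 * intended use:
 *
 *	int sk[2], fd = -1;
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sk);
 *	// process A: sendfd(sk[0], some_fd);
 *	// process B: recvfd(sk[1], &fd); // fd references the same open file
 */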

static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	ssize_t ret;

	do {
		ret = write(fd, buf, count);
	} while (ret < 0 && errno == EINTR);

	return ret;
}

static int write_file(const char *path, const void *buf, size_t count)
{
	int fd;
	ssize_t ret;

	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
	if (fd < 0)
		return -1;

	ret = write_nointr(fd, buf, count);
	close(fd);
	if (ret < 0 || (size_t)ret != count)
		return -1;

	return 0;
}

static int create_and_enter_userns(void)
{
	uid_t uid;
	gid_t gid;
	char map[100];

	uid = getuid();
	gid = getgid();

	if (unshare(CLONE_NEWUSER))
		return -1;

	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
	    errno != ENOENT)
		return -1;

	snprintf(map, sizeof(map), "0 %d 1", uid);
	if (write_file("/proc/self/uid_map", map, strlen(map)))
		return -1;

	snprintf(map, sizeof(map), "0 %d 1", gid);
	if (write_file("/proc/self/gid_map", map, strlen(map)))
		return -1;

	if (setgid(0))
		return -1;

	if (setuid(0))
		return -1;

	return 0;
}
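
/* The single-line "0 <id> 1" mapping written above maps ID 0 inside the
 * new userns to the caller's original ID, so after setuid(0)/setgid(0)
 * the process is "root" within its own namespace while remaining
 * unprivileged globally. E.g., for a caller originally running as uid
 * 1000, /proc/self/uid_map will read:
 *
 *	0 1000 1
 */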

typedef int (*child_callback_fn)(int);

static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback)
{
	int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1;

	/* set up userns with root mappings */
	err = create_and_enter_userns();
	if (!ASSERT_OK(err, "create_and_enter_userns"))
		goto cleanup;

	/* set up mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */
	err = unshare(CLONE_NEWNS);
	if (!ASSERT_OK(err, "create_mountns"))
		goto cleanup;

	err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
	if (!ASSERT_OK(err, "remount_root"))
		goto cleanup;

	fs_fd = create_bpffs_fd();
	if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* ensure unprivileged child cannot set delegation options */
	err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm");
	err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_maps_eperm");
	err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_progs_eperm");
	err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm");

	/* pass BPF FS context object to parent */
	err = sendfd(sock_fd, fs_fd);
	if (!ASSERT_OK(err, "send_fs_fd"))
		goto cleanup;
	zclose(fs_fd);

	/* avoid mucking around with mount namespaces and mounting at
	 * well-known path, just get detach-mounted BPF FS fd back from parent
	 */
	err = recvfd(sock_fd, &mnt_fd);
	if (!ASSERT_OK(err, "recv_mnt_fd"))
		goto cleanup;

	/* try to fspick() BPF FS and try to add some delegation options */
	fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH);
	if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* ensure unprivileged child cannot reconfigure to set delegation options */
	err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	zclose(fs_fd);

	bpffs_fd = openat(mnt_fd, ".", O_RDONLY);
	if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* run the per-test logic against the custom-configured BPF FS instance */
	err = callback(bpffs_fd);
	if (!ASSERT_OK(err, "test_callback"))
		goto cleanup;

	err = 0;
cleanup:
	zclose(sock_fd);
	zclose(mnt_fd);
	zclose(fs_fd);
	zclose(bpffs_fd);

	exit(-err);
}

static int wait_for_pid(pid_t pid)
{
	int status, ret;

again:
	ret = waitpid(pid, &status, 0);
	if (ret == -1) {
		if (errno == EINTR)
			goto again;

		return -1;
	}

	if (!WIFEXITED(status))
		return -1;

	return WEXITSTATUS(status);
}

static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
{
	int fs_fd = -1, mnt_fd = -1, err;

	err = recvfd(sock_fd, &fs_fd);
	if (!ASSERT_OK(err, "recv_bpffs_fd"))
		goto cleanup;

	mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
	if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
		err = -EINVAL;
		goto cleanup;
	}
	zclose(fs_fd);

	/* pass detached mount fd of the set up BPF FS back to the child */
	err = sendfd(sock_fd, mnt_fd);
	if (!ASSERT_OK(err, "send_mnt_fd"))
		goto cleanup;
	zclose(mnt_fd);

	err = wait_for_pid(child_pid);
	ASSERT_OK(err, "waitpid_child");

cleanup:
	zclose(sock_fd);
	zclose(fs_fd);
	zclose(mnt_fd);

	if (child_pid > 0)
		(void)kill(child_pid, SIGKILL);
}

static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb)
{
	int sock_fds[2] = { -1, -1 };
	int child_pid = 0, err;

	err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds);
	if (!ASSERT_OK(err, "socketpair"))
		goto cleanup;

	child_pid = fork();
	if (!ASSERT_GE(child_pid, 0, "fork"))
		goto cleanup;

	if (child_pid == 0) {
		zclose(sock_fds[0]);
		child(sock_fds[1], bpffs_opts, cb);
		return; /* child() exit()s and never actually returns */
	} else {
		zclose(sock_fds[1]);
		parent(child_pid, bpffs_opts, sock_fds[0]);
		return;
	}

cleanup:
	zclose(sock_fds[0]);
	zclose(sock_fds[1]);
	if (child_pid > 0)
		(void)kill(child_pid, SIGKILL);
}
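
/* The parent/child split above implements this handshake:
 *   1. child enters fresh userns + mountns and fsopen()s a BPF FS context,
 *      so the new superblock is owned by the child's userns;
 *   2. privileged parent receives fs_fd, sets delegate_* options, and
 *      fsmount()s it, sending the detached mount FD back;
 *   3. child runs the per-test callback against the delegated instance.
 * The FS context deliberately originates in the child, since BPF token
 * delegation is scoped to the userns in which the BPF FS superblock was
 * created.
 */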

static int userns_map_create(int mnt_fd)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts);
	int err, token_fd = -1, map_fd = -1;
	__u64 old_caps = 0;

	/* create BPF token from BPF FS mount */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "token_create")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* while inside non-init userns, we need both a BPF token *and*
	 * CAP_BPF inside current userns to create privileged map; let's test
	 * that neither BPF token alone nor namespaced CAP_BPF is sufficient
	 */
	err = drop_priv_caps(&old_caps);
	if (!ASSERT_OK(err, "drop_caps"))
		goto cleanup;

	/* no token, no CAP_BPF -> fail */
	map_opts.token_fd = 0;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* token without CAP_BPF -> fail */
	map_opts.token_fd = token_fd;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
	err = restore_priv_caps(old_caps);
	if (!ASSERT_OK(err, "restore_caps"))
		goto cleanup;

	/* CAP_BPF without token -> fail */
	map_opts.token_fd = 0;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* finally, namespaced CAP_BPF + token -> success */
	map_opts.token_fd = token_fd;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) {
		err = -EINVAL;
		goto cleanup;
	}

cleanup:
	zclose(token_fd);
	zclose(map_fd);
	return err;
}
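
/* Summary of the access matrix userns_map_create() validates (all inside
 * a non-init userns):
 *
 *	token	ns CAP_BPF	bpf_map_create()
 *	no	no		fails
 *	yes	no		fails
 *	no	yes		fails
 *	yes	yes		succeeds
 *
 * i.e., a BPF token augments namespaced capabilities but never replaces
 * them.
 */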

static int userns_btf_load(int mnt_fd)
{
	LIBBPF_OPTS(bpf_btf_load_opts, btf_opts);
	int err, token_fd = -1, btf_fd = -1;
	const void *raw_btf_data;
	struct btf *btf = NULL;
	__u32 raw_btf_size;
	__u64 old_caps = 0;

	/* create BPF token from BPF FS mount */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "token_create")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* while inside non-init userns, we need both a BPF token *and*
	 * CAP_BPF inside current userns to load BTF; let's test that
	 * neither BPF token alone nor namespaced CAP_BPF is sufficient
	 */
	err = drop_priv_caps(&old_caps);
	if (!ASSERT_OK(err, "drop_caps"))
		goto cleanup;

	/* set up trivial BTF data to load into the kernel */
	btf = btf__new_empty();
	if (!ASSERT_OK_PTR(btf, "empty_btf"))
		goto cleanup;

	ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");

	raw_btf_data = btf__raw_data(btf, &raw_btf_size);
	if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data"))
		goto cleanup;

	/* no token + no CAP_BPF -> failure */
	btf_opts.token_fd = 0;
	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
	if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail"))
		goto cleanup;

	/* token + no CAP_BPF -> failure */
	btf_opts.token_fd = token_fd;
	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
	if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail"))
		goto cleanup;

	/* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
	err = restore_priv_caps(old_caps);
	if (!ASSERT_OK(err, "restore_caps"))
		goto cleanup;

	/* token + CAP_BPF -> success */
	btf_opts.token_fd = token_fd;
	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
	if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success"))
		goto cleanup;

	err = 0;
cleanup:
	btf__free(btf);
	zclose(btf_fd);
	zclose(token_fd);
	return err;
}

static int userns_prog_load(int mnt_fd)
{
	LIBBPF_OPTS(bpf_prog_load_opts, prog_opts);
	int err, token_fd = -1, prog_fd = -1;
	struct bpf_insn insns[] = {
		/* bpf_jiffies64() requires CAP_BPF */
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
		/* bpf_get_current_task() requires CAP_PERFMON */
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task),
		/* r0 = 0; exit; */
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	size_t insn_cnt = ARRAY_SIZE(insns);
	__u64 old_caps = 0;

	/* create BPF token from BPF FS mount */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "token_create")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* validate we can successfully load BPF program with token; this
	 * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF)
	 * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have
	 * BPF token wired properly in a bunch of places in the kernel
	 */
	prog_opts.token_fd = token_fd;
	prog_opts.expected_attach_type = BPF_XDP;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_GT(prog_fd, 0, "prog_fd")) {
		err = -EPERM;
		goto cleanup;
	}

	/* no token + caps -> failure */
	prog_opts.token_fd = 0;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
		err = -EPERM;
		goto cleanup;
	}

	err = drop_priv_caps(&old_caps);
	if (!ASSERT_OK(err, "drop_caps"))
		goto cleanup;

	/* no caps + token -> failure */
	prog_opts.token_fd = token_fd;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
		err = -EPERM;
		goto cleanup;
	}

	/* no caps + no token -> definitely a failure */
	prog_opts.token_fd = 0;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
		err = -EPERM;
		goto cleanup;
	}

	err = 0;
cleanup:
	zclose(prog_fd);
	zclose(token_fd);
	return err;
}

static int userns_obj_priv_map(int mnt_fd)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	char buf[256];
	struct priv_map *skel;
	int err, token_fd;

	skel = priv_map__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		priv_map__destroy(skel);
		return -EINVAL;
	}

	/* use bpf_token_path to provide BPF FS path */
	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
	opts.bpf_token_path = buf;
	skel = priv_map__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
		return -EINVAL;

	err = priv_map__load(skel);
	priv_map__destroy(skel);
	if (!ASSERT_OK(err, "obj_token_path_load"))
		return -EINVAL;

	/* create token and pass it through bpf_token_fd */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "create_token"))
		return -EINVAL;

	opts.bpf_token_path = NULL;
	opts.bpf_token_fd = token_fd;
	skel = priv_map__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_fd_open"))
		return -EINVAL;

	/* we can close our token FD, bpf_object owns dup()'ed FD now */
	close(token_fd);

	err = priv_map__load(skel);
	priv_map__destroy(skel);
	if (!ASSERT_OK(err, "obj_token_fd_load"))
		return -EINVAL;

	return 0;
}
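
/* Note the two delegation-consuming libbpf APIs exercised above:
 * bpf_token_path (libbpf creates and owns the token itself from the given
 * BPF FS path) and bpf_token_fd (the caller creates the token; libbpf
 * dup()s it, so the caller may close the original FD right away).
 */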

static int userns_obj_priv_prog(int mnt_fd)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	char buf[256];
	struct priv_prog *skel;
	int err;

	skel = priv_prog__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		priv_prog__destroy(skel);
		return -EINVAL;
	}

	/* use bpf_token_path to provide BPF FS path */
	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
	opts.bpf_token_path = buf;
	skel = priv_prog__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
		return -EINVAL;

	err = priv_prog__load(skel);
	priv_prog__destroy(skel);
	if (!ASSERT_OK(err, "obj_token_path_load"))
		return -EINVAL;

	return 0;
}

/* when called with a BPF FS instance that doesn't delegate the BPF_BTF_LOAD
 * command, struct_ops skeleton loading should fail, as BTF won't be uploaded
 * into the kernel, even if STRUCT_OPS programs themselves are allowed
 */
static int validate_struct_ops_load(int mnt_fd, bool expect_success)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	char buf[256];
	struct dummy_st_ops_success *skel;
	int err;

	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
	opts.bpf_token_path = buf;
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
		return -EINVAL;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (expect_success) {
		if (!ASSERT_OK(err, "obj_token_path_load"))
			return -EINVAL;
	} else /* expect failure */ {
		if (!ASSERT_ERR(err, "obj_token_path_load"))
			return -EINVAL;
	}

	return 0;
}

static int userns_obj_priv_btf_fail(int mnt_fd)
{
	return validate_struct_ops_load(mnt_fd, false /* should fail */);
}

static int userns_obj_priv_btf_success(int mnt_fd)
{
	return validate_struct_ops_load(mnt_fd, true /* should succeed */);
}

#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs"
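
/* Implicit-token behavior exercised by the next two subtests: on object
 * load, libbpf attempts to create a BPF token from /sys/fs/bpf by
 * default, or from the path in the LIBBPF_BPF_TOKEN_PATH envvar when that
 * is set. Setting the envvar to an empty string, passing an empty
 * opts.bpf_token_path, or passing a negative opts.bpf_token_fd each opt
 * out of implicit token creation. Illustrative invocation (paths are
 * examples only):
 *
 *	LIBBPF_BPF_TOKEN_PATH=/bpf-token-fs ./test_progs -t token
 */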

static int userns_obj_priv_implicit_token(int mnt_fd)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	struct dummy_st_ops_success *skel;
	int err;

	/* before we mount BPF FS with token delegation, struct_ops skeleton
	 * should fail to load
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		dummy_st_ops_success__destroy(skel);
		return -EINVAL;
	}

	/* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF
	 * token automatically and implicitly
	 */
	err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH);
	if (!ASSERT_OK(err, "move_mount_bpffs"))
		return -EINVAL;

	/* disable implicit BPF token creation by setting
	 * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail
	 */
	err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/);
	if (!ASSERT_OK(err, "setenv_token_path"))
		return -EINVAL;
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) {
		unsetenv(TOKEN_ENVVAR);
		dummy_st_ops_success__destroy(skel);
		return -EINVAL;
	}
	unsetenv(TOKEN_ENVVAR);

	/* now the same struct_ops skeleton should succeed thanks to libbpf
	 * creating BPF token from /sys/fs/bpf mount point
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
		return -EINVAL;

	dummy_st_ops_success__destroy(skel);

	/* now disable implicit token through empty bpf_token_path, should fail */
	opts.bpf_token_path = "";
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
		return -EINVAL;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
		return -EINVAL;

	/* now disable implicit token through negative bpf_token_fd, should fail */
	opts.bpf_token_path = NULL;
	opts.bpf_token_fd = -1;
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open"))
		return -EINVAL;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (!ASSERT_ERR(err, "obj_neg_token_fd_load"))
		return -EINVAL;

	return 0;
}

static int userns_obj_priv_implicit_token_envvar(int mnt_fd)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	struct dummy_st_ops_success *skel;
	int err;

	/* before we mount BPF FS with token delegation, struct_ops skeleton
	 * should fail to load
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		dummy_st_ops_success__destroy(skel);
		return -EINVAL;
	}

	/* mount custom BPF FS over custom location, so libbpf can't create
	 * BPF token implicitly, unless pointed to it through
	 * LIBBPF_BPF_TOKEN_PATH envvar
	 */
	rmdir(TOKEN_BPFFS_CUSTOM);
	if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom"))
		goto err_out;
	err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH);
	if (!ASSERT_OK(err, "move_mount_bpffs"))
		goto err_out;

	/* even though we have BPF FS with delegation, it's not at default
	 * /sys/fs/bpf location, so we still fail to load until envvar is set up
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) {
		dummy_st_ops_success__destroy(skel);
		goto err_out;
	}

	err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/);
	if (!ASSERT_OK(err, "setenv_token_path"))
		goto err_out;

	/* now the same struct_ops skeleton should succeed thanks to libbpf
	 * creating BPF token from custom mount point
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
		goto err_out;

	dummy_st_ops_success__destroy(skel);

	/* now disable implicit token through empty bpf_token_path, envvar
	 * will be ignored, should fail
	 */
	opts.bpf_token_path = "";
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
		goto err_out;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
		goto err_out;

	/* now disable implicit token through negative bpf_token_fd, envvar
	 * will be ignored, should fail
	 */
	opts.bpf_token_path = NULL;
	opts.bpf_token_fd = -1;
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open"))
		goto err_out;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (!ASSERT_ERR(err, "obj_neg_token_fd_load"))
		goto err_out;

	rmdir(TOKEN_BPFFS_CUSTOM);
	unsetenv(TOKEN_ENVVAR);
	return 0;
err_out:
	rmdir(TOKEN_BPFFS_CUSTOM);
	unsetenv(TOKEN_ENVVAR);
	return -EINVAL;
}

#define bit(n) (1ULL << (n))

void test_token(void)
{
	if (test__start_subtest("map_token")) {
		struct bpffs_opts opts = {
			.cmds_str = "map_create",
			.maps_str = "stack",
		};

		subtest_userns(&opts, userns_map_create);
	}
	if (test__start_subtest("btf_token")) {
		struct bpffs_opts opts = {
			.cmds = 1ULL << BPF_BTF_LOAD,
		};

		subtest_userns(&opts, userns_btf_load);
	}
	if (test__start_subtest("prog_token")) {
		struct bpffs_opts opts = {
			.cmds_str = "PROG_LOAD",
			.progs_str = "XDP",
			.attachs_str = "xdp",
		};

		subtest_userns(&opts, userns_prog_load);
	}
	if (test__start_subtest("obj_priv_map")) {
		struct bpffs_opts opts = {
			.cmds = bit(BPF_MAP_CREATE),
			.maps = bit(BPF_MAP_TYPE_QUEUE),
		};

		subtest_userns(&opts, userns_obj_priv_map);
	}
	if (test__start_subtest("obj_priv_prog")) {
		struct bpffs_opts opts = {
			.cmds = bit(BPF_PROG_LOAD),
			.progs = bit(BPF_PROG_TYPE_KPROBE),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_prog);
	}
	if (test__start_subtest("obj_priv_btf_fail")) {
		struct bpffs_opts opts = {
			/* disallow BTF loading */
			.cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_btf_fail);
	}
	if (test__start_subtest("obj_priv_btf_success")) {
		struct bpffs_opts opts = {
			/* allow BTF loading */
			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_btf_success);
	}
	if (test__start_subtest("obj_priv_implicit_token")) {
		struct bpffs_opts opts = {
			/* allow BTF loading */
			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_implicit_token);
	}
	if (test__start_subtest("obj_priv_implicit_token_envvar")) {
		struct bpffs_opts opts = {
			/* allow BTF loading */
			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
	}
}