// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
#define _GNU_SOURCE
#include <test_progs.h>
#include <bpf/btf.h>
#include "cap_helpers.h"
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/unistd.h>
#include <linux/mount.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/un.h>
#include "priv_map.skel.h"
#include "priv_prog.skel.h"
#include "dummy_st_ops_success.skel.h"

static inline int sys_mount(const char *dev_name, const char *dir_name,
			    const char *type, unsigned long flags,
			    const void *data)
{
	return syscall(__NR_mount, dev_name, dir_name, type, flags, data);
}

static inline int sys_fsopen(const char *fsname, unsigned flags)
{
	return syscall(__NR_fsopen, fsname, flags);
}

static inline int sys_fspick(int dfd, const char *path, unsigned flags)
{
	return syscall(__NR_fspick, dfd, path, flags);
}

static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
{
	return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
}

static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
{
	return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
}

static inline int sys_move_mount(int from_dfd, const char *from_path,
				 int to_dfd, const char *to_path,
				 unsigned flags)
{
	return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags);
}

static int drop_priv_caps(__u64 *old_caps)
{
	return cap_disable_effective((1ULL << CAP_BPF) |
				     (1ULL << CAP_PERFMON) |
				     (1ULL << CAP_NET_ADMIN) |
				     (1ULL << CAP_SYS_ADMIN), old_caps);
}

static int restore_priv_caps(__u64 old_caps)
{
	return cap_enable_effective(old_caps, NULL);
}

static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str)
{
	char buf[32];
	int err;

	if (!mask_str) {
		if (mask == ~0ULL) {
			mask_str = "any";
		} else {
			snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask);
			mask_str = buf;
		}
	}

	err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key,
			   mask_str, 0);
	if (err < 0)
		err = -errno;
	return err;
}

#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0)

struct bpffs_opts {
	__u64 cmds;
	__u64 maps;
	__u64 progs;
	__u64 attachs;
	const char *cmds_str;
	const char *maps_str;
	const char *progs_str;
	const char *attachs_str;
};

static int create_bpffs_fd(void)
{
	int fs_fd;

	/* create VFS context */
	fs_fd = sys_fsopen("bpf", 0);
	ASSERT_GE(fs_fd, 0, "fs_fd");

	return fs_fd;
}
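/* Illustrative sketch (not called by the tests below): the end-to-end
 * new-mount-API flow for standing up a delegated BPF FS instance, using the
 * sys_*() wrappers above. The "any" delegation value and the "/tmp/bpffs"
 * target path are assumptions for illustration only; real callers choose
 * their own masks and mount point.
 */
static __attribute__((unused)) int example_mount_delegated_bpffs(void)
{
	int fs_fd, mnt_fd, err;

	/* fsopen() creates a filesystem context, not yet instantiated */
	fs_fd = sys_fsopen("bpf", 0);
	if (fs_fd < 0)
		return -errno;

	/* delegation options must be configured before instantiation */
	err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_cmds", "any", 0);
	if (err < 0) {
		err = -errno;
		goto out_close_fs;
	}

	/* instantiate the superblock */
	err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
	if (err < 0) {
		err = -errno;
		goto out_close_fs;
	}

	/* get a detached mount fd; it stays usable even if never attached */
	mnt_fd = sys_fsmount(fs_fd, 0, 0);
	if (mnt_fd < 0) {
		err = -errno;
		goto out_close_fs;
	}
	close(fs_fd);

	/* optionally attach the detached mount at a path */
	err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/tmp/bpffs", MOVE_MOUNT_F_EMPTY_PATH);
	if (err < 0)
		err = -errno;
	close(mnt_fd);
	return err;

out_close_fs:
	close(fs_fd);
	return err;
}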
static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
{
	int mnt_fd, err;

	/* set up token delegation mount options */
	err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
	if (!ASSERT_OK(err, "fs_cfg_cmds"))
		return err;
	err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str);
	if (!ASSERT_OK(err, "fs_cfg_maps"))
		return err;
	err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str);
	if (!ASSERT_OK(err, "fs_cfg_progs"))
		return err;
	err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str);
	if (!ASSERT_OK(err, "fs_cfg_attachs"))
		return err;

	/* instantiate FS object */
	err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
	if (err < 0)
		return -errno;

	/* create O_PATH fd for detached mount */
	mnt_fd = sys_fsmount(fs_fd, 0, 0);
	if (mnt_fd < 0)
		return -errno;

	return mnt_fd;
}

/* send FD over Unix domain (AF_UNIX) socket */
static int sendfd(int sockfd, int fd)
{
	struct msghdr msg = {};
	struct cmsghdr *cmsg;
	int fds[1] = { fd }, err;
	char iobuf[1];
	struct iovec io = {
		.iov_base = iobuf,
		.iov_len = sizeof(iobuf),
	};
	union {
		char buf[CMSG_SPACE(sizeof(fds))];
		struct cmsghdr align;
	} u;

	msg.msg_iov = &io;
	msg.msg_iovlen = 1;
	msg.msg_control = u.buf;
	msg.msg_controllen = sizeof(u.buf);
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
	memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));

	err = sendmsg(sockfd, &msg, 0);
	if (err < 0)
		err = -errno;
	if (!ASSERT_EQ(err, 1, "sendmsg"))
		return -EINVAL;

	return 0;
}

/* receive FD over Unix domain (AF_UNIX) socket */
static int recvfd(int sockfd, int *fd)
{
	struct msghdr msg = {};
	struct cmsghdr *cmsg;
	int fds[1], err;
	char iobuf[1];
	struct iovec io = {
		.iov_base = iobuf,
		.iov_len = sizeof(iobuf),
	};
	union {
		char buf[CMSG_SPACE(sizeof(fds))];
		struct cmsghdr align;
	} u;

	msg.msg_iov = &io;
	msg.msg_iovlen = 1;
	msg.msg_control = u.buf;
	msg.msg_controllen = sizeof(u.buf);

	err = recvmsg(sockfd, &msg, 0);
	if (err < 0)
		err = -errno;
	if (!ASSERT_EQ(err, 1, "recvmsg"))
		return -EINVAL;

	cmsg = CMSG_FIRSTHDR(&msg);
	if (!ASSERT_OK_PTR(cmsg, "cmsg_null") ||
	    !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") ||
	    !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") ||
	    !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type"))
		return -EINVAL;

	memcpy(fds, CMSG_DATA(cmsg), sizeof(fds));
	*fd = fds[0];

	return 0;
}

static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	ssize_t ret;

	do {
		ret = write(fd, buf, count);
	} while (ret < 0 && errno == EINTR);

	return ret;
}

static int write_file(const char *path, const void *buf, size_t count)
{
	int fd;
	ssize_t ret;

	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
	if (fd < 0)
		return -1;

	ret = write_nointr(fd, buf, count);
	close(fd);
	if (ret < 0 || (size_t)ret != count)
		return -1;

	return 0;
}

static int create_and_enter_userns(void)
{
	uid_t uid;
	gid_t gid;
	char map[100];

	uid = getuid();
	gid = getgid();

	if (unshare(CLONE_NEWUSER))
		return -1;

	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
	    errno != ENOENT)
		return -1;

	snprintf(map, sizeof(map), "0 %d 1", uid);
	if (write_file("/proc/self/uid_map", map, strlen(map)))
		return -1;

	snprintf(map, sizeof(map), "0 %d 1", gid);
	if (write_file("/proc/self/gid_map", map, strlen(map)))
		return -1;

	if (setgid(0))
		return -1;

	if (setuid(0))
		return -1;

	return 0;
}
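/* Illustrative sketch (not called by the tests): round-tripping an FD through
 * the sendfd()/recvfd() helpers above over a socketpair. Within one process
 * the received FD is simply a dup of the sent one; across fork() the same
 * mechanism transfers the open file description between processes, which is
 * exactly how child() and parent() below exchange BPF FS fds.
 */
static __attribute__((unused)) int example_fd_roundtrip(void)
{
	int sock_fds[2], recvd_fd = -1, err;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds) < 0)
		return -errno;

	err = sendfd(sock_fds[0], 1 /* stdout, arbitrary valid fd */);
	if (!err)
		err = recvfd(sock_fds[1], &recvd_fd);

	close(sock_fds[0]);
	close(sock_fds[1]);
	if (recvd_fd >= 0)
		close(recvd_fd);
	return err;
}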
typedef int (*child_callback_fn)(int);

static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback)
{
	int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1;

	/* set up userns with root mappings */
	err = create_and_enter_userns();
	if (!ASSERT_OK(err, "create_and_enter_userns"))
		goto cleanup;

	/* set up mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */
	err = unshare(CLONE_NEWNS);
	if (!ASSERT_OK(err, "create_mountns"))
		goto cleanup;

	err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
	if (!ASSERT_OK(err, "remount_root"))
		goto cleanup;

	fs_fd = create_bpffs_fd();
	if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* ensure unprivileged child cannot set delegation options */
	err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm");
	err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_maps_eperm");
	err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_progs_eperm");
	err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL);
	ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm");

	/* pass BPF FS context object to parent */
	err = sendfd(sock_fd, fs_fd);
	if (!ASSERT_OK(err, "send_fs_fd"))
		goto cleanup;
	zclose(fs_fd);

	/* avoid mucking around with mount namespaces and mounting at a
	 * well-known path, just get detached BPF FS mount fd back from parent
	 */
	err = recvfd(sock_fd, &mnt_fd);
	if (!ASSERT_OK(err, "recv_mnt_fd"))
		goto cleanup;

	/* try to fspick() BPF FS and try to add some delegation options */
	fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH);
	if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* ensure unprivileged child cannot reconfigure to set delegation options */
	err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any");
	if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) {
		err = -EINVAL;
		goto cleanup;
	}
	zclose(fs_fd);

	bpffs_fd = openat(mnt_fd, ".", O_RDONLY);
	if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* do custom test logic with the custom-configured BPF FS instance */
	err = callback(bpffs_fd);
	if (!ASSERT_OK(err, "test_callback"))
		goto cleanup;

	err = 0;
cleanup:
	zclose(sock_fd);
	zclose(mnt_fd);
	zclose(fs_fd);
	zclose(bpffs_fd);

	exit(-err);
}

static int wait_for_pid(pid_t pid)
{
	int status, ret;

again:
	ret = waitpid(pid, &status, 0);
	if (ret == -1) {
		if (errno == EINTR)
			goto again;

		return -1;
	}

	if (!WIFEXITED(status))
		return -1;

	return WEXITSTATUS(status);
}
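/* The child and parent coordinate over the AF_UNIX socketpair as follows:
 *   1. child (in fresh user+mount namespaces) creates an unconfigured BPF FS
 *      context with fsopen() and sends fs_fd to the parent;
 *   2. fully privileged parent sets the delegation options, instantiates the
 *      FS, and sends the detached mount fd back to the child;
 *   3. unprivileged child opens the mount and runs the test callback.
 * This way only the parent ever needs privileges in the init userns.
 */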
static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
{
	int fs_fd = -1, mnt_fd = -1, err;

	err = recvfd(sock_fd, &fs_fd);
	if (!ASSERT_OK(err, "recv_bpffs_fd"))
		goto cleanup;

	mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
	if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
		err = -EINVAL;
		goto cleanup;
	}
	zclose(fs_fd);

	/* pass detached BPF FS mount fd back to the child */
	err = sendfd(sock_fd, mnt_fd);
	if (!ASSERT_OK(err, "send_mnt_fd"))
		goto cleanup;
	zclose(mnt_fd);

	err = wait_for_pid(child_pid);
	ASSERT_OK(err, "waitpid_child");

cleanup:
	zclose(sock_fd);
	zclose(fs_fd);
	zclose(mnt_fd);

	if (child_pid > 0)
		(void)kill(child_pid, SIGKILL);
}

static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb)
{
	int sock_fds[2] = { -1, -1 };
	int child_pid = 0, err;

	err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds);
	if (!ASSERT_OK(err, "socketpair"))
		goto cleanup;

	child_pid = fork();
	if (!ASSERT_GE(child_pid, 0, "fork"))
		goto cleanup;

	if (child_pid == 0) {
		zclose(sock_fds[0]);
		return child(sock_fds[1], bpffs_opts, cb);
	} else {
		zclose(sock_fds[1]);
		return parent(child_pid, bpffs_opts, sock_fds[0]);
	}

cleanup:
	zclose(sock_fds[0]);
	zclose(sock_fds[1]);
	if (child_pid > 0)
		(void)kill(child_pid, SIGKILL);
}

static int userns_map_create(int mnt_fd)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts);
	int err, token_fd = -1, map_fd = -1;
	__u64 old_caps = 0;

	/* create BPF token from BPF FS mount */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "token_create")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* while inside non-init userns, we need both a BPF token *and*
	 * CAP_BPF inside current userns to create privileged map; let's test
	 * that neither BPF token alone nor namespaced CAP_BPF is sufficient
	 */
	err = drop_priv_caps(&old_caps);
	if (!ASSERT_OK(err, "drop_caps"))
		goto cleanup;

	/* no token, no CAP_BPF -> fail */
	map_opts.token_fd = 0;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* token without CAP_BPF -> fail */
	map_opts.token_fd = token_fd;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
	err = restore_priv_caps(old_caps);
	if (!ASSERT_OK(err, "restore_caps"))
		goto cleanup;

	/* CAP_BPF without token -> fail */
	map_opts.token_fd = 0;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* finally, namespaced CAP_BPF + token -> success */
	map_opts.token_fd = token_fd;
	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts);
	if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) {
		err = -EINVAL;
		goto cleanup;
	}

	err = 0;
cleanup:
	zclose(token_fd);
	zclose(map_fd);
	return err;
}
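/* Illustrative sketch (unused): what bpf_token_create() boils down to at the
 * syscall level. The BPF_TOKEN_CREATE command and the attr.token_create field
 * names follow my understanding of the uapi this test targets; treat them as
 * assumptions and prefer the libbpf API used by the tests.
 */
static __attribute__((unused)) int example_raw_token_create(int bpffs_mnt_fd)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	/* the token is derived from an open fd of a BPF FS mount that was
	 * created with delegate_* options set
	 */
	attr.token_create.bpffs_fd = bpffs_mnt_fd;
	fd = syscall(__NR_bpf, BPF_TOKEN_CREATE, &attr, sizeof(attr));
	return fd < 0 ? -errno : fd;
}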
static int userns_btf_load(int mnt_fd)
{
	LIBBPF_OPTS(bpf_btf_load_opts, btf_opts);
	int err, token_fd = -1, btf_fd = -1;
	const void *raw_btf_data;
	struct btf *btf = NULL;
	__u32 raw_btf_size;
	__u64 old_caps = 0;

	/* create BPF token from BPF FS mount */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "token_create")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* while inside non-init userns, we need both a BPF token *and*
	 * CAP_BPF inside current userns to load BTF; let's test that neither
	 * BPF token alone nor namespaced CAP_BPF is sufficient
	 */
	err = drop_priv_caps(&old_caps);
	if (!ASSERT_OK(err, "drop_caps"))
		goto cleanup;

	/* set up trivial BTF data to load into the kernel */
	btf = btf__new_empty();
	if (!ASSERT_OK_PTR(btf, "empty_btf")) {
		err = -EINVAL;
		goto cleanup;
	}

	ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");

	raw_btf_data = btf__raw_data(btf, &raw_btf_size);
	if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* no token + no CAP_BPF -> failure */
	btf_opts.token_fd = 0;
	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
	if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* token + no CAP_BPF -> failure */
	btf_opts.token_fd = token_fd;
	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
	if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
	err = restore_priv_caps(old_caps);
	if (!ASSERT_OK(err, "restore_caps"))
		goto cleanup;

	/* token + CAP_BPF -> success */
	btf_opts.token_fd = token_fd;
	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
	if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success")) {
		err = -EINVAL;
		goto cleanup;
	}

	err = 0;
cleanup:
	btf__free(btf);
	zclose(btf_fd);
	zclose(token_fd);
	return err;
}
static int userns_prog_load(int mnt_fd)
{
	LIBBPF_OPTS(bpf_prog_load_opts, prog_opts);
	int err, token_fd = -1, prog_fd = -1;
	struct bpf_insn insns[] = {
		/* bpf_jiffies64() requires CAP_BPF */
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
		/* bpf_get_current_task() requires CAP_PERFMON */
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task),
		/* r0 = 0; exit; */
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	size_t insn_cnt = ARRAY_SIZE(insns);
	__u64 old_caps = 0;

	/* create BPF token from BPF FS mount */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "token_create")) {
		err = -EINVAL;
		goto cleanup;
	}

	/* validate we can successfully load BPF program with token; this
	 * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF)
	 * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have
	 * BPF token wired properly in a bunch of places in the kernel
	 */
	prog_opts.token_fd = token_fd;
	prog_opts.expected_attach_type = BPF_XDP;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_GT(prog_fd, 0, "prog_fd")) {
		err = -EPERM;
		goto cleanup;
	}
	zclose(prog_fd);

	/* no token + caps -> failure */
	prog_opts.token_fd = 0;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm_no_token")) {
		err = -EPERM;
		goto cleanup;
	}

	err = drop_priv_caps(&old_caps);
	if (!ASSERT_OK(err, "drop_caps"))
		goto cleanup;

	/* no caps + token -> failure */
	prog_opts.token_fd = token_fd;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm_no_caps")) {
		err = -EPERM;
		goto cleanup;
	}

	/* no caps + no token -> definitely a failure */
	prog_opts.token_fd = 0;
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
				insns, insn_cnt, &prog_opts);
	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm_no_caps_no_token")) {
		err = -EPERM;
		goto cleanup;
	}

	err = 0;
cleanup:
	zclose(prog_fd);
	zclose(token_fd);
	return err;
}

static int userns_obj_priv_map(int mnt_fd)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	char buf[256];
	struct priv_map *skel;
	int err, token_fd;

	skel = priv_map__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		priv_map__destroy(skel);
		return -EINVAL;
	}

	/* use bpf_token_path to provide BPF FS path */
	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
	opts.bpf_token_path = buf;
	skel = priv_map__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
		return -EINVAL;

	err = priv_map__load(skel);
	priv_map__destroy(skel);
	if (!ASSERT_OK(err, "obj_token_path_load"))
		return -EINVAL;

	/* create token and pass it through bpf_token_fd */
	token_fd = bpf_token_create(mnt_fd, NULL);
	if (!ASSERT_GT(token_fd, 0, "create_token"))
		return -EINVAL;

	opts.bpf_token_path = NULL;
	opts.bpf_token_fd = token_fd;
	skel = priv_map__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_fd_open"))
		return -EINVAL;

	/* we can close our token FD, bpf_object owns dup()'ed FD now */
	close(token_fd);

	err = priv_map__load(skel);
	priv_map__destroy(skel);
	if (!ASSERT_OK(err, "obj_token_fd_load"))
		return -EINVAL;

	return 0;
}

static int userns_obj_priv_prog(int mnt_fd)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	char buf[256];
	struct priv_prog *skel;
	int err;

	skel = priv_prog__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		priv_prog__destroy(skel);
		return -EINVAL;
	}

	/* use bpf_token_path to provide BPF FS path */
	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
	opts.bpf_token_path = buf;
	skel = priv_prog__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
		return -EINVAL;

	err = priv_prog__load(skel);
	priv_prog__destroy(skel);
	if (!ASSERT_OK(err, "obj_token_path_load"))
		return -EINVAL;

	return 0;
}
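/* Illustrative sketch (unused): the generic, non-skeleton libbpf pattern for
 * the same token-path setup. The object file name "my_prog.bpf.o" is a
 * placeholder assumption; the bpf_token_path open option works the same way
 * through bpf_object__open_file() as through the skeleton *__open_opts()
 * wrappers used in the tests.
 */
static __attribute__((unused)) struct bpf_object *example_open_with_token(int mnt_fd)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	char path[64];

	/* any path reaching the delegated BPF FS mount works, including a
	 * /proc/self/fd link to a detached mount fd
	 */
	snprintf(path, sizeof(path), "/proc/self/fd/%d", mnt_fd);
	opts.bpf_token_path = path;
	return bpf_object__open_file("my_prog.bpf.o", &opts);
}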
/* When called with a BPF FS that doesn't delegate the BPF_BTF_LOAD command,
 * struct_ops application should fail, as BTF won't be uploaded into the
 * kernel, even if STRUCT_OPS programs themselves are allowed
 */
static int validate_struct_ops_load(int mnt_fd, bool expect_success)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	char buf[256];
	struct dummy_st_ops_success *skel;
	int err;

	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
	opts.bpf_token_path = buf;
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
		return -EINVAL;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (expect_success) {
		if (!ASSERT_OK(err, "obj_token_path_load"))
			return -EINVAL;
	} else /* expect failure */ {
		if (!ASSERT_ERR(err, "obj_token_path_load"))
			return -EINVAL;
	}

	return 0;
}

static int userns_obj_priv_btf_fail(int mnt_fd)
{
	return validate_struct_ops_load(mnt_fd, false /* should fail */);
}

static int userns_obj_priv_btf_success(int mnt_fd)
{
	return validate_struct_ops_load(mnt_fd, true /* should succeed */);
}

#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs"

static int userns_obj_priv_implicit_token(int mnt_fd)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	struct dummy_st_ops_success *skel;
	int err;

	/* before we mount BPF FS with token delegation, struct_ops skeleton
	 * should fail to load
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		dummy_st_ops_success__destroy(skel);
		return -EINVAL;
	}

	/* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF
	 * token automatically and implicitly
	 */
	err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH);
	if (!ASSERT_OK(err, "move_mount_bpffs"))
		return -EINVAL;

	/* disable implicit BPF token creation by setting
	 * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail
	 */
	err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/);
	if (!ASSERT_OK(err, "setenv_token_path"))
		return -EINVAL;
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) {
		unsetenv(TOKEN_ENVVAR);
		dummy_st_ops_success__destroy(skel);
		return -EINVAL;
	}
	unsetenv(TOKEN_ENVVAR);

	/* now the same struct_ops skeleton should succeed thanks to libbpf
	 * creating BPF token from /sys/fs/bpf mount point
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
		return -EINVAL;

	dummy_st_ops_success__destroy(skel);

	/* now disable implicit token through empty bpf_token_path, should fail */
	opts.bpf_token_path = "";
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
		return -EINVAL;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
		return -EINVAL;

	/* now disable implicit token through negative bpf_token_fd, should fail */
	opts.bpf_token_path = NULL;
	opts.bpf_token_fd = -1;
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open"))
		return -EINVAL;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (!ASSERT_ERR(err, "obj_neg_token_fd_load"))
		return -EINVAL;

	return 0;
}

static int userns_obj_priv_implicit_token_envvar(int mnt_fd)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts);
	struct dummy_st_ops_success *skel;
	int err;

	/* before we mount BPF FS with token delegation, struct_ops skeleton
	 * should fail to load
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
		dummy_st_ops_success__destroy(skel);
		return -EINVAL;
	}

	/* mount custom BPF FS at a custom location, so libbpf can't create
	 * BPF token implicitly, unless pointed to it through
	 * LIBBPF_BPF_TOKEN_PATH envvar
	 */
	rmdir(TOKEN_BPFFS_CUSTOM);
	if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom"))
		goto err_out;
	err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH);
	if (!ASSERT_OK(err, "move_mount_bpffs"))
		goto err_out;

	/* even though we have BPF FS with delegation, it's not at default
	 * /sys/fs/bpf location, so we still fail to load until envvar is set up
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) {
		dummy_st_ops_success__destroy(skel);
		goto err_out;
	}

	err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/);
	if (!ASSERT_OK(err, "setenv_token_path"))
		goto err_out;

	/* now the same struct_ops skeleton should succeed thanks to libbpf
	 * creating BPF token from custom mount point
	 */
	skel = dummy_st_ops_success__open_and_load();
	if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
		goto err_out;

	dummy_st_ops_success__destroy(skel);

	/* now disable implicit token through empty bpf_token_path, envvar
	 * will be ignored, should fail
	 */
	opts.bpf_token_path = "";
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
		goto err_out;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
		goto err_out;

	/* now disable implicit token through negative bpf_token_fd, envvar
	 * will be ignored, should fail
	 */
	opts.bpf_token_path = NULL;
	opts.bpf_token_fd = -1;
	skel = dummy_st_ops_success__open_opts(&opts);
	if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open"))
		goto err_out;

	err = dummy_st_ops_success__load(skel);
	dummy_st_ops_success__destroy(skel);
	if (!ASSERT_ERR(err, "obj_neg_token_fd_load"))
		goto err_out;

	rmdir(TOKEN_BPFFS_CUSTOM);
	unsetenv(TOKEN_ENVVAR);
	return 0;
err_out:
	rmdir(TOKEN_BPFFS_CUSTOM);
	unsetenv(TOKEN_ENVVAR);
	return -EINVAL;
}

#define bit(n) (1ULL << (n))
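/* Each subtest below parameterizes the parent's BPF FS with delegation
 * masks. Values can be given numerically (bit masks built with bit() or a
 * raw shift) or, via the *_str fields, as strings: the catch-all "any" or
 * symbolic names like "map_create" and "xdp". Judging by the mixed-case
 * strings used below ("PROG_LOAD", "XDP" vs "xdp"), the kernel appears to
 * match these names case-insensitively; the subtests intentionally exercise
 * both the numeric and string forms.
 */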
void test_token(void)
{
	if (test__start_subtest("map_token")) {
		struct bpffs_opts opts = {
			.cmds_str = "map_create",
			.maps_str = "stack",
		};

		subtest_userns(&opts, userns_map_create);
	}
	if (test__start_subtest("btf_token")) {
		struct bpffs_opts opts = {
			.cmds = 1ULL << BPF_BTF_LOAD,
		};

		subtest_userns(&opts, userns_btf_load);
	}
	if (test__start_subtest("prog_token")) {
		struct bpffs_opts opts = {
			.cmds_str = "PROG_LOAD",
			.progs_str = "XDP",
			.attachs_str = "xdp",
		};

		subtest_userns(&opts, userns_prog_load);
	}
	if (test__start_subtest("obj_priv_map")) {
		struct bpffs_opts opts = {
			.cmds = bit(BPF_MAP_CREATE),
			.maps = bit(BPF_MAP_TYPE_QUEUE),
		};

		subtest_userns(&opts, userns_obj_priv_map);
	}
	if (test__start_subtest("obj_priv_prog")) {
		struct bpffs_opts opts = {
			.cmds = bit(BPF_PROG_LOAD),
			.progs = bit(BPF_PROG_TYPE_KPROBE),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_prog);
	}
	if (test__start_subtest("obj_priv_btf_fail")) {
		struct bpffs_opts opts = {
			/* disallow BTF loading */
			.cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_btf_fail);
	}
	if (test__start_subtest("obj_priv_btf_success")) {
		struct bpffs_opts opts = {
			/* allow BTF loading */
			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_btf_success);
	}
	if (test__start_subtest("obj_priv_implicit_token")) {
		struct bpffs_opts opts = {
			/* allow BTF loading */
			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_implicit_token);
	}
	if (test__start_subtest("obj_priv_implicit_token_envvar")) {
		struct bpffs_opts opts = {
			/* allow BTF loading */
			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
			.attachs = ~0ULL,
		};

		subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
	}
}