1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ 3 #define _GNU_SOURCE 4 #include <test_progs.h> 5 #include <bpf/btf.h> 6 #include "cap_helpers.h" 7 #include <fcntl.h> 8 #include <sched.h> 9 #include <signal.h> 10 #include <unistd.h> 11 #include <linux/filter.h> 12 #include <linux/unistd.h> 13 #include <linux/mount.h> 14 #include <sys/socket.h> 15 #include <sys/stat.h> 16 #include <sys/syscall.h> 17 #include <sys/un.h> 18 #include "priv_map.skel.h" 19 #include "priv_prog.skel.h" 20 #include "dummy_st_ops_success.skel.h" 21 #include "token_lsm.skel.h" 22 23 static inline int sys_mount(const char *dev_name, const char *dir_name, 24 const char *type, unsigned long flags, 25 const void *data) 26 { 27 return syscall(__NR_mount, dev_name, dir_name, type, flags, data); 28 } 29 30 static inline int sys_fsopen(const char *fsname, unsigned flags) 31 { 32 return syscall(__NR_fsopen, fsname, flags); 33 } 34 35 static inline int sys_fspick(int dfd, const char *path, unsigned flags) 36 { 37 return syscall(__NR_fspick, dfd, path, flags); 38 } 39 40 static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) 41 { 42 return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); 43 } 44 45 static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) 46 { 47 return syscall(__NR_fsmount, fs_fd, flags, ms_flags); 48 } 49 50 static inline int sys_move_mount(int from_dfd, const char *from_path, 51 int to_dfd, const char *to_path, 52 unsigned flags) 53 { 54 return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags); 55 } 56 57 static int drop_priv_caps(__u64 *old_caps) 58 { 59 return cap_disable_effective((1ULL << CAP_BPF) | 60 (1ULL << CAP_PERFMON) | 61 (1ULL << CAP_NET_ADMIN) | 62 (1ULL << CAP_SYS_ADMIN), old_caps); 63 } 64 65 static int restore_priv_caps(__u64 old_caps) 66 { 67 return cap_enable_effective(old_caps, NULL); 68 } 69 70 static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str) 71 { 72 char buf[32]; 73 int err; 74 75 if (!mask_str) { 76 if (mask == ~0ULL) { 77 mask_str = "any"; 78 } else { 79 snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); 80 mask_str = buf; 81 } 82 } 83 84 err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, 85 mask_str, 0); 86 if (err < 0) 87 err = -errno; 88 return err; 89 } 90 91 #define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0) 92 93 struct bpffs_opts { 94 __u64 cmds; 95 __u64 maps; 96 __u64 progs; 97 __u64 attachs; 98 const char *cmds_str; 99 const char *maps_str; 100 const char *progs_str; 101 const char *attachs_str; 102 }; 103 104 static int create_bpffs_fd(void) 105 { 106 int fs_fd; 107 108 /* create VFS context */ 109 fs_fd = sys_fsopen("bpf", 0); 110 ASSERT_GE(fs_fd, 0, "fs_fd"); 111 112 return fs_fd; 113 } 114 115 static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) 116 { 117 int mnt_fd, err; 118 119 /* set up token delegation mount options */ 120 err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); 121 if (!ASSERT_OK(err, "fs_cfg_cmds")) 122 return err; 123 err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str); 124 if (!ASSERT_OK(err, "fs_cfg_maps")) 125 return err; 126 err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str); 127 if (!ASSERT_OK(err, "fs_cfg_progs")) 128 return err; 129 err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str); 130 if (!ASSERT_OK(err, "fs_cfg_attachs")) 131 return err; 132 133 /* instantiate FS object */ 134 err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); 135 if (err < 0) 136 return -errno; 137 138 /* create O_PATH fd for detached mount */ 139 mnt_fd = sys_fsmount(fs_fd, 0, 0); 140 if (err < 0) 141 return -errno; 142 143 return mnt_fd; 144 } 145 146 /* send FD over Unix domain (AF_UNIX) socket */ 147 static int sendfd(int sockfd, int fd) 148 { 149 struct msghdr msg = {}; 150 struct cmsghdr *cmsg; 151 int fds[1] = { fd }, err; 152 char iobuf[1]; 153 struct iovec io = { 154 .iov_base = iobuf, 155 .iov_len = sizeof(iobuf), 156 }; 157 union { 158 char buf[CMSG_SPACE(sizeof(fds))]; 159 struct cmsghdr align; 160 } u; 161 162 msg.msg_iov = &io; 163 msg.msg_iovlen = 1; 164 msg.msg_control = u.buf; 165 msg.msg_controllen = sizeof(u.buf); 166 cmsg = CMSG_FIRSTHDR(&msg); 167 cmsg->cmsg_level = SOL_SOCKET; 168 cmsg->cmsg_type = SCM_RIGHTS; 169 cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); 170 memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); 171 172 err = sendmsg(sockfd, &msg, 0); 173 if (err < 0) 174 err = -errno; 175 if (!ASSERT_EQ(err, 1, "sendmsg")) 176 return -EINVAL; 177 178 return 0; 179 } 180 181 /* receive FD over Unix domain (AF_UNIX) socket */ 182 static int recvfd(int sockfd, int *fd) 183 { 184 struct msghdr msg = {}; 185 struct cmsghdr *cmsg; 186 int fds[1], err; 187 char iobuf[1]; 188 struct iovec io = { 189 .iov_base = iobuf, 190 .iov_len = sizeof(iobuf), 191 }; 192 union { 193 char buf[CMSG_SPACE(sizeof(fds))]; 194 struct cmsghdr align; 195 } u; 196 197 msg.msg_iov = &io; 198 msg.msg_iovlen = 1; 199 msg.msg_control = u.buf; 200 msg.msg_controllen = sizeof(u.buf); 201 202 err = recvmsg(sockfd, &msg, 0); 203 if (err < 0) 204 err = -errno; 205 if (!ASSERT_EQ(err, 1, "recvmsg")) 206 return -EINVAL; 207 208 cmsg = CMSG_FIRSTHDR(&msg); 209 if (!ASSERT_OK_PTR(cmsg, "cmsg_null") || 210 !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") || 211 !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") || 212 !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type")) 213 return -EINVAL; 214 215 memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); 216 *fd = fds[0]; 217 218 return 0; 219 } 220 221 static ssize_t write_nointr(int fd, const void *buf, size_t count) 222 { 223 ssize_t ret; 224 225 do { 226 ret = write(fd, buf, count); 227 } while (ret < 0 && errno == EINTR); 228 229 return ret; 230 } 231 232 static int write_file(const char *path, const void *buf, size_t count) 233 { 234 int fd; 235 ssize_t ret; 236 237 fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); 238 if (fd < 0) 239 return -1; 240 241 ret = write_nointr(fd, buf, count); 242 close(fd); 243 if (ret < 0 || (size_t)ret != count) 244 return -1; 245 246 return 0; 247 } 248 249 static int create_and_enter_userns(void) 250 { 251 uid_t uid; 252 gid_t gid; 253 char map[100]; 254 255 uid = getuid(); 256 gid = getgid(); 257 258 if (unshare(CLONE_NEWUSER)) 259 return -1; 260 261 if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && 262 errno != ENOENT) 263 return -1; 264 265 snprintf(map, sizeof(map), "0 %d 1", uid); 266 if (write_file("/proc/self/uid_map", map, strlen(map))) 267 return -1; 268 269 270 snprintf(map, sizeof(map), "0 %d 1", gid); 271 if (write_file("/proc/self/gid_map", map, strlen(map))) 272 return -1; 273 274 if (setgid(0)) 275 return -1; 276 277 if (setuid(0)) 278 return -1; 279 280 return 0; 281 } 282 283 typedef int (*child_callback_fn)(int bpffs_fd, struct token_lsm *lsm_skel); 284 285 static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback) 286 { 287 int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1; 288 struct token_lsm *lsm_skel = NULL; 289 290 /* load and attach LSM "policy" before we go into unpriv userns */ 291 lsm_skel = token_lsm__open_and_load(); 292 if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel_load")) { 293 err = -EINVAL; 294 goto cleanup; 295 } 296 lsm_skel->bss->my_pid = getpid(); 297 err = token_lsm__attach(lsm_skel); 298 if (!ASSERT_OK(err, "lsm_skel_attach")) 299 goto cleanup; 300 301 /* setup userns with root mappings */ 302 err = create_and_enter_userns(); 303 if (!ASSERT_OK(err, "create_and_enter_userns")) 304 goto cleanup; 305 306 /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */ 307 err = unshare(CLONE_NEWNS); 308 if (!ASSERT_OK(err, "create_mountns")) 309 goto cleanup; 310 311 err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); 312 if (!ASSERT_OK(err, "remount_root")) 313 goto cleanup; 314 315 fs_fd = create_bpffs_fd(); 316 if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) { 317 err = -EINVAL; 318 goto cleanup; 319 } 320 321 /* ensure unprivileged child cannot set delegation options */ 322 err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL); 323 ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); 324 err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL); 325 ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); 326 err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL); 327 ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); 328 err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL); 329 ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); 330 331 /* pass BPF FS context object to parent */ 332 err = sendfd(sock_fd, fs_fd); 333 if (!ASSERT_OK(err, "send_fs_fd")) 334 goto cleanup; 335 zclose(fs_fd); 336 337 /* avoid mucking around with mount namespaces and mounting at 338 * well-known path, just get detach-mounted BPF FS fd back from parent 339 */ 340 err = recvfd(sock_fd, &mnt_fd); 341 if (!ASSERT_OK(err, "recv_mnt_fd")) 342 goto cleanup; 343 344 /* try to fspick() BPF FS and try to add some delegation options */ 345 fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH); 346 if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) { 347 err = -EINVAL; 348 goto cleanup; 349 } 350 351 /* ensure unprivileged child cannot reconfigure to set delegation options */ 352 err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any"); 353 if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { 354 err = -EINVAL; 355 goto cleanup; 356 } 357 err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any"); 358 if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { 359 err = -EINVAL; 360 goto cleanup; 361 } 362 err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any"); 363 if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { 364 err = -EINVAL; 365 goto cleanup; 366 } 367 err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any"); 368 if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { 369 err = -EINVAL; 370 goto cleanup; 371 } 372 zclose(fs_fd); 373 374 bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR); 375 if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) { 376 err = -EINVAL; 377 goto cleanup; 378 } 379 380 /* create BPF token FD and pass it to parent for some extra checks */ 381 token_fd = bpf_token_create(bpffs_fd, NULL); 382 if (!ASSERT_GT(token_fd, 0, "child_token_create")) { 383 err = -EINVAL; 384 goto cleanup; 385 } 386 err = sendfd(sock_fd, token_fd); 387 if (!ASSERT_OK(err, "send_token_fd")) 388 goto cleanup; 389 zclose(token_fd); 390 391 /* do custom test logic with customly set up BPF FS instance */ 392 err = callback(bpffs_fd, lsm_skel); 393 if (!ASSERT_OK(err, "test_callback")) 394 goto cleanup; 395 396 err = 0; 397 cleanup: 398 zclose(sock_fd); 399 zclose(mnt_fd); 400 zclose(fs_fd); 401 zclose(bpffs_fd); 402 zclose(token_fd); 403 404 lsm_skel->bss->my_pid = 0; 405 token_lsm__destroy(lsm_skel); 406 407 exit(-err); 408 } 409 410 static int wait_for_pid(pid_t pid) 411 { 412 int status, ret; 413 414 again: 415 ret = waitpid(pid, &status, 0); 416 if (ret == -1) { 417 if (errno == EINTR) 418 goto again; 419 420 return -1; 421 } 422 423 if (!WIFEXITED(status)) 424 return -1; 425 426 return WEXITSTATUS(status); 427 } 428 429 static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) 430 { 431 int fs_fd = -1, mnt_fd = -1, token_fd = -1, err; 432 433 err = recvfd(sock_fd, &fs_fd); 434 if (!ASSERT_OK(err, "recv_bpffs_fd")) 435 goto cleanup; 436 437 mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); 438 if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { 439 err = -EINVAL; 440 goto cleanup; 441 } 442 zclose(fs_fd); 443 444 /* pass BPF FS context object to parent */ 445 err = sendfd(sock_fd, mnt_fd); 446 if (!ASSERT_OK(err, "send_mnt_fd")) 447 goto cleanup; 448 zclose(mnt_fd); 449 450 /* receive BPF token FD back from child for some extra tests */ 451 err = recvfd(sock_fd, &token_fd); 452 if (!ASSERT_OK(err, "recv_token_fd")) 453 goto cleanup; 454 455 err = wait_for_pid(child_pid); 456 ASSERT_OK(err, "waitpid_child"); 457 458 cleanup: 459 zclose(sock_fd); 460 zclose(fs_fd); 461 zclose(mnt_fd); 462 zclose(token_fd); 463 464 if (child_pid > 0) 465 (void)kill(child_pid, SIGKILL); 466 } 467 468 static void subtest_userns(struct bpffs_opts *bpffs_opts, 469 child_callback_fn child_cb) 470 { 471 int sock_fds[2] = { -1, -1 }; 472 int child_pid = 0, err; 473 474 err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds); 475 if (!ASSERT_OK(err, "socketpair")) 476 goto cleanup; 477 478 child_pid = fork(); 479 if (!ASSERT_GE(child_pid, 0, "fork")) 480 goto cleanup; 481 482 if (child_pid == 0) { 483 zclose(sock_fds[0]); 484 return child(sock_fds[1], bpffs_opts, child_cb); 485 486 } else { 487 zclose(sock_fds[1]); 488 return parent(child_pid, bpffs_opts, sock_fds[0]); 489 } 490 491 cleanup: 492 zclose(sock_fds[0]); 493 zclose(sock_fds[1]); 494 if (child_pid > 0) 495 (void)kill(child_pid, SIGKILL); 496 } 497 498 static int userns_map_create(int mnt_fd, struct token_lsm *lsm_skel) 499 { 500 LIBBPF_OPTS(bpf_map_create_opts, map_opts); 501 int err, token_fd = -1, map_fd = -1; 502 __u64 old_caps = 0; 503 504 /* create BPF token from BPF FS mount */ 505 token_fd = bpf_token_create(mnt_fd, NULL); 506 if (!ASSERT_GT(token_fd, 0, "token_create")) { 507 err = -EINVAL; 508 goto cleanup; 509 } 510 511 /* while inside non-init userns, we need both a BPF token *and* 512 * CAP_BPF inside current userns to create privileged map; let's test 513 * that neither BPF token alone nor namespaced CAP_BPF is sufficient 514 */ 515 err = drop_priv_caps(&old_caps); 516 if (!ASSERT_OK(err, "drop_caps")) 517 goto cleanup; 518 519 /* no token, no CAP_BPF -> fail */ 520 map_opts.map_flags = 0; 521 map_opts.token_fd = 0; 522 map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts); 523 if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) { 524 err = -EINVAL; 525 goto cleanup; 526 } 527 528 /* token without CAP_BPF -> fail */ 529 map_opts.map_flags = BPF_F_TOKEN_FD; 530 map_opts.token_fd = token_fd; 531 map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts); 532 if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) { 533 err = -EINVAL; 534 goto cleanup; 535 } 536 537 /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ 538 err = restore_priv_caps(old_caps); 539 if (!ASSERT_OK(err, "restore_caps")) 540 goto cleanup; 541 542 /* CAP_BPF without token -> fail */ 543 map_opts.map_flags = 0; 544 map_opts.token_fd = 0; 545 map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts); 546 if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) { 547 err = -EINVAL; 548 goto cleanup; 549 } 550 551 /* finally, namespaced CAP_BPF + token -> success */ 552 map_opts.map_flags = BPF_F_TOKEN_FD; 553 map_opts.token_fd = token_fd; 554 map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts); 555 if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) { 556 err = -EINVAL; 557 goto cleanup; 558 } 559 560 cleanup: 561 zclose(token_fd); 562 zclose(map_fd); 563 return err; 564 } 565 566 static int userns_btf_load(int mnt_fd, struct token_lsm *lsm_skel) 567 { 568 LIBBPF_OPTS(bpf_btf_load_opts, btf_opts); 569 int err, token_fd = -1, btf_fd = -1; 570 const void *raw_btf_data; 571 struct btf *btf = NULL; 572 __u32 raw_btf_size; 573 __u64 old_caps = 0; 574 575 /* create BPF token from BPF FS mount */ 576 token_fd = bpf_token_create(mnt_fd, NULL); 577 if (!ASSERT_GT(token_fd, 0, "token_create")) { 578 err = -EINVAL; 579 goto cleanup; 580 } 581 582 /* while inside non-init userns, we need both a BPF token *and* 583 * CAP_BPF inside current userns to create privileged map; let's test 584 * that neither BPF token alone nor namespaced CAP_BPF is sufficient 585 */ 586 err = drop_priv_caps(&old_caps); 587 if (!ASSERT_OK(err, "drop_caps")) 588 goto cleanup; 589 590 /* setup a trivial BTF data to load to the kernel */ 591 btf = btf__new_empty(); 592 if (!ASSERT_OK_PTR(btf, "empty_btf")) 593 goto cleanup; 594 595 ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type"); 596 597 raw_btf_data = btf__raw_data(btf, &raw_btf_size); 598 if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data")) 599 goto cleanup; 600 601 /* no token + no CAP_BPF -> failure */ 602 btf_opts.btf_flags = 0; 603 btf_opts.token_fd = 0; 604 btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); 605 if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail")) 606 goto cleanup; 607 608 /* token + no CAP_BPF -> failure */ 609 btf_opts.btf_flags = BPF_F_TOKEN_FD; 610 btf_opts.token_fd = token_fd; 611 btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); 612 if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail")) 613 goto cleanup; 614 615 /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ 616 err = restore_priv_caps(old_caps); 617 if (!ASSERT_OK(err, "restore_caps")) 618 goto cleanup; 619 620 /* token + CAP_BPF -> success */ 621 btf_opts.btf_flags = BPF_F_TOKEN_FD; 622 btf_opts.token_fd = token_fd; 623 btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); 624 if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success")) 625 goto cleanup; 626 627 err = 0; 628 cleanup: 629 btf__free(btf); 630 zclose(btf_fd); 631 zclose(token_fd); 632 return err; 633 } 634 635 static int userns_prog_load(int mnt_fd, struct token_lsm *lsm_skel) 636 { 637 LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); 638 int err, token_fd = -1, prog_fd = -1; 639 struct bpf_insn insns[] = { 640 /* bpf_jiffies64() requires CAP_BPF */ 641 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), 642 /* bpf_get_current_task() requires CAP_PERFMON */ 643 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task), 644 /* r0 = 0; exit; */ 645 BPF_MOV64_IMM(BPF_REG_0, 0), 646 BPF_EXIT_INSN(), 647 }; 648 size_t insn_cnt = ARRAY_SIZE(insns); 649 __u64 old_caps = 0; 650 651 /* create BPF token from BPF FS mount */ 652 token_fd = bpf_token_create(mnt_fd, NULL); 653 if (!ASSERT_GT(token_fd, 0, "token_create")) { 654 err = -EINVAL; 655 goto cleanup; 656 } 657 658 /* validate we can successfully load BPF program with token; this 659 * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF) 660 * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have 661 * BPF token wired properly in a bunch of places in the kernel 662 */ 663 prog_opts.prog_flags = BPF_F_TOKEN_FD; 664 prog_opts.token_fd = token_fd; 665 prog_opts.expected_attach_type = BPF_XDP; 666 prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", 667 insns, insn_cnt, &prog_opts); 668 if (!ASSERT_GT(prog_fd, 0, "prog_fd")) { 669 err = -EPERM; 670 goto cleanup; 671 } 672 673 /* no token + caps -> failure */ 674 prog_opts.prog_flags = 0; 675 prog_opts.token_fd = 0; 676 prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", 677 insns, insn_cnt, &prog_opts); 678 if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { 679 err = -EPERM; 680 goto cleanup; 681 } 682 683 err = drop_priv_caps(&old_caps); 684 if (!ASSERT_OK(err, "drop_caps")) 685 goto cleanup; 686 687 /* no caps + token -> failure */ 688 prog_opts.prog_flags = BPF_F_TOKEN_FD; 689 prog_opts.token_fd = token_fd; 690 prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", 691 insns, insn_cnt, &prog_opts); 692 if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { 693 err = -EPERM; 694 goto cleanup; 695 } 696 697 /* no caps + no token -> definitely a failure */ 698 prog_opts.prog_flags = 0; 699 prog_opts.token_fd = 0; 700 prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", 701 insns, insn_cnt, &prog_opts); 702 if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { 703 err = -EPERM; 704 goto cleanup; 705 } 706 707 err = 0; 708 cleanup: 709 zclose(prog_fd); 710 zclose(token_fd); 711 return err; 712 } 713 714 static int userns_obj_priv_map(int mnt_fd, struct token_lsm *lsm_skel) 715 { 716 LIBBPF_OPTS(bpf_object_open_opts, opts); 717 char buf[256]; 718 struct priv_map *skel; 719 int err; 720 721 skel = priv_map__open_and_load(); 722 if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { 723 priv_map__destroy(skel); 724 return -EINVAL; 725 } 726 727 /* use bpf_token_path to provide BPF FS path */ 728 snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); 729 opts.bpf_token_path = buf; 730 skel = priv_map__open_opts(&opts); 731 if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) 732 return -EINVAL; 733 734 err = priv_map__load(skel); 735 priv_map__destroy(skel); 736 if (!ASSERT_OK(err, "obj_token_path_load")) 737 return -EINVAL; 738 739 return 0; 740 } 741 742 static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel) 743 { 744 LIBBPF_OPTS(bpf_object_open_opts, opts); 745 char buf[256]; 746 struct priv_prog *skel; 747 int err; 748 749 skel = priv_prog__open_and_load(); 750 if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { 751 priv_prog__destroy(skel); 752 return -EINVAL; 753 } 754 755 /* use bpf_token_path to provide BPF FS path */ 756 snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); 757 opts.bpf_token_path = buf; 758 skel = priv_prog__open_opts(&opts); 759 if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) 760 return -EINVAL; 761 err = priv_prog__load(skel); 762 priv_prog__destroy(skel); 763 if (!ASSERT_OK(err, "obj_token_path_load")) 764 return -EINVAL; 765 766 /* provide BPF token, but reject bpf_token_capable() with LSM */ 767 lsm_skel->bss->reject_capable = true; 768 lsm_skel->bss->reject_cmd = false; 769 skel = priv_prog__open_opts(&opts); 770 if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cap_open")) 771 return -EINVAL; 772 err = priv_prog__load(skel); 773 priv_prog__destroy(skel); 774 if (!ASSERT_ERR(err, "obj_token_lsm_reject_cap_load")) 775 return -EINVAL; 776 777 /* provide BPF token, but reject bpf_token_cmd() with LSM */ 778 lsm_skel->bss->reject_capable = false; 779 lsm_skel->bss->reject_cmd = true; 780 skel = priv_prog__open_opts(&opts); 781 if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cmd_open")) 782 return -EINVAL; 783 err = priv_prog__load(skel); 784 priv_prog__destroy(skel); 785 if (!ASSERT_ERR(err, "obj_token_lsm_reject_cmd_load")) 786 return -EINVAL; 787 788 return 0; 789 } 790 791 /* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, 792 * which should cause struct_ops application to fail, as BTF won't be uploaded 793 * into the kernel, even if STRUCT_OPS programs themselves are allowed 794 */ 795 static int validate_struct_ops_load(int mnt_fd, bool expect_success) 796 { 797 LIBBPF_OPTS(bpf_object_open_opts, opts); 798 char buf[256]; 799 struct dummy_st_ops_success *skel; 800 int err; 801 802 snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); 803 opts.bpf_token_path = buf; 804 skel = dummy_st_ops_success__open_opts(&opts); 805 if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) 806 return -EINVAL; 807 808 err = dummy_st_ops_success__load(skel); 809 dummy_st_ops_success__destroy(skel); 810 if (expect_success) { 811 if (!ASSERT_OK(err, "obj_token_path_load")) 812 return -EINVAL; 813 } else /* expect failure */ { 814 if (!ASSERT_ERR(err, "obj_token_path_load")) 815 return -EINVAL; 816 } 817 818 return 0; 819 } 820 821 static int userns_obj_priv_btf_fail(int mnt_fd, struct token_lsm *lsm_skel) 822 { 823 return validate_struct_ops_load(mnt_fd, false /* should fail */); 824 } 825 826 static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel) 827 { 828 return validate_struct_ops_load(mnt_fd, true /* should succeed */); 829 } 830 831 #define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" 832 #define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" 833 834 static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel) 835 { 836 LIBBPF_OPTS(bpf_object_open_opts, opts); 837 struct dummy_st_ops_success *skel; 838 int err; 839 840 /* before we mount BPF FS with token delegation, struct_ops skeleton 841 * should fail to load 842 */ 843 skel = dummy_st_ops_success__open_and_load(); 844 if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { 845 dummy_st_ops_success__destroy(skel); 846 return -EINVAL; 847 } 848 849 /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF 850 * token automatically and implicitly 851 */ 852 err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH); 853 if (!ASSERT_OK(err, "move_mount_bpffs")) 854 return -EINVAL; 855 856 /* disable implicit BPF token creation by setting 857 * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail 858 */ 859 err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/); 860 if (!ASSERT_OK(err, "setenv_token_path")) 861 return -EINVAL; 862 skel = dummy_st_ops_success__open_and_load(); 863 if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) { 864 unsetenv(TOKEN_ENVVAR); 865 dummy_st_ops_success__destroy(skel); 866 return -EINVAL; 867 } 868 unsetenv(TOKEN_ENVVAR); 869 870 /* now the same struct_ops skeleton should succeed thanks to libbpf 871 * creating BPF token from /sys/fs/bpf mount point 872 */ 873 skel = dummy_st_ops_success__open_and_load(); 874 if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) 875 return -EINVAL; 876 877 dummy_st_ops_success__destroy(skel); 878 879 /* now disable implicit token through empty bpf_token_path, should fail */ 880 opts.bpf_token_path = ""; 881 skel = dummy_st_ops_success__open_opts(&opts); 882 if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) 883 return -EINVAL; 884 885 err = dummy_st_ops_success__load(skel); 886 dummy_st_ops_success__destroy(skel); 887 if (!ASSERT_ERR(err, "obj_empty_token_path_load")) 888 return -EINVAL; 889 890 return 0; 891 } 892 893 static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel) 894 { 895 LIBBPF_OPTS(bpf_object_open_opts, opts); 896 struct dummy_st_ops_success *skel; 897 int err; 898 899 /* before we mount BPF FS with token delegation, struct_ops skeleton 900 * should fail to load 901 */ 902 skel = dummy_st_ops_success__open_and_load(); 903 if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { 904 dummy_st_ops_success__destroy(skel); 905 return -EINVAL; 906 } 907 908 /* mount custom BPF FS over custom location, so libbpf can't create 909 * BPF token implicitly, unless pointed to it through 910 * LIBBPF_BPF_TOKEN_PATH envvar 911 */ 912 rmdir(TOKEN_BPFFS_CUSTOM); 913 if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) 914 goto err_out; 915 err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); 916 if (!ASSERT_OK(err, "move_mount_bpffs")) 917 goto err_out; 918 919 /* even though we have BPF FS with delegation, it's not at default 920 * /sys/fs/bpf location, so we still fail to load until envvar is set up 921 */ 922 skel = dummy_st_ops_success__open_and_load(); 923 if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) { 924 dummy_st_ops_success__destroy(skel); 925 goto err_out; 926 } 927 928 err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); 929 if (!ASSERT_OK(err, "setenv_token_path")) 930 goto err_out; 931 932 /* now the same struct_ops skeleton should succeed thanks to libbpf 933 * creating BPF token from custom mount point 934 */ 935 skel = dummy_st_ops_success__open_and_load(); 936 if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) 937 goto err_out; 938 939 dummy_st_ops_success__destroy(skel); 940 941 /* now disable implicit token through empty bpf_token_path, envvar 942 * will be ignored, should fail 943 */ 944 opts.bpf_token_path = ""; 945 skel = dummy_st_ops_success__open_opts(&opts); 946 if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) 947 goto err_out; 948 949 err = dummy_st_ops_success__load(skel); 950 dummy_st_ops_success__destroy(skel); 951 if (!ASSERT_ERR(err, "obj_empty_token_path_load")) 952 goto err_out; 953 954 rmdir(TOKEN_BPFFS_CUSTOM); 955 unsetenv(TOKEN_ENVVAR); 956 return 0; 957 err_out: 958 rmdir(TOKEN_BPFFS_CUSTOM); 959 unsetenv(TOKEN_ENVVAR); 960 return -EINVAL; 961 } 962 963 #define bit(n) (1ULL << (n)) 964 965 void test_token(void) 966 { 967 if (test__start_subtest("map_token")) { 968 struct bpffs_opts opts = { 969 .cmds_str = "map_create", 970 .maps_str = "stack", 971 }; 972 973 subtest_userns(&opts, userns_map_create); 974 } 975 if (test__start_subtest("btf_token")) { 976 struct bpffs_opts opts = { 977 .cmds = 1ULL << BPF_BTF_LOAD, 978 }; 979 980 subtest_userns(&opts, userns_btf_load); 981 } 982 if (test__start_subtest("prog_token")) { 983 struct bpffs_opts opts = { 984 .cmds_str = "PROG_LOAD", 985 .progs_str = "XDP", 986 .attachs_str = "xdp", 987 }; 988 989 subtest_userns(&opts, userns_prog_load); 990 } 991 if (test__start_subtest("obj_priv_map")) { 992 struct bpffs_opts opts = { 993 .cmds = bit(BPF_MAP_CREATE), 994 .maps = bit(BPF_MAP_TYPE_QUEUE), 995 }; 996 997 subtest_userns(&opts, userns_obj_priv_map); 998 } 999 if (test__start_subtest("obj_priv_prog")) { 1000 struct bpffs_opts opts = { 1001 .cmds = bit(BPF_PROG_LOAD), 1002 .progs = bit(BPF_PROG_TYPE_KPROBE), 1003 .attachs = ~0ULL, 1004 }; 1005 1006 subtest_userns(&opts, userns_obj_priv_prog); 1007 } 1008 if (test__start_subtest("obj_priv_btf_fail")) { 1009 struct bpffs_opts opts = { 1010 /* disallow BTF loading */ 1011 .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), 1012 .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), 1013 .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), 1014 .attachs = ~0ULL, 1015 }; 1016 1017 subtest_userns(&opts, userns_obj_priv_btf_fail); 1018 } 1019 if (test__start_subtest("obj_priv_btf_success")) { 1020 struct bpffs_opts opts = { 1021 /* allow BTF loading */ 1022 .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), 1023 .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), 1024 .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), 1025 .attachs = ~0ULL, 1026 }; 1027 1028 subtest_userns(&opts, userns_obj_priv_btf_success); 1029 } 1030 if (test__start_subtest("obj_priv_implicit_token")) { 1031 struct bpffs_opts opts = { 1032 /* allow BTF loading */ 1033 .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), 1034 .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), 1035 .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), 1036 .attachs = ~0ULL, 1037 }; 1038 1039 subtest_userns(&opts, userns_obj_priv_implicit_token); 1040 } 1041 if (test__start_subtest("obj_priv_implicit_token_envvar")) { 1042 struct bpffs_opts opts = { 1043 /* allow BTF loading */ 1044 .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), 1045 .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), 1046 .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), 1047 .attachs = ~0ULL, 1048 }; 1049 1050 subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); 1051 } 1052 } 1053