1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Minimal file system backend for holding eBPF maps and programs, 4 * used by bpf(2) object pinning. 5 * 6 * Authors: 7 * 8 * Daniel Borkmann <daniel@iogearbox.net> 9 */ 10 11 #include <linux/init.h> 12 #include <linux/magic.h> 13 #include <linux/major.h> 14 #include <linux/mount.h> 15 #include <linux/namei.h> 16 #include <linux/fs.h> 17 #include <linux/fs_context.h> 18 #include <linux/fs_parser.h> 19 #include <linux/kdev_t.h> 20 #include <linux/filter.h> 21 #include <linux/bpf.h> 22 #include <linux/bpf_trace.h> 23 #include <linux/kstrtox.h> 24 #include "preload/bpf_preload.h" 25 26 enum bpf_type { 27 BPF_TYPE_UNSPEC = 0, 28 BPF_TYPE_PROG, 29 BPF_TYPE_MAP, 30 BPF_TYPE_LINK, 31 }; 32 33 static void *bpf_any_get(void *raw, enum bpf_type type) 34 { 35 switch (type) { 36 case BPF_TYPE_PROG: 37 bpf_prog_inc(raw); 38 break; 39 case BPF_TYPE_MAP: 40 bpf_map_inc_with_uref(raw); 41 break; 42 case BPF_TYPE_LINK: 43 bpf_link_inc(raw); 44 break; 45 default: 46 WARN_ON_ONCE(1); 47 break; 48 } 49 50 return raw; 51 } 52 53 static void bpf_any_put(void *raw, enum bpf_type type) 54 { 55 switch (type) { 56 case BPF_TYPE_PROG: 57 bpf_prog_put(raw); 58 break; 59 case BPF_TYPE_MAP: 60 bpf_map_put_with_uref(raw); 61 break; 62 case BPF_TYPE_LINK: 63 bpf_link_put(raw); 64 break; 65 default: 66 WARN_ON_ONCE(1); 67 break; 68 } 69 } 70 71 static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) 72 { 73 void *raw; 74 75 raw = bpf_map_get_with_uref(ufd); 76 if (!IS_ERR(raw)) { 77 *type = BPF_TYPE_MAP; 78 return raw; 79 } 80 81 raw = bpf_prog_get(ufd); 82 if (!IS_ERR(raw)) { 83 *type = BPF_TYPE_PROG; 84 return raw; 85 } 86 87 raw = bpf_link_get_from_fd(ufd); 88 if (!IS_ERR(raw)) { 89 *type = BPF_TYPE_LINK; 90 return raw; 91 } 92 93 return ERR_PTR(-EINVAL); 94 } 95 96 static const struct inode_operations bpf_dir_iops; 97 98 static const struct inode_operations bpf_prog_iops = { }; 99 static const struct inode_operations bpf_map_iops = { }; 100 static const struct inode_operations bpf_link_iops = { }; 101 102 struct inode *bpf_get_inode(struct super_block *sb, 103 const struct inode *dir, 104 umode_t mode) 105 { 106 struct inode *inode; 107 108 switch (mode & S_IFMT) { 109 case S_IFDIR: 110 case S_IFREG: 111 case S_IFLNK: 112 break; 113 default: 114 return ERR_PTR(-EINVAL); 115 } 116 117 inode = new_inode(sb); 118 if (!inode) 119 return ERR_PTR(-ENOSPC); 120 121 inode->i_ino = get_next_ino(); 122 simple_inode_init_ts(inode); 123 124 inode_init_owner(&nop_mnt_idmap, inode, dir, mode); 125 126 return inode; 127 } 128 129 static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) 130 { 131 *type = BPF_TYPE_UNSPEC; 132 if (inode->i_op == &bpf_prog_iops) 133 *type = BPF_TYPE_PROG; 134 else if (inode->i_op == &bpf_map_iops) 135 *type = BPF_TYPE_MAP; 136 else if (inode->i_op == &bpf_link_iops) 137 *type = BPF_TYPE_LINK; 138 else 139 return -EACCES; 140 141 return 0; 142 } 143 144 static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, 145 struct inode *dir) 146 { 147 d_make_persistent(dentry, inode); 148 149 inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); 150 } 151 152 static struct dentry *bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir, 153 struct dentry *dentry, umode_t mode) 154 { 155 struct inode *inode; 156 157 inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); 158 if (IS_ERR(inode)) 159 return ERR_CAST(inode); 160 161 inode->i_op = &bpf_dir_iops; 162 inode->i_fop = &simple_dir_operations; 163 164 inc_nlink(inode); 165 inc_nlink(dir); 166 167 bpf_dentry_finalize(dentry, inode, dir); 168 return NULL; 169 } 170 171 struct map_iter { 172 void *key; 173 bool done; 174 }; 175 176 static struct map_iter *map_iter(struct seq_file *m) 177 { 178 return m->private; 179 } 180 181 static struct bpf_map *seq_file_to_map(struct seq_file *m) 182 { 183 return file_inode(m->file)->i_private; 184 } 185 186 static void map_iter_free(struct map_iter *iter) 187 { 188 if (iter) { 189 kfree(iter->key); 190 kfree(iter); 191 } 192 } 193 194 static struct map_iter *map_iter_alloc(struct bpf_map *map) 195 { 196 struct map_iter *iter; 197 198 iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN); 199 if (!iter) 200 goto error; 201 202 iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN); 203 if (!iter->key) 204 goto error; 205 206 return iter; 207 208 error: 209 map_iter_free(iter); 210 return NULL; 211 } 212 213 static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) 214 { 215 struct bpf_map *map = seq_file_to_map(m); 216 void *key = map_iter(m)->key; 217 void *prev_key; 218 219 (*pos)++; 220 if (map_iter(m)->done) 221 return NULL; 222 223 if (unlikely(v == SEQ_START_TOKEN)) 224 prev_key = NULL; 225 else 226 prev_key = key; 227 228 rcu_read_lock(); 229 if (map->ops->map_get_next_key(map, prev_key, key)) { 230 map_iter(m)->done = true; 231 key = NULL; 232 } 233 rcu_read_unlock(); 234 return key; 235 } 236 237 static void *map_seq_start(struct seq_file *m, loff_t *pos) 238 { 239 if (map_iter(m)->done) 240 return NULL; 241 242 return *pos ? map_iter(m)->key : SEQ_START_TOKEN; 243 } 244 245 static void map_seq_stop(struct seq_file *m, void *v) 246 { 247 } 248 249 static int map_seq_show(struct seq_file *m, void *v) 250 { 251 struct bpf_map *map = seq_file_to_map(m); 252 void *key = map_iter(m)->key; 253 254 if (unlikely(v == SEQ_START_TOKEN)) { 255 seq_puts(m, "# WARNING!! The output is for debug purpose only\n"); 256 seq_puts(m, "# WARNING!! The output format will change\n"); 257 } else { 258 map->ops->map_seq_show_elem(map, key, m); 259 } 260 261 return 0; 262 } 263 264 static const struct seq_operations bpffs_map_seq_ops = { 265 .start = map_seq_start, 266 .next = map_seq_next, 267 .show = map_seq_show, 268 .stop = map_seq_stop, 269 }; 270 271 static int bpffs_map_open(struct inode *inode, struct file *file) 272 { 273 struct bpf_map *map = inode->i_private; 274 struct map_iter *iter; 275 struct seq_file *m; 276 int err; 277 278 iter = map_iter_alloc(map); 279 if (!iter) 280 return -ENOMEM; 281 282 err = seq_open(file, &bpffs_map_seq_ops); 283 if (err) { 284 map_iter_free(iter); 285 return err; 286 } 287 288 m = file->private_data; 289 m->private = iter; 290 291 return 0; 292 } 293 294 static int bpffs_map_release(struct inode *inode, struct file *file) 295 { 296 struct seq_file *m = file->private_data; 297 298 map_iter_free(map_iter(m)); 299 300 return seq_release(inode, file); 301 } 302 303 /* bpffs_map_fops should only implement the basic 304 * read operation for a BPF map. The purpose is to 305 * provide a simple user intuitive way to do 306 * "cat bpffs/pathto/a-pinned-map". 307 * 308 * Other operations (e.g. write, lookup...) should be realized by 309 * the userspace tools (e.g. bpftool) through the 310 * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update 311 * interface. 312 */ 313 static const struct file_operations bpffs_map_fops = { 314 .open = bpffs_map_open, 315 .read = seq_read, 316 .release = bpffs_map_release, 317 }; 318 319 static int bpffs_obj_open(struct inode *inode, struct file *file) 320 { 321 return -EIO; 322 } 323 324 static const struct file_operations bpffs_obj_fops = { 325 .open = bpffs_obj_open, 326 }; 327 328 static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, 329 const struct inode_operations *iops, 330 const struct file_operations *fops) 331 { 332 struct inode *dir = dentry->d_parent->d_inode; 333 struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); 334 if (IS_ERR(inode)) 335 return PTR_ERR(inode); 336 337 inode->i_op = iops; 338 inode->i_fop = fops; 339 inode->i_private = raw; 340 341 bpf_dentry_finalize(dentry, inode, dir); 342 return 0; 343 } 344 345 static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) 346 { 347 return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, 348 &bpffs_obj_fops); 349 } 350 351 static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) 352 { 353 struct bpf_map *map = arg; 354 355 return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, 356 bpf_map_support_seq_show(map) ? 357 &bpffs_map_fops : &bpffs_obj_fops); 358 } 359 360 static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg) 361 { 362 struct bpf_link *link = arg; 363 364 return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops, 365 bpf_link_is_iter(link) ? 366 &bpf_iter_fops : &bpffs_obj_fops); 367 } 368 369 static struct dentry * 370 bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) 371 { 372 /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future 373 * extensions. That allows popoulate_bpffs() create special files. 374 */ 375 if ((dir->i_mode & S_IALLUGO) && 376 strchr(dentry->d_name.name, '.')) 377 return ERR_PTR(-EPERM); 378 379 return simple_lookup(dir, dentry, flags); 380 } 381 382 static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir, 383 struct dentry *dentry, const char *target) 384 { 385 char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); 386 struct inode *inode; 387 388 if (!link) 389 return -ENOMEM; 390 391 inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); 392 if (IS_ERR(inode)) { 393 kfree(link); 394 return PTR_ERR(inode); 395 } 396 397 inode->i_op = &simple_symlink_inode_operations; 398 inode->i_link = link; 399 400 bpf_dentry_finalize(dentry, inode, dir); 401 return 0; 402 } 403 404 static const struct inode_operations bpf_dir_iops = { 405 .lookup = bpf_lookup, 406 .mkdir = bpf_mkdir, 407 .symlink = bpf_symlink, 408 .rmdir = simple_rmdir, 409 .rename = simple_rename, 410 .link = simple_link, 411 .unlink = simple_unlink, 412 }; 413 414 /* pin iterator link into bpffs */ 415 static int bpf_iter_link_pin_kernel(struct dentry *parent, 416 const char *name, struct bpf_link *link) 417 { 418 umode_t mode = S_IFREG | S_IRUSR; 419 struct dentry *dentry; 420 int ret; 421 422 dentry = simple_start_creating(parent, name); 423 if (IS_ERR(dentry)) 424 return PTR_ERR(dentry); 425 ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops, 426 &bpf_iter_fops); 427 simple_done_creating(dentry); 428 return ret; 429 } 430 431 static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw, 432 enum bpf_type type) 433 { 434 struct dentry *dentry; 435 struct inode *dir; 436 struct path path; 437 umode_t mode; 438 int ret; 439 440 dentry = start_creating_user_path(path_fd, pathname, &path, 0); 441 if (IS_ERR(dentry)) 442 return PTR_ERR(dentry); 443 444 dir = d_inode(path.dentry); 445 if (dir->i_op != &bpf_dir_iops) { 446 ret = -EPERM; 447 goto out; 448 } 449 450 mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); 451 ret = security_path_mknod(&path, dentry, mode, 0); 452 if (ret) 453 goto out; 454 455 switch (type) { 456 case BPF_TYPE_PROG: 457 ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw); 458 break; 459 case BPF_TYPE_MAP: 460 ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw); 461 break; 462 case BPF_TYPE_LINK: 463 ret = vfs_mkobj(dentry, mode, bpf_mklink, raw); 464 break; 465 default: 466 ret = -EPERM; 467 } 468 out: 469 end_creating_path(&path, dentry); 470 return ret; 471 } 472 473 int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname) 474 { 475 enum bpf_type type; 476 void *raw; 477 int ret; 478 479 raw = bpf_fd_probe_obj(ufd, &type); 480 if (IS_ERR(raw)) 481 return PTR_ERR(raw); 482 483 ret = bpf_obj_do_pin(path_fd, pathname, raw, type); 484 if (ret != 0) 485 bpf_any_put(raw, type); 486 487 return ret; 488 } 489 490 static void *bpf_obj_do_get(int path_fd, const char __user *pathname, 491 enum bpf_type *type, int flags) 492 { 493 struct inode *inode; 494 struct path path; 495 void *raw; 496 int ret; 497 498 ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path); 499 if (ret) 500 return ERR_PTR(ret); 501 502 inode = d_backing_inode(path.dentry); 503 ret = path_permission(&path, ACC_MODE(flags)); 504 if (ret) 505 goto out; 506 507 ret = bpf_inode_type(inode, type); 508 if (ret) 509 goto out; 510 511 raw = bpf_any_get(inode->i_private, *type); 512 if (!IS_ERR(raw)) 513 touch_atime(&path); 514 515 path_put(&path); 516 return raw; 517 out: 518 path_put(&path); 519 return ERR_PTR(ret); 520 } 521 522 int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags) 523 { 524 enum bpf_type type = BPF_TYPE_UNSPEC; 525 int f_flags; 526 void *raw; 527 int ret; 528 529 f_flags = bpf_get_file_flag(flags); 530 if (f_flags < 0) 531 return f_flags; 532 533 raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags); 534 if (IS_ERR(raw)) 535 return PTR_ERR(raw); 536 537 if (type == BPF_TYPE_PROG) 538 ret = bpf_prog_new_fd(raw); 539 else if (type == BPF_TYPE_MAP) 540 ret = bpf_map_new_fd(raw, f_flags); 541 else if (type == BPF_TYPE_LINK) 542 ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(raw); 543 else 544 return -ENOENT; 545 546 if (ret < 0) 547 bpf_any_put(raw, type); 548 return ret; 549 } 550 551 static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) 552 { 553 struct bpf_prog *prog; 554 int ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ); 555 if (ret) 556 return ERR_PTR(ret); 557 558 if (inode->i_op == &bpf_map_iops) 559 return ERR_PTR(-EINVAL); 560 if (inode->i_op == &bpf_link_iops) 561 return ERR_PTR(-EINVAL); 562 if (inode->i_op != &bpf_prog_iops) 563 return ERR_PTR(-EACCES); 564 565 prog = inode->i_private; 566 567 ret = security_bpf_prog(prog); 568 if (ret < 0) 569 return ERR_PTR(ret); 570 571 if (!bpf_prog_get_ok(prog, &type, false)) 572 return ERR_PTR(-EINVAL); 573 574 bpf_prog_inc(prog); 575 return prog; 576 } 577 578 struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) 579 { 580 struct bpf_prog *prog; 581 struct path path; 582 int ret = kern_path(name, LOOKUP_FOLLOW, &path); 583 if (ret) 584 return ERR_PTR(ret); 585 prog = __get_prog_inode(d_backing_inode(path.dentry), type); 586 if (!IS_ERR(prog)) 587 touch_atime(&path); 588 path_put(&path); 589 return prog; 590 } 591 EXPORT_SYMBOL(bpf_prog_get_type_path); 592 593 struct bpffs_btf_enums { 594 const struct btf *btf; 595 const struct btf_type *cmd_t; 596 const struct btf_type *map_t; 597 const struct btf_type *prog_t; 598 const struct btf_type *attach_t; 599 }; 600 601 static int find_bpffs_btf_enums(struct bpffs_btf_enums *info) 602 { 603 struct { 604 const struct btf_type **type; 605 const char *name; 606 } btf_enums[] = { 607 {&info->cmd_t, "bpf_cmd"}, 608 {&info->map_t, "bpf_map_type"}, 609 {&info->prog_t, "bpf_prog_type"}, 610 {&info->attach_t, "bpf_attach_type"}, 611 }; 612 const struct btf *btf; 613 int i, id; 614 615 memset(info, 0, sizeof(*info)); 616 617 btf = bpf_get_btf_vmlinux(); 618 if (IS_ERR(btf)) 619 return PTR_ERR(btf); 620 if (!btf) 621 return -ENOENT; 622 623 info->btf = btf; 624 625 for (i = 0; i < ARRAY_SIZE(btf_enums); i++) { 626 id = btf_find_by_name_kind(btf, btf_enums[i].name, 627 BTF_KIND_ENUM); 628 if (id < 0) 629 return -ESRCH; 630 631 *btf_enums[i].type = btf_type_by_id(btf, id); 632 } 633 634 return 0; 635 } 636 637 static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t, 638 const char *prefix, const char *str, int *value) 639 { 640 const struct btf_enum *e; 641 const char *name; 642 int i, n, pfx_len = strlen(prefix); 643 644 *value = 0; 645 646 if (!btf || !enum_t) 647 return false; 648 649 for (i = 0, n = btf_vlen(enum_t); i < n; i++) { 650 e = &btf_enum(enum_t)[i]; 651 652 name = btf_name_by_offset(btf, e->name_off); 653 if (!name || strncasecmp(name, prefix, pfx_len) != 0) 654 continue; 655 656 /* match symbolic name case insensitive and ignoring prefix */ 657 if (strcasecmp(name + pfx_len, str) == 0) { 658 *value = e->val; 659 return true; 660 } 661 } 662 663 return false; 664 } 665 666 static void seq_print_delegate_opts(struct seq_file *m, 667 const char *opt_name, 668 const struct btf *btf, 669 const struct btf_type *enum_t, 670 const char *prefix, 671 u64 delegate_msk, u64 any_msk) 672 { 673 const struct btf_enum *e; 674 bool first = true; 675 const char *name; 676 u64 msk; 677 int i, n, pfx_len = strlen(prefix); 678 679 delegate_msk &= any_msk; /* clear unknown bits */ 680 681 if (delegate_msk == 0) 682 return; 683 684 seq_printf(m, ",%s", opt_name); 685 if (delegate_msk == any_msk) { 686 seq_printf(m, "=any"); 687 return; 688 } 689 690 if (btf && enum_t) { 691 for (i = 0, n = btf_vlen(enum_t); i < n; i++) { 692 e = &btf_enum(enum_t)[i]; 693 name = btf_name_by_offset(btf, e->name_off); 694 if (!name || strncasecmp(name, prefix, pfx_len) != 0) 695 continue; 696 msk = 1ULL << e->val; 697 if (delegate_msk & msk) { 698 /* emit lower-case name without prefix */ 699 seq_putc(m, first ? '=' : ':'); 700 name += pfx_len; 701 while (*name) { 702 seq_putc(m, tolower(*name)); 703 name++; 704 } 705 706 delegate_msk &= ~msk; 707 first = false; 708 } 709 } 710 } 711 if (delegate_msk) 712 seq_printf(m, "%c0x%llx", first ? '=' : ':', delegate_msk); 713 } 714 715 /* 716 * Display the mount options in /proc/mounts. 717 */ 718 static int bpf_show_options(struct seq_file *m, struct dentry *root) 719 { 720 struct inode *inode = d_inode(root); 721 umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX; 722 struct bpf_mount_opts *opts = root->d_sb->s_fs_info; 723 u64 mask; 724 725 if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) 726 seq_printf(m, ",uid=%u", 727 from_kuid_munged(&init_user_ns, inode->i_uid)); 728 if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) 729 seq_printf(m, ",gid=%u", 730 from_kgid_munged(&init_user_ns, inode->i_gid)); 731 if (mode != S_IRWXUGO) 732 seq_printf(m, ",mode=%o", mode); 733 734 if (opts->delegate_cmds || opts->delegate_maps || 735 opts->delegate_progs || opts->delegate_attachs) { 736 struct bpffs_btf_enums info; 737 738 /* ignore errors, fallback to hex */ 739 (void)find_bpffs_btf_enums(&info); 740 741 mask = (1ULL << __MAX_BPF_CMD) - 1; 742 seq_print_delegate_opts(m, "delegate_cmds", 743 info.btf, info.cmd_t, "BPF_", 744 opts->delegate_cmds, mask); 745 746 mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; 747 seq_print_delegate_opts(m, "delegate_maps", 748 info.btf, info.map_t, "BPF_MAP_TYPE_", 749 opts->delegate_maps, mask); 750 751 mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; 752 seq_print_delegate_opts(m, "delegate_progs", 753 info.btf, info.prog_t, "BPF_PROG_TYPE_", 754 opts->delegate_progs, mask); 755 756 mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; 757 seq_print_delegate_opts(m, "delegate_attachs", 758 info.btf, info.attach_t, "BPF_", 759 opts->delegate_attachs, mask); 760 } 761 762 return 0; 763 } 764 765 static void bpf_destroy_inode(struct inode *inode) 766 { 767 enum bpf_type type; 768 769 if (S_ISLNK(inode->i_mode)) 770 kfree(inode->i_link); 771 if (!bpf_inode_type(inode, &type)) 772 bpf_any_put(inode->i_private, type); 773 free_inode_nonrcu(inode); 774 } 775 776 const struct super_operations bpf_super_ops = { 777 .statfs = simple_statfs, 778 .drop_inode = inode_just_drop, 779 .show_options = bpf_show_options, 780 .destroy_inode = bpf_destroy_inode, 781 }; 782 783 enum { 784 OPT_UID, 785 OPT_GID, 786 OPT_MODE, 787 OPT_DELEGATE_CMDS, 788 OPT_DELEGATE_MAPS, 789 OPT_DELEGATE_PROGS, 790 OPT_DELEGATE_ATTACHS, 791 }; 792 793 static const struct fs_parameter_spec bpf_fs_parameters[] = { 794 fsparam_u32 ("uid", OPT_UID), 795 fsparam_u32 ("gid", OPT_GID), 796 fsparam_u32oct ("mode", OPT_MODE), 797 fsparam_string ("delegate_cmds", OPT_DELEGATE_CMDS), 798 fsparam_string ("delegate_maps", OPT_DELEGATE_MAPS), 799 fsparam_string ("delegate_progs", OPT_DELEGATE_PROGS), 800 fsparam_string ("delegate_attachs", OPT_DELEGATE_ATTACHS), 801 {} 802 }; 803 804 static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) 805 { 806 struct bpf_mount_opts *opts = fc->s_fs_info; 807 struct fs_parse_result result; 808 kuid_t uid; 809 kgid_t gid; 810 int opt, err; 811 812 opt = fs_parse(fc, bpf_fs_parameters, param, &result); 813 if (opt < 0) { 814 /* We might like to report bad mount options here, but 815 * traditionally we've ignored all mount options, so we'd 816 * better continue to ignore non-existing options for bpf. 817 */ 818 if (opt == -ENOPARAM) { 819 opt = vfs_parse_fs_param_source(fc, param); 820 if (opt != -ENOPARAM) 821 return opt; 822 823 return 0; 824 } 825 826 if (opt < 0) 827 return opt; 828 } 829 830 switch (opt) { 831 case OPT_UID: 832 uid = make_kuid(current_user_ns(), result.uint_32); 833 if (!uid_valid(uid)) 834 goto bad_value; 835 836 /* 837 * The requested uid must be representable in the 838 * filesystem's idmapping. 839 */ 840 if (!kuid_has_mapping(fc->user_ns, uid)) 841 goto bad_value; 842 843 opts->uid = uid; 844 break; 845 case OPT_GID: 846 gid = make_kgid(current_user_ns(), result.uint_32); 847 if (!gid_valid(gid)) 848 goto bad_value; 849 850 /* 851 * The requested gid must be representable in the 852 * filesystem's idmapping. 853 */ 854 if (!kgid_has_mapping(fc->user_ns, gid)) 855 goto bad_value; 856 857 opts->gid = gid; 858 break; 859 case OPT_MODE: 860 opts->mode = result.uint_32 & S_IALLUGO; 861 break; 862 case OPT_DELEGATE_CMDS: 863 case OPT_DELEGATE_MAPS: 864 case OPT_DELEGATE_PROGS: 865 case OPT_DELEGATE_ATTACHS: { 866 struct bpffs_btf_enums info; 867 const struct btf_type *enum_t; 868 const char *enum_pfx; 869 u64 *delegate_msk, msk = 0; 870 char *p, *str; 871 int val; 872 873 /* ignore errors, fallback to hex */ 874 (void)find_bpffs_btf_enums(&info); 875 876 switch (opt) { 877 case OPT_DELEGATE_CMDS: 878 delegate_msk = &opts->delegate_cmds; 879 enum_t = info.cmd_t; 880 enum_pfx = "BPF_"; 881 break; 882 case OPT_DELEGATE_MAPS: 883 delegate_msk = &opts->delegate_maps; 884 enum_t = info.map_t; 885 enum_pfx = "BPF_MAP_TYPE_"; 886 break; 887 case OPT_DELEGATE_PROGS: 888 delegate_msk = &opts->delegate_progs; 889 enum_t = info.prog_t; 890 enum_pfx = "BPF_PROG_TYPE_"; 891 break; 892 case OPT_DELEGATE_ATTACHS: 893 delegate_msk = &opts->delegate_attachs; 894 enum_t = info.attach_t; 895 enum_pfx = "BPF_"; 896 break; 897 default: 898 return -EINVAL; 899 } 900 901 str = param->string; 902 while ((p = strsep(&str, ":"))) { 903 if (strcmp(p, "any") == 0) { 904 msk |= ~0ULL; 905 } else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) { 906 msk |= 1ULL << val; 907 } else { 908 err = kstrtou64(p, 0, &msk); 909 if (err) 910 return err; 911 } 912 } 913 914 /* Setting delegation mount options requires privileges */ 915 if (msk && !capable(CAP_SYS_ADMIN)) 916 return -EPERM; 917 918 *delegate_msk |= msk; 919 break; 920 } 921 default: 922 /* ignore unknown mount options */ 923 break; 924 } 925 926 return 0; 927 bad_value: 928 return invalfc(fc, "Bad value for '%s'", param->key); 929 } 930 931 struct bpf_preload_ops *bpf_preload_ops; 932 EXPORT_SYMBOL_GPL(bpf_preload_ops); 933 934 static bool bpf_preload_mod_get(void) 935 { 936 /* If bpf_preload.ko wasn't loaded earlier then load it now. 937 * When bpf_preload is built into vmlinux the module's __init 938 * function will populate it. 939 */ 940 if (!bpf_preload_ops) { 941 request_module("bpf_preload"); 942 if (!bpf_preload_ops) 943 return false; 944 } 945 /* And grab the reference, so the module doesn't disappear while the 946 * kernel is interacting with the kernel module and its UMD. 947 */ 948 if (!try_module_get(bpf_preload_ops->owner)) { 949 pr_err("bpf_preload module get failed.\n"); 950 return false; 951 } 952 return true; 953 } 954 955 static void bpf_preload_mod_put(void) 956 { 957 if (bpf_preload_ops) 958 /* now user can "rmmod bpf_preload" if necessary */ 959 module_put(bpf_preload_ops->owner); 960 } 961 962 static DEFINE_MUTEX(bpf_preload_lock); 963 964 static int populate_bpffs(struct dentry *parent) 965 { 966 struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {}; 967 int err = 0, i; 968 969 /* grab the mutex to make sure the kernel interactions with bpf_preload 970 * are serialized 971 */ 972 mutex_lock(&bpf_preload_lock); 973 974 /* if bpf_preload.ko wasn't built into vmlinux then load it */ 975 if (!bpf_preload_mod_get()) 976 goto out; 977 978 err = bpf_preload_ops->preload(objs); 979 if (err) 980 goto out_put; 981 for (i = 0; i < BPF_PRELOAD_LINKS; i++) { 982 bpf_link_inc(objs[i].link); 983 err = bpf_iter_link_pin_kernel(parent, 984 objs[i].link_name, objs[i].link); 985 if (err) { 986 bpf_link_put(objs[i].link); 987 goto out_put; 988 } 989 } 990 out_put: 991 bpf_preload_mod_put(); 992 out: 993 mutex_unlock(&bpf_preload_lock); 994 return err; 995 } 996 997 static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) 998 { 999 static const struct tree_descr bpf_rfiles[] = { { "" } }; 1000 struct bpf_mount_opts *opts = sb->s_fs_info; 1001 struct inode *inode; 1002 int ret; 1003 1004 /* Mounting an instance of BPF FS requires privileges */ 1005 if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN)) 1006 return -EPERM; 1007 1008 ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); 1009 if (ret) 1010 return ret; 1011 1012 sb->s_op = &bpf_super_ops; 1013 1014 inode = sb->s_root->d_inode; 1015 inode->i_uid = opts->uid; 1016 inode->i_gid = opts->gid; 1017 inode->i_op = &bpf_dir_iops; 1018 inode->i_mode &= ~S_IALLUGO; 1019 populate_bpffs(sb->s_root); 1020 inode->i_mode |= S_ISVTX | opts->mode; 1021 return 0; 1022 } 1023 1024 static int bpf_get_tree(struct fs_context *fc) 1025 { 1026 return get_tree_nodev(fc, bpf_fill_super); 1027 } 1028 1029 static void bpf_free_fc(struct fs_context *fc) 1030 { 1031 kfree(fc->s_fs_info); 1032 } 1033 1034 static const struct fs_context_operations bpf_context_ops = { 1035 .free = bpf_free_fc, 1036 .parse_param = bpf_parse_param, 1037 .get_tree = bpf_get_tree, 1038 }; 1039 1040 /* 1041 * Set up the filesystem mount context. 1042 */ 1043 static int bpf_init_fs_context(struct fs_context *fc) 1044 { 1045 struct bpf_mount_opts *opts; 1046 1047 opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL); 1048 if (!opts) 1049 return -ENOMEM; 1050 1051 opts->mode = S_IRWXUGO; 1052 opts->uid = current_fsuid(); 1053 opts->gid = current_fsgid(); 1054 1055 /* start out with no BPF token delegation enabled */ 1056 opts->delegate_cmds = 0; 1057 opts->delegate_maps = 0; 1058 opts->delegate_progs = 0; 1059 opts->delegate_attachs = 0; 1060 1061 fc->s_fs_info = opts; 1062 fc->ops = &bpf_context_ops; 1063 return 0; 1064 } 1065 1066 static void bpf_kill_super(struct super_block *sb) 1067 { 1068 struct bpf_mount_opts *opts = sb->s_fs_info; 1069 1070 kill_anon_super(sb); 1071 kfree(opts); 1072 } 1073 1074 static struct file_system_type bpf_fs_type = { 1075 .owner = THIS_MODULE, 1076 .name = "bpf", 1077 .init_fs_context = bpf_init_fs_context, 1078 .parameters = bpf_fs_parameters, 1079 .kill_sb = bpf_kill_super, 1080 .fs_flags = FS_USERNS_MOUNT, 1081 }; 1082 1083 static int __init bpf_init(void) 1084 { 1085 int ret; 1086 1087 ret = sysfs_create_mount_point(fs_kobj, "bpf"); 1088 if (ret) 1089 return ret; 1090 1091 ret = register_filesystem(&bpf_fs_type); 1092 if (ret) 1093 sysfs_remove_mount_point(fs_kobj, "bpf"); 1094 1095 return ret; 1096 } 1097 fs_initcall(bpf_init); 1098