// SPDX-License-Identifier: GPL-2.0-only
/*
 * Minimal file system backend for holding eBPF maps and programs,
 * used by bpf(2) object pinning.
 *
 * Authors:
 *
 *	Daniel Borkmann <daniel@iogearbox.net>
 */

#include <linux/init.h>
#include <linux/magic.h>
#include <linux/major.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/kdev_t.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/kstrtox.h>
#include "preload/bpf_preload.h"

enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,
	BPF_TYPE_PROG,
	BPF_TYPE_MAP,
	BPF_TYPE_LINK,
};

static void *bpf_any_get(void *raw, enum bpf_type type)
{
	switch (type) {
	case BPF_TYPE_PROG:
		bpf_prog_inc(raw);
		break;
	case BPF_TYPE_MAP:
		bpf_map_inc_with_uref(raw);
		break;
	case BPF_TYPE_LINK:
		bpf_link_inc(raw);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	return raw;
}

static void bpf_any_put(void *raw, enum bpf_type type)
{
	switch (type) {
	case BPF_TYPE_PROG:
		bpf_prog_put(raw);
		break;
	case BPF_TYPE_MAP:
		bpf_map_put_with_uref(raw);
		break;
	case BPF_TYPE_LINK:
		bpf_link_put(raw);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}
}

static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
{
	void *raw;

	raw = bpf_map_get_with_uref(ufd);
	if (!IS_ERR(raw)) {
		*type = BPF_TYPE_MAP;
		return raw;
	}

	raw = bpf_prog_get(ufd);
	if (!IS_ERR(raw)) {
		*type = BPF_TYPE_PROG;
		return raw;
	}

	raw = bpf_link_get_from_fd(ufd);
	if (!IS_ERR(raw)) {
		*type = BPF_TYPE_LINK;
		return raw;
	}

	return ERR_PTR(-EINVAL);
}

static const struct inode_operations bpf_dir_iops;

static const struct inode_operations bpf_prog_iops = { };
static const struct inode_operations bpf_map_iops  = { };
static const struct inode_operations bpf_link_iops = { };

struct inode *bpf_get_inode(struct super_block *sb,
			    const struct inode *dir,
			    umode_t mode)
{
	struct inode *inode;

	switch (mode & S_IFMT) {
	case S_IFDIR:
	case S_IFREG:
	case S_IFLNK:
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOSPC);

	inode->i_ino = get_next_ino();
	simple_inode_init_ts(inode);

	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);

	return inode;
}

static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
{
	*type = BPF_TYPE_UNSPEC;
	if (inode->i_op == &bpf_prog_iops)
		*type = BPF_TYPE_PROG;
	else if (inode->i_op == &bpf_map_iops)
		*type = BPF_TYPE_MAP;
	else if (inode->i_op == &bpf_link_iops)
		*type = BPF_TYPE_LINK;
	else
		return -EACCES;

	return 0;
}

static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode,
				struct inode *dir)
{
	d_instantiate(dentry, inode);
	dget(dentry);

	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
}

static int bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir,
		     struct dentry *dentry, umode_t mode)
{
	struct inode *inode;

	inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	inode->i_op = &bpf_dir_iops;
	inode->i_fop = &simple_dir_operations;

	inc_nlink(inode);
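	/* bump the parent's link count for the new subdirectory's ".." entry */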
	inc_nlink(dir);

	bpf_dentry_finalize(dentry, inode, dir);
	return 0;
}

struct map_iter {
	void *key;
	bool done;
};

static struct map_iter *map_iter(struct seq_file *m)
{
	return m->private;
}

static struct bpf_map *seq_file_to_map(struct seq_file *m)
{
	return file_inode(m->file)->i_private;
}

static void map_iter_free(struct map_iter *iter)
{
	if (iter) {
		kfree(iter->key);
		kfree(iter);
	}
}

static struct map_iter *map_iter_alloc(struct bpf_map *map)
{
	struct map_iter *iter;

	iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN);
	if (!iter)
		goto error;

	iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN);
	if (!iter->key)
		goto error;

	return iter;

error:
	map_iter_free(iter);
	return NULL;
}

static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct bpf_map *map = seq_file_to_map(m);
	void *key = map_iter(m)->key;
	void *prev_key;

	(*pos)++;
	if (map_iter(m)->done)
		return NULL;

	if (unlikely(v == SEQ_START_TOKEN))
		prev_key = NULL;
	else
		prev_key = key;

	rcu_read_lock();
	if (map->ops->map_get_next_key(map, prev_key, key)) {
		map_iter(m)->done = true;
		key = NULL;
	}
	rcu_read_unlock();
	return key;
}

static void *map_seq_start(struct seq_file *m, loff_t *pos)
{
	if (map_iter(m)->done)
		return NULL;

	return *pos ? map_iter(m)->key : SEQ_START_TOKEN;
}

static void map_seq_stop(struct seq_file *m, void *v)
{
}

static int map_seq_show(struct seq_file *m, void *v)
{
	struct bpf_map *map = seq_file_to_map(m);
	void *key = map_iter(m)->key;

	if (unlikely(v == SEQ_START_TOKEN)) {
		seq_puts(m, "# WARNING!! The output is for debug purpose only\n");
		seq_puts(m, "# WARNING!! The output format will change\n");
	} else {
		map->ops->map_seq_show_elem(map, key, m);
	}

	return 0;
}

static const struct seq_operations bpffs_map_seq_ops = {
	.start	= map_seq_start,
	.next	= map_seq_next,
	.show	= map_seq_show,
	.stop	= map_seq_stop,
};

static int bpffs_map_open(struct inode *inode, struct file *file)
{
	struct bpf_map *map = inode->i_private;
	struct map_iter *iter;
	struct seq_file *m;
	int err;

	iter = map_iter_alloc(map);
	if (!iter)
		return -ENOMEM;

	err = seq_open(file, &bpffs_map_seq_ops);
	if (err) {
		map_iter_free(iter);
		return err;
	}

	m = file->private_data;
	m->private = iter;

	return 0;
}

static int bpffs_map_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;

	map_iter_free(map_iter(m));

	return seq_release(inode, file);
}

/* bpffs_map_fops should only implement the basic
 * read operation for a BPF map. The purpose is to
 * provide a simple, intuitive way for users to do
 * "cat bpffs/pathto/a-pinned-map".
 *
 * Other operations (e.g. write, lookup...) should be realized by
 * the userspace tools (e.g. bpftool) through the
 * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update
 * interface.
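 *
 * For example, "bpftool map dump pinned /sys/fs/bpf/<map>" goes through
 * those syscall interfaces rather than this read path.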
 */
static const struct file_operations bpffs_map_fops = {
	.open		= bpffs_map_open,
	.read		= seq_read,
	.release	= bpffs_map_release,
};

static int bpffs_obj_open(struct inode *inode, struct file *file)
{
	return -EIO;
}

static const struct file_operations bpffs_obj_fops = {
	.open		= bpffs_obj_open,
};

static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
			 const struct inode_operations *iops,
			 const struct file_operations *fops)
{
	struct inode *dir = dentry->d_parent->d_inode;
	struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	inode->i_op = iops;
	inode->i_fop = fops;
	inode->i_private = raw;

	bpf_dentry_finalize(dentry, inode, dir);
	return 0;
}

static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg)
{
	return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops,
			     &bpffs_obj_fops);
}

static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
{
	struct bpf_map *map = arg;

	return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops,
			     bpf_map_support_seq_show(map) ?
			     &bpffs_map_fops : &bpffs_obj_fops);
}

static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
{
	struct bpf_link *link = arg;

	return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops,
			     bpf_link_is_iter(link) ?
			     &bpf_iter_fops : &bpffs_obj_fops);
}

static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{
	/* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
	 * extensions. That allows populate_bpffs() to create special files.
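	 * The bpf_preload machinery, for instance, pins the "maps.debug" and
	 * "progs.debug" iterators in the bpffs root this way.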
	 */
	if ((dir->i_mode & S_IALLUGO) &&
	    strchr(dentry->d_name.name, '.'))
		return ERR_PTR(-EPERM);

	return simple_lookup(dir, dentry, flags);
}

static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir,
		       struct dentry *dentry, const char *target)
{
	char *link = kstrdup(target, GFP_USER | __GFP_NOWARN);
	struct inode *inode;

	if (!link)
		return -ENOMEM;

	inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK);
	if (IS_ERR(inode)) {
		kfree(link);
		return PTR_ERR(inode);
	}

	inode->i_op = &simple_symlink_inode_operations;
	inode->i_link = link;

	bpf_dentry_finalize(dentry, inode, dir);
	return 0;
}

static const struct inode_operations bpf_dir_iops = {
	.lookup		= bpf_lookup,
	.mkdir		= bpf_mkdir,
	.symlink	= bpf_symlink,
	.rmdir		= simple_rmdir,
	.rename		= simple_rename,
	.link		= simple_link,
	.unlink		= simple_unlink,
};

/* pin iterator link into bpffs */
static int bpf_iter_link_pin_kernel(struct dentry *parent,
				    const char *name, struct bpf_link *link)
{
	umode_t mode = S_IFREG | S_IRUSR;
	struct dentry *dentry;
	int ret;

	inode_lock(parent->d_inode);
	dentry = lookup_one_len(name, parent, strlen(name));
	if (IS_ERR(dentry)) {
		inode_unlock(parent->d_inode);
		return PTR_ERR(dentry);
	}
	ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops,
			    &bpf_iter_fops);
	dput(dentry);
	inode_unlock(parent->d_inode);
	return ret;
}

static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw,
			  enum bpf_type type)
{
	struct dentry *dentry;
	struct inode *dir;
	struct path path;
	umode_t mode;
	int ret;

	dentry = user_path_create(path_fd, pathname, &path, 0);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	dir = d_inode(path.dentry);
	if (dir->i_op != &bpf_dir_iops) {
		ret = -EPERM;
		goto out;
	}

	mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
	ret = security_path_mknod(&path, dentry, mode, 0);
	if (ret)
		goto out;

	switch (type) {
	case BPF_TYPE_PROG:
		ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw);
		break;
	case BPF_TYPE_MAP:
		ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
		break;
	case BPF_TYPE_LINK:
		ret = vfs_mkobj(dentry, mode, bpf_mklink, raw);
		break;
	default:
		ret = -EPERM;
	}
out:
	done_path_create(&path, dentry);
	return ret;
}

int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname)
{
	enum bpf_type type;
	void *raw;
	int ret;

	raw = bpf_fd_probe_obj(ufd, &type);
	if (IS_ERR(raw))
		return PTR_ERR(raw);

	ret = bpf_obj_do_pin(path_fd, pathname, raw, type);
	if (ret != 0)
		bpf_any_put(raw, type);

	return ret;
}

static void *bpf_obj_do_get(int path_fd, const char __user *pathname,
			    enum bpf_type *type, int flags)
{
	struct inode *inode;
	struct path path;
	void *raw;
	int ret;

	ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path);
	if (ret)
		return ERR_PTR(ret);

	inode = d_backing_inode(path.dentry);
	ret = path_permission(&path, ACC_MODE(flags));
	if (ret)
		goto out;

	ret = bpf_inode_type(inode, type);
	if (ret)
		goto out;

	raw = bpf_any_get(inode->i_private, *type);
	if (!IS_ERR(raw))
		touch_atime(&path);

	path_put(&path);
	return raw;
out:
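	/* error path: drop the reference taken by user_path_at() */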
	path_put(&path);
	return ERR_PTR(ret);
}

int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags)
{
	enum bpf_type type = BPF_TYPE_UNSPEC;
	int f_flags;
	void *raw;
	int ret;

	f_flags = bpf_get_file_flag(flags);
	if (f_flags < 0)
		return f_flags;

	raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags);
	if (IS_ERR(raw))
		return PTR_ERR(raw);

	if (type == BPF_TYPE_PROG)
		ret = bpf_prog_new_fd(raw);
	else if (type == BPF_TYPE_MAP)
		ret = bpf_map_new_fd(raw, f_flags);
	else if (type == BPF_TYPE_LINK)
		ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(raw);
	else
		return -ENOENT;

	if (ret < 0)
		bpf_any_put(raw, type);
	return ret;
}

static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
{
	struct bpf_prog *prog;
	int ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ);
	if (ret)
		return ERR_PTR(ret);

	if (inode->i_op == &bpf_map_iops)
		return ERR_PTR(-EINVAL);
	if (inode->i_op == &bpf_link_iops)
		return ERR_PTR(-EINVAL);
	if (inode->i_op != &bpf_prog_iops)
		return ERR_PTR(-EACCES);

	prog = inode->i_private;

	ret = security_bpf_prog(prog);
	if (ret < 0)
		return ERR_PTR(ret);

	if (!bpf_prog_get_ok(prog, &type, false))
		return ERR_PTR(-EINVAL);

	bpf_prog_inc(prog);
	return prog;
}

struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
{
	struct bpf_prog *prog;
	struct path path;
	int ret = kern_path(name, LOOKUP_FOLLOW, &path);
	if (ret)
		return ERR_PTR(ret);
	prog = __get_prog_inode(d_backing_inode(path.dentry), type);
	if (!IS_ERR(prog))
		touch_atime(&path);
	path_put(&path);
	return prog;
}
EXPORT_SYMBOL(bpf_prog_get_type_path);

struct bpffs_btf_enums {
	const struct btf *btf;
	const struct btf_type *cmd_t;
	const struct btf_type *map_t;
	const struct btf_type *prog_t;
	const struct btf_type *attach_t;
};

static int find_bpffs_btf_enums(struct bpffs_btf_enums *info)
{
	const struct btf *btf;
	const struct btf_type *t;
	const char *name;
	int i, n;

	memset(info, 0, sizeof(*info));

	btf = bpf_get_btf_vmlinux();
	if (IS_ERR(btf))
		return PTR_ERR(btf);
	if (!btf)
		return -ENOENT;

	info->btf = btf;

	for (i = 1, n = btf_nr_types(btf); i < n; i++) {
		t = btf_type_by_id(btf, i);
		if (!btf_type_is_enum(t))
			continue;

		name = btf_name_by_offset(btf, t->name_off);
		if (!name)
			continue;

		if (strcmp(name, "bpf_cmd") == 0)
			info->cmd_t = t;
		else if (strcmp(name, "bpf_map_type") == 0)
			info->map_t = t;
		else if (strcmp(name, "bpf_prog_type") == 0)
			info->prog_t = t;
		else if (strcmp(name, "bpf_attach_type") == 0)
			info->attach_t = t;
		else
			continue;

		if (info->cmd_t && info->map_t && info->prog_t && info->attach_t)
			return 0;
	}

	return -ESRCH;
}

static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t,
				const char *prefix, const char *str, int *value)
{
	const struct btf_enum *e;
	const char *name;
	int i, n, pfx_len = strlen(prefix);

	*value = 0;

	if (!btf || !enum_t)
		return false;

	for (i = 0, n = btf_vlen(enum_t); i < n; i++) {
		e = &btf_enum(enum_t)[i];

		name = btf_name_by_offset(btf, e->name_off);
		if (!name ||
		    strncasecmp(name, prefix, pfx_len) != 0)
			continue;

		/* match symbolic name case insensitive and ignoring prefix */
		if (strcasecmp(name + pfx_len, str) == 0) {
			*value = e->val;
			return true;
		}
	}

	return false;
}

static void seq_print_delegate_opts(struct seq_file *m,
				    const char *opt_name,
				    const struct btf *btf,
				    const struct btf_type *enum_t,
				    const char *prefix,
				    u64 delegate_msk, u64 any_msk)
{
	const struct btf_enum *e;
	bool first = true;
	const char *name;
	u64 msk;
	int i, n, pfx_len = strlen(prefix);

	delegate_msk &= any_msk; /* clear unknown bits */

	if (delegate_msk == 0)
		return;

	seq_printf(m, ",%s", opt_name);
	if (delegate_msk == any_msk) {
		seq_printf(m, "=any");
		return;
	}

	if (btf && enum_t) {
		for (i = 0, n = btf_vlen(enum_t); i < n; i++) {
			e = &btf_enum(enum_t)[i];
			name = btf_name_by_offset(btf, e->name_off);
			if (!name || strncasecmp(name, prefix, pfx_len) != 0)
				continue;
			msk = 1ULL << e->val;
			if (delegate_msk & msk) {
				/* emit lower-case name without prefix */
				seq_putc(m, first ? '=' : ':');
				name += pfx_len;
				while (*name) {
					seq_putc(m, tolower(*name));
					name++;
				}

				delegate_msk &= ~msk;
				first = false;
			}
		}
	}
	if (delegate_msk)
		seq_printf(m, "%c0x%llx", first ? '=' : ':', delegate_msk);
}

/*
 * Display the mount options in /proc/mounts.
 */
static int bpf_show_options(struct seq_file *m, struct dentry *root)
{
	struct inode *inode = d_inode(root);
	umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX;
	struct bpf_mount_opts *opts = root->d_sb->s_fs_info;
	u64 mask;

	if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID))
		seq_printf(m, ",uid=%u",
			   from_kuid_munged(&init_user_ns, inode->i_uid));
	if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID))
		seq_printf(m, ",gid=%u",
			   from_kgid_munged(&init_user_ns, inode->i_gid));
	if (mode != S_IRWXUGO)
		seq_printf(m, ",mode=%o", mode);

	if (opts->delegate_cmds || opts->delegate_maps ||
	    opts->delegate_progs || opts->delegate_attachs) {
		struct bpffs_btf_enums info;

		/* ignore errors, fallback to hex */
		(void)find_bpffs_btf_enums(&info);

		mask = (1ULL << __MAX_BPF_CMD) - 1;
		seq_print_delegate_opts(m, "delegate_cmds",
					info.btf, info.cmd_t, "BPF_",
					opts->delegate_cmds, mask);

		mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1;
		seq_print_delegate_opts(m, "delegate_maps",
					info.btf, info.map_t, "BPF_MAP_TYPE_",
					opts->delegate_maps, mask);

		mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1;
		seq_print_delegate_opts(m, "delegate_progs",
					info.btf, info.prog_t, "BPF_PROG_TYPE_",
					opts->delegate_progs, mask);

		mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1;
		seq_print_delegate_opts(m, "delegate_attachs",
					info.btf, info.attach_t, "BPF_",
					opts->delegate_attachs, mask);
	}

	return 0;
}

static void bpf_free_inode(struct inode *inode)
{
	enum bpf_type type;

	if (S_ISLNK(inode->i_mode))
		kfree(inode->i_link);
	if (!bpf_inode_type(inode, &type))
		bpf_any_put(inode->i_private, type);
	free_inode_nonrcu(inode);
}

const struct super_operations bpf_super_ops = {
	.statfs		= simple_statfs,
	.drop_inode	= generic_delete_inode,
	.show_options	= bpf_show_options,
	.free_inode	= bpf_free_inode,
};

enum {
	OPT_UID,
	OPT_GID,
	OPT_MODE,
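	/* options controlling BPF token delegation, parsed below */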
	OPT_DELEGATE_CMDS,
	OPT_DELEGATE_MAPS,
	OPT_DELEGATE_PROGS,
	OPT_DELEGATE_ATTACHS,
};

static const struct fs_parameter_spec bpf_fs_parameters[] = {
	fsparam_u32	("uid",			OPT_UID),
	fsparam_u32	("gid",			OPT_GID),
	fsparam_u32oct	("mode",		OPT_MODE),
	fsparam_string	("delegate_cmds",	OPT_DELEGATE_CMDS),
	fsparam_string	("delegate_maps",	OPT_DELEGATE_MAPS),
	fsparam_string	("delegate_progs",	OPT_DELEGATE_PROGS),
	fsparam_string	("delegate_attachs",	OPT_DELEGATE_ATTACHS),
	{}
};

static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct bpf_mount_opts *opts = fc->s_fs_info;
	struct fs_parse_result result;
	kuid_t uid;
	kgid_t gid;
	int opt, err;

	opt = fs_parse(fc, bpf_fs_parameters, param, &result);
	if (opt < 0) {
		/* We might like to report bad mount options here, but
		 * traditionally we've ignored all mount options, so we'd
		 * better continue to ignore non-existing options for bpf.
		 */
		if (opt == -ENOPARAM) {
			opt = vfs_parse_fs_param_source(fc, param);
			if (opt != -ENOPARAM)
				return opt;

			return 0;
		}

		if (opt < 0)
			return opt;
	}

	switch (opt) {
	case OPT_UID:
		uid = make_kuid(current_user_ns(), result.uint_32);
		if (!uid_valid(uid))
			goto bad_value;

		/*
		 * The requested uid must be representable in the
		 * filesystem's idmapping.
		 */
		if (!kuid_has_mapping(fc->user_ns, uid))
			goto bad_value;

		opts->uid = uid;
		break;
	case OPT_GID:
		gid = make_kgid(current_user_ns(), result.uint_32);
		if (!gid_valid(gid))
			goto bad_value;

		/*
		 * The requested gid must be representable in the
		 * filesystem's idmapping.
		 */
		if (!kgid_has_mapping(fc->user_ns, gid))
			goto bad_value;

		opts->gid = gid;
		break;
	case OPT_MODE:
		opts->mode = result.uint_32 & S_IALLUGO;
		break;
	case OPT_DELEGATE_CMDS:
	case OPT_DELEGATE_MAPS:
	case OPT_DELEGATE_PROGS:
	case OPT_DELEGATE_ATTACHS: {
		struct bpffs_btf_enums info;
		const struct btf_type *enum_t;
		const char *enum_pfx;
		u64 *delegate_msk, msk = 0;
		char *p, *str;
		int val;

		/* ignore errors, fallback to hex */
		(void)find_bpffs_btf_enums(&info);

		switch (opt) {
		case OPT_DELEGATE_CMDS:
			delegate_msk = &opts->delegate_cmds;
			enum_t = info.cmd_t;
			enum_pfx = "BPF_";
			break;
		case OPT_DELEGATE_MAPS:
			delegate_msk = &opts->delegate_maps;
			enum_t = info.map_t;
			enum_pfx = "BPF_MAP_TYPE_";
			break;
		case OPT_DELEGATE_PROGS:
			delegate_msk = &opts->delegate_progs;
			enum_t = info.prog_t;
			enum_pfx = "BPF_PROG_TYPE_";
			break;
		case OPT_DELEGATE_ATTACHS:
			delegate_msk = &opts->delegate_attachs;
			enum_t = info.attach_t;
			enum_pfx = "BPF_";
			break;
		default:
			return -EINVAL;
		}

		str = param->string;
		while ((p = strsep(&str, ":"))) {
			if (strcmp(p, "any") == 0) {
				msk |= ~0ULL;
			} else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) {
				msk |= 1ULL << val;
			} else {
				err = kstrtou64(p, 0, &msk);
				if (err)
					return err;
			}
		}

		/* Setting delegation mount options requires privileges */
		if (msk && !capable(CAP_SYS_ADMIN))
			return -EPERM;

		*delegate_msk |= msk;
		break;
	}
	default:
		/* ignore unknown mount options */
		break;
	}

	return 0;
bad_value:
	return invalfc(fc, "Bad value for '%s'", param->key);
}

struct bpf_preload_ops *bpf_preload_ops;
EXPORT_SYMBOL_GPL(bpf_preload_ops);

static bool bpf_preload_mod_get(void)
{
	/* If bpf_preload.ko wasn't loaded earlier then load it now.
	 * When bpf_preload is built into vmlinux the module's __init
	 * function will populate it.
	 */
	if (!bpf_preload_ops) {
		request_module("bpf_preload");
		if (!bpf_preload_ops)
			return false;
	}
	/* And grab the reference, so the module doesn't disappear while the
	 * kernel is interacting with the kernel module and its UMD.
	 */
	if (!try_module_get(bpf_preload_ops->owner)) {
		pr_err("bpf_preload module get failed.\n");
		return false;
	}
	return true;
}

static void bpf_preload_mod_put(void)
{
	if (bpf_preload_ops)
		/* now user can "rmmod bpf_preload" if necessary */
		module_put(bpf_preload_ops->owner);
}

static DEFINE_MUTEX(bpf_preload_lock);

static int populate_bpffs(struct dentry *parent)
{
	struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {};
	int err = 0, i;

	/* grab the mutex to make sure the kernel interactions with bpf_preload
	 * are serialized
	 */
	mutex_lock(&bpf_preload_lock);

	/* if bpf_preload.ko wasn't built into vmlinux then load it */
	if (!bpf_preload_mod_get())
		goto out;

	err = bpf_preload_ops->preload(objs);
	if (err)
		goto out_put;
	for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
		bpf_link_inc(objs[i].link);
		err = bpf_iter_link_pin_kernel(parent,
					       objs[i].link_name, objs[i].link);
		if (err) {
			bpf_link_put(objs[i].link);
			goto out_put;
		}
	}
out_put:
	bpf_preload_mod_put();
out:
	mutex_unlock(&bpf_preload_lock);
	return err;
}

static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
{
	static const struct tree_descr bpf_rfiles[] = { { "" } };
	struct bpf_mount_opts *opts = sb->s_fs_info;
	struct inode *inode;
	int ret;

	/* Mounting an instance of BPF FS requires privileges */
	if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
	if (ret)
		return ret;

	sb->s_op = &bpf_super_ops;

	inode = sb->s_root->d_inode;
	inode->i_uid = opts->uid;
	inode->i_gid = opts->gid;
	inode->i_op = &bpf_dir_iops;
	inode->i_mode &= ~S_IALLUGO;
	populate_bpffs(sb->s_root);
	inode->i_mode |= S_ISVTX | opts->mode;
	return 0;
}

static int bpf_get_tree(struct fs_context *fc)
{
	return get_tree_nodev(fc, bpf_fill_super);
}

static void bpf_free_fc(struct fs_context *fc)
{
	kfree(fc->s_fs_info);
}

static const struct fs_context_operations bpf_context_ops = {
	.free		= bpf_free_fc,
	.parse_param	= bpf_parse_param,
	.get_tree	= bpf_get_tree,
};

/*
 * Set up the filesystem mount context.
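 * Mount options are parsed later through bpf_context_ops.parse_param
 * (bpf_parse_param() above).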
 */
static int bpf_init_fs_context(struct fs_context *fc)
{
	struct bpf_mount_opts *opts;

	opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL);
	if (!opts)
		return -ENOMEM;

	opts->mode = S_IRWXUGO;
	opts->uid = current_fsuid();
	opts->gid = current_fsgid();

	/* start out with no BPF token delegation enabled */
	opts->delegate_cmds = 0;
	opts->delegate_maps = 0;
	opts->delegate_progs = 0;
	opts->delegate_attachs = 0;

	fc->s_fs_info = opts;
	fc->ops = &bpf_context_ops;
	return 0;
}

static void bpf_kill_super(struct super_block *sb)
{
	struct bpf_mount_opts *opts = sb->s_fs_info;

	kill_litter_super(sb);
	kfree(opts);
}

static struct file_system_type bpf_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "bpf",
	.init_fs_context = bpf_init_fs_context,
	.parameters	= bpf_fs_parameters,
	.kill_sb	= bpf_kill_super,
	.fs_flags	= FS_USERNS_MOUNT,
};

static int __init bpf_init(void)
{
	int ret;

	ret = sysfs_create_mount_point(fs_kobj, "bpf");
	if (ret)
		return ret;

	ret = register_filesystem(&bpf_fs_type);
	if (ret)
		sysfs_remove_mount_point(fs_kobj, "bpf");

	return ret;
}
fs_initcall(bpf_init);