1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Minimal file system backend for holding eBPF maps and programs, 4 * used by bpf(2) object pinning. 5 * 6 * Authors: 7 * 8 * Daniel Borkmann <daniel@iogearbox.net> 9 */ 10 11 #include <linux/init.h> 12 #include <linux/magic.h> 13 #include <linux/major.h> 14 #include <linux/mount.h> 15 #include <linux/namei.h> 16 #include <linux/fs.h> 17 #include <linux/fs_context.h> 18 #include <linux/fs_parser.h> 19 #include <linux/kdev_t.h> 20 #include <linux/filter.h> 21 #include <linux/bpf.h> 22 #include <linux/bpf_trace.h> 23 #include <linux/kstrtox.h> 24 #include "preload/bpf_preload.h" 25 26 enum bpf_type { 27 BPF_TYPE_UNSPEC = 0, 28 BPF_TYPE_PROG, 29 BPF_TYPE_MAP, 30 BPF_TYPE_LINK, 31 }; 32 33 static void *bpf_any_get(void *raw, enum bpf_type type) 34 { 35 switch (type) { 36 case BPF_TYPE_PROG: 37 bpf_prog_inc(raw); 38 break; 39 case BPF_TYPE_MAP: 40 bpf_map_inc_with_uref(raw); 41 break; 42 case BPF_TYPE_LINK: 43 bpf_link_inc(raw); 44 break; 45 default: 46 WARN_ON_ONCE(1); 47 break; 48 } 49 50 return raw; 51 } 52 53 static void bpf_any_put(void *raw, enum bpf_type type) 54 { 55 switch (type) { 56 case BPF_TYPE_PROG: 57 bpf_prog_put(raw); 58 break; 59 case BPF_TYPE_MAP: 60 bpf_map_put_with_uref(raw); 61 break; 62 case BPF_TYPE_LINK: 63 bpf_link_put(raw); 64 break; 65 default: 66 WARN_ON_ONCE(1); 67 break; 68 } 69 } 70 71 static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) 72 { 73 void *raw; 74 75 raw = bpf_map_get_with_uref(ufd); 76 if (!IS_ERR(raw)) { 77 *type = BPF_TYPE_MAP; 78 return raw; 79 } 80 81 raw = bpf_prog_get(ufd); 82 if (!IS_ERR(raw)) { 83 *type = BPF_TYPE_PROG; 84 return raw; 85 } 86 87 raw = bpf_link_get_from_fd(ufd); 88 if (!IS_ERR(raw)) { 89 *type = BPF_TYPE_LINK; 90 return raw; 91 } 92 93 return ERR_PTR(-EINVAL); 94 } 95 96 static const struct inode_operations bpf_dir_iops; 97 98 static const struct inode_operations bpf_prog_iops = { }; 99 static const struct inode_operations bpf_map_iops = { }; 100 static const struct inode_operations bpf_link_iops = { }; 101 102 struct inode *bpf_get_inode(struct super_block *sb, 103 const struct inode *dir, 104 umode_t mode) 105 { 106 struct inode *inode; 107 108 switch (mode & S_IFMT) { 109 case S_IFDIR: 110 case S_IFREG: 111 case S_IFLNK: 112 break; 113 default: 114 return ERR_PTR(-EINVAL); 115 } 116 117 inode = new_inode(sb); 118 if (!inode) 119 return ERR_PTR(-ENOSPC); 120 121 inode->i_ino = get_next_ino(); 122 simple_inode_init_ts(inode); 123 124 inode_init_owner(&nop_mnt_idmap, inode, dir, mode); 125 126 return inode; 127 } 128 129 static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) 130 { 131 *type = BPF_TYPE_UNSPEC; 132 if (inode->i_op == &bpf_prog_iops) 133 *type = BPF_TYPE_PROG; 134 else if (inode->i_op == &bpf_map_iops) 135 *type = BPF_TYPE_MAP; 136 else if (inode->i_op == &bpf_link_iops) 137 *type = BPF_TYPE_LINK; 138 else 139 return -EACCES; 140 141 return 0; 142 } 143 144 static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, 145 struct inode *dir) 146 { 147 d_make_persistent(dentry, inode); 148 149 inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); 150 } 151 152 static struct dentry *bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir, 153 struct dentry *dentry, umode_t mode) 154 { 155 struct inode *inode; 156 157 inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); 158 if (IS_ERR(inode)) 159 return ERR_CAST(inode); 160 161 inode->i_op = &bpf_dir_iops; 162 inode->i_fop = &simple_dir_operations; 163 164 inc_nlink(inode); 165 inc_nlink(dir); 166 167 bpf_dentry_finalize(dentry, inode, dir); 168 return NULL; 169 } 170 171 struct map_iter { 172 void *key; 173 bool done; 174 }; 175 176 static struct map_iter *map_iter(struct seq_file *m) 177 { 178 return m->private; 179 } 180 181 static struct bpf_map *seq_file_to_map(struct seq_file *m) 182 { 183 return file_inode(m->file)->i_private; 184 } 185 186 static void map_iter_free(struct map_iter *iter) 187 { 188 if (iter) { 189 kfree(iter->key); 190 kfree(iter); 191 } 192 } 193 194 static struct map_iter *map_iter_alloc(struct bpf_map *map) 195 { 196 struct map_iter *iter; 197 198 iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN); 199 if (!iter) 200 goto error; 201 202 iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN); 203 if (!iter->key) 204 goto error; 205 206 return iter; 207 208 error: 209 map_iter_free(iter); 210 return NULL; 211 } 212 213 static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) 214 { 215 struct bpf_map *map = seq_file_to_map(m); 216 void *key = map_iter(m)->key; 217 void *prev_key; 218 219 (*pos)++; 220 if (map_iter(m)->done) 221 return NULL; 222 223 if (unlikely(v == SEQ_START_TOKEN)) 224 prev_key = NULL; 225 else 226 prev_key = key; 227 228 rcu_read_lock(); 229 if (map->ops->map_get_next_key(map, prev_key, key)) { 230 map_iter(m)->done = true; 231 key = NULL; 232 } 233 rcu_read_unlock(); 234 return key; 235 } 236 237 static void *map_seq_start(struct seq_file *m, loff_t *pos) 238 { 239 if (map_iter(m)->done) 240 return NULL; 241 242 return *pos ? map_iter(m)->key : SEQ_START_TOKEN; 243 } 244 245 static void map_seq_stop(struct seq_file *m, void *v) 246 { 247 } 248 249 static int map_seq_show(struct seq_file *m, void *v) 250 { 251 struct bpf_map *map = seq_file_to_map(m); 252 void *key = map_iter(m)->key; 253 254 if (unlikely(v == SEQ_START_TOKEN)) { 255 seq_puts(m, "# WARNING!! The output is for debug purpose only\n"); 256 seq_puts(m, "# WARNING!! The output format will change\n"); 257 } else { 258 map->ops->map_seq_show_elem(map, key, m); 259 } 260 261 return 0; 262 } 263 264 static const struct seq_operations bpffs_map_seq_ops = { 265 .start = map_seq_start, 266 .next = map_seq_next, 267 .show = map_seq_show, 268 .stop = map_seq_stop, 269 }; 270 271 static int bpffs_map_open(struct inode *inode, struct file *file) 272 { 273 struct bpf_map *map = inode->i_private; 274 struct map_iter *iter; 275 struct seq_file *m; 276 int err; 277 278 iter = map_iter_alloc(map); 279 if (!iter) 280 return -ENOMEM; 281 282 err = seq_open(file, &bpffs_map_seq_ops); 283 if (err) { 284 map_iter_free(iter); 285 return err; 286 } 287 288 m = file->private_data; 289 m->private = iter; 290 291 return 0; 292 } 293 294 static int bpffs_map_release(struct inode *inode, struct file *file) 295 { 296 struct seq_file *m = file->private_data; 297 298 map_iter_free(map_iter(m)); 299 300 return seq_release(inode, file); 301 } 302 303 /* bpffs_map_fops should only implement the basic 304 * read operation for a BPF map. The purpose is to 305 * provide a simple user intuitive way to do 306 * "cat bpffs/pathto/a-pinned-map". 307 * 308 * Other operations (e.g. write, lookup...) should be realized by 309 * the userspace tools (e.g. bpftool) through the 310 * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update 311 * interface. 312 */ 313 static const struct file_operations bpffs_map_fops = { 314 .open = bpffs_map_open, 315 .read = seq_read, 316 .release = bpffs_map_release, 317 }; 318 319 static int bpffs_obj_open(struct inode *inode, struct file *file) 320 { 321 return -EIO; 322 } 323 324 static const struct file_operations bpffs_obj_fops = { 325 .open = bpffs_obj_open, 326 }; 327 328 static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, 329 const struct inode_operations *iops, 330 const struct file_operations *fops) 331 { 332 struct inode *dir = dentry->d_parent->d_inode; 333 struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); 334 if (IS_ERR(inode)) 335 return PTR_ERR(inode); 336 337 inode->i_op = iops; 338 inode->i_fop = fops; 339 inode->i_private = raw; 340 341 bpf_dentry_finalize(dentry, inode, dir); 342 return 0; 343 } 344 345 static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) 346 { 347 return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, 348 &bpffs_obj_fops); 349 } 350 351 static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) 352 { 353 struct bpf_map *map = arg; 354 355 return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, 356 bpf_map_support_seq_show(map) ? 357 &bpffs_map_fops : &bpffs_obj_fops); 358 } 359 360 static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg) 361 { 362 struct bpf_link *link = arg; 363 364 return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops, 365 bpf_link_is_iter(link) ? 366 &bpf_iter_fops : &bpffs_obj_fops); 367 } 368 369 static struct dentry * 370 bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) 371 { 372 /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future 373 * extensions. That allows popoulate_bpffs() create special files. 374 */ 375 if ((dir->i_mode & S_IALLUGO) && 376 strchr(dentry->d_name.name, '.')) 377 return ERR_PTR(-EPERM); 378 379 return simple_lookup(dir, dentry, flags); 380 } 381 382 static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir, 383 struct dentry *dentry, const char *target) 384 { 385 char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); 386 struct inode *inode; 387 388 if (!link) 389 return -ENOMEM; 390 391 inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); 392 if (IS_ERR(inode)) { 393 kfree(link); 394 return PTR_ERR(inode); 395 } 396 397 inode->i_op = &simple_symlink_inode_operations; 398 inode->i_link = link; 399 400 bpf_dentry_finalize(dentry, inode, dir); 401 return 0; 402 } 403 404 static const struct inode_operations bpf_dir_iops = { 405 .lookup = bpf_lookup, 406 .mkdir = bpf_mkdir, 407 .symlink = bpf_symlink, 408 .rmdir = simple_rmdir, 409 .rename = simple_rename, 410 .link = simple_link, 411 .unlink = simple_unlink, 412 }; 413 414 /* pin iterator link into bpffs */ 415 static int bpf_iter_link_pin_kernel(struct dentry *parent, 416 const char *name, struct bpf_link *link) 417 { 418 umode_t mode = S_IFREG | S_IRUSR; 419 struct dentry *dentry; 420 int ret; 421 422 dentry = simple_start_creating(parent, name); 423 if (IS_ERR(dentry)) 424 return PTR_ERR(dentry); 425 ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops, 426 &bpf_iter_fops); 427 simple_done_creating(dentry); 428 return ret; 429 } 430 431 static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw, 432 enum bpf_type type) 433 { 434 struct dentry *dentry; 435 struct inode *dir; 436 struct path path; 437 umode_t mode; 438 int ret; 439 440 dentry = start_creating_user_path(path_fd, pathname, &path, 0); 441 if (IS_ERR(dentry)) 442 return PTR_ERR(dentry); 443 444 dir = d_inode(path.dentry); 445 if (dir->i_op != &bpf_dir_iops) { 446 ret = -EPERM; 447 goto out; 448 } 449 450 mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); 451 ret = security_path_mknod(&path, dentry, mode, 0); 452 if (ret) 453 goto out; 454 455 switch (type) { 456 case BPF_TYPE_PROG: 457 ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw); 458 break; 459 case BPF_TYPE_MAP: 460 ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw); 461 break; 462 case BPF_TYPE_LINK: 463 ret = vfs_mkobj(dentry, mode, bpf_mklink, raw); 464 break; 465 default: 466 ret = -EPERM; 467 } 468 out: 469 end_creating_path(&path, dentry); 470 return ret; 471 } 472 473 int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname) 474 { 475 enum bpf_type type; 476 void *raw; 477 int ret; 478 479 raw = bpf_fd_probe_obj(ufd, &type); 480 if (IS_ERR(raw)) 481 return PTR_ERR(raw); 482 483 ret = bpf_obj_do_pin(path_fd, pathname, raw, type); 484 if (ret != 0) 485 bpf_any_put(raw, type); 486 487 return ret; 488 } 489 490 static void *bpf_obj_do_get(int path_fd, const char __user *pathname, 491 enum bpf_type *type, int flags) 492 { 493 struct inode *inode; 494 struct path path; 495 void *raw; 496 int ret; 497 498 ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path); 499 if (ret) 500 return ERR_PTR(ret); 501 502 inode = d_backing_inode(path.dentry); 503 ret = path_permission(&path, ACC_MODE(flags)); 504 if (ret) 505 goto out; 506 507 ret = bpf_inode_type(inode, type); 508 if (ret) 509 goto out; 510 511 raw = bpf_any_get(inode->i_private, *type); 512 if (!IS_ERR(raw)) 513 touch_atime(&path); 514 515 path_put(&path); 516 return raw; 517 out: 518 path_put(&path); 519 return ERR_PTR(ret); 520 } 521 522 int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags) 523 { 524 enum bpf_type type = BPF_TYPE_UNSPEC; 525 int f_flags; 526 void *raw; 527 int ret; 528 529 f_flags = bpf_get_file_flag(flags); 530 if (f_flags < 0) 531 return f_flags; 532 533 raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags); 534 if (IS_ERR(raw)) 535 return PTR_ERR(raw); 536 537 if (type == BPF_TYPE_PROG) 538 ret = bpf_prog_new_fd(raw); 539 else if (type == BPF_TYPE_MAP) 540 ret = bpf_map_new_fd(raw, f_flags); 541 else if (type == BPF_TYPE_LINK) 542 ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(raw); 543 else 544 return -ENOENT; 545 546 if (ret < 0) 547 bpf_any_put(raw, type); 548 return ret; 549 } 550 551 static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) 552 { 553 struct bpf_prog *prog; 554 int ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ); 555 if (ret) 556 return ERR_PTR(ret); 557 558 if (inode->i_op == &bpf_map_iops) 559 return ERR_PTR(-EINVAL); 560 if (inode->i_op == &bpf_link_iops) 561 return ERR_PTR(-EINVAL); 562 if (inode->i_op != &bpf_prog_iops) 563 return ERR_PTR(-EACCES); 564 565 prog = inode->i_private; 566 567 ret = security_bpf_prog(prog); 568 if (ret < 0) 569 return ERR_PTR(ret); 570 571 if (!bpf_prog_get_ok(prog, &type, false)) 572 return ERR_PTR(-EINVAL); 573 574 bpf_prog_inc(prog); 575 return prog; 576 } 577 578 struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) 579 { 580 struct bpf_prog *prog; 581 struct path path; 582 int ret = kern_path(name, LOOKUP_FOLLOW, &path); 583 if (ret) 584 return ERR_PTR(ret); 585 prog = __get_prog_inode(d_backing_inode(path.dentry), type); 586 if (!IS_ERR(prog)) 587 touch_atime(&path); 588 path_put(&path); 589 return prog; 590 } 591 EXPORT_SYMBOL(bpf_prog_get_type_path); 592 593 struct bpffs_btf_enums { 594 const struct btf *btf; 595 const struct btf_type *cmd_t; 596 const struct btf_type *map_t; 597 const struct btf_type *prog_t; 598 const struct btf_type *attach_t; 599 }; 600 601 static int find_bpffs_btf_enums(struct bpffs_btf_enums *info) 602 { 603 const struct btf *btf; 604 const struct btf_type *t; 605 const char *name; 606 int i, n; 607 608 memset(info, 0, sizeof(*info)); 609 610 btf = bpf_get_btf_vmlinux(); 611 if (IS_ERR(btf)) 612 return PTR_ERR(btf); 613 if (!btf) 614 return -ENOENT; 615 616 info->btf = btf; 617 618 for (i = 1, n = btf_nr_types(btf); i < n; i++) { 619 t = btf_type_by_id(btf, i); 620 if (!btf_type_is_enum(t)) 621 continue; 622 623 name = btf_name_by_offset(btf, t->name_off); 624 if (!name) 625 continue; 626 627 if (strcmp(name, "bpf_cmd") == 0) 628 info->cmd_t = t; 629 else if (strcmp(name, "bpf_map_type") == 0) 630 info->map_t = t; 631 else if (strcmp(name, "bpf_prog_type") == 0) 632 info->prog_t = t; 633 else if (strcmp(name, "bpf_attach_type") == 0) 634 info->attach_t = t; 635 else 636 continue; 637 638 if (info->cmd_t && info->map_t && info->prog_t && info->attach_t) 639 return 0; 640 } 641 642 return -ESRCH; 643 } 644 645 static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t, 646 const char *prefix, const char *str, int *value) 647 { 648 const struct btf_enum *e; 649 const char *name; 650 int i, n, pfx_len = strlen(prefix); 651 652 *value = 0; 653 654 if (!btf || !enum_t) 655 return false; 656 657 for (i = 0, n = btf_vlen(enum_t); i < n; i++) { 658 e = &btf_enum(enum_t)[i]; 659 660 name = btf_name_by_offset(btf, e->name_off); 661 if (!name || strncasecmp(name, prefix, pfx_len) != 0) 662 continue; 663 664 /* match symbolic name case insensitive and ignoring prefix */ 665 if (strcasecmp(name + pfx_len, str) == 0) { 666 *value = e->val; 667 return true; 668 } 669 } 670 671 return false; 672 } 673 674 static void seq_print_delegate_opts(struct seq_file *m, 675 const char *opt_name, 676 const struct btf *btf, 677 const struct btf_type *enum_t, 678 const char *prefix, 679 u64 delegate_msk, u64 any_msk) 680 { 681 const struct btf_enum *e; 682 bool first = true; 683 const char *name; 684 u64 msk; 685 int i, n, pfx_len = strlen(prefix); 686 687 delegate_msk &= any_msk; /* clear unknown bits */ 688 689 if (delegate_msk == 0) 690 return; 691 692 seq_printf(m, ",%s", opt_name); 693 if (delegate_msk == any_msk) { 694 seq_printf(m, "=any"); 695 return; 696 } 697 698 if (btf && enum_t) { 699 for (i = 0, n = btf_vlen(enum_t); i < n; i++) { 700 e = &btf_enum(enum_t)[i]; 701 name = btf_name_by_offset(btf, e->name_off); 702 if (!name || strncasecmp(name, prefix, pfx_len) != 0) 703 continue; 704 msk = 1ULL << e->val; 705 if (delegate_msk & msk) { 706 /* emit lower-case name without prefix */ 707 seq_putc(m, first ? '=' : ':'); 708 name += pfx_len; 709 while (*name) { 710 seq_putc(m, tolower(*name)); 711 name++; 712 } 713 714 delegate_msk &= ~msk; 715 first = false; 716 } 717 } 718 } 719 if (delegate_msk) 720 seq_printf(m, "%c0x%llx", first ? '=' : ':', delegate_msk); 721 } 722 723 /* 724 * Display the mount options in /proc/mounts. 725 */ 726 static int bpf_show_options(struct seq_file *m, struct dentry *root) 727 { 728 struct inode *inode = d_inode(root); 729 umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX; 730 struct bpf_mount_opts *opts = root->d_sb->s_fs_info; 731 u64 mask; 732 733 if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) 734 seq_printf(m, ",uid=%u", 735 from_kuid_munged(&init_user_ns, inode->i_uid)); 736 if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) 737 seq_printf(m, ",gid=%u", 738 from_kgid_munged(&init_user_ns, inode->i_gid)); 739 if (mode != S_IRWXUGO) 740 seq_printf(m, ",mode=%o", mode); 741 742 if (opts->delegate_cmds || opts->delegate_maps || 743 opts->delegate_progs || opts->delegate_attachs) { 744 struct bpffs_btf_enums info; 745 746 /* ignore errors, fallback to hex */ 747 (void)find_bpffs_btf_enums(&info); 748 749 mask = (1ULL << __MAX_BPF_CMD) - 1; 750 seq_print_delegate_opts(m, "delegate_cmds", 751 info.btf, info.cmd_t, "BPF_", 752 opts->delegate_cmds, mask); 753 754 mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; 755 seq_print_delegate_opts(m, "delegate_maps", 756 info.btf, info.map_t, "BPF_MAP_TYPE_", 757 opts->delegate_maps, mask); 758 759 mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; 760 seq_print_delegate_opts(m, "delegate_progs", 761 info.btf, info.prog_t, "BPF_PROG_TYPE_", 762 opts->delegate_progs, mask); 763 764 mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; 765 seq_print_delegate_opts(m, "delegate_attachs", 766 info.btf, info.attach_t, "BPF_", 767 opts->delegate_attachs, mask); 768 } 769 770 return 0; 771 } 772 773 static void bpf_destroy_inode(struct inode *inode) 774 { 775 enum bpf_type type; 776 777 if (S_ISLNK(inode->i_mode)) 778 kfree(inode->i_link); 779 if (!bpf_inode_type(inode, &type)) 780 bpf_any_put(inode->i_private, type); 781 free_inode_nonrcu(inode); 782 } 783 784 const struct super_operations bpf_super_ops = { 785 .statfs = simple_statfs, 786 .drop_inode = inode_just_drop, 787 .show_options = bpf_show_options, 788 .destroy_inode = bpf_destroy_inode, 789 }; 790 791 enum { 792 OPT_UID, 793 OPT_GID, 794 OPT_MODE, 795 OPT_DELEGATE_CMDS, 796 OPT_DELEGATE_MAPS, 797 OPT_DELEGATE_PROGS, 798 OPT_DELEGATE_ATTACHS, 799 }; 800 801 static const struct fs_parameter_spec bpf_fs_parameters[] = { 802 fsparam_u32 ("uid", OPT_UID), 803 fsparam_u32 ("gid", OPT_GID), 804 fsparam_u32oct ("mode", OPT_MODE), 805 fsparam_string ("delegate_cmds", OPT_DELEGATE_CMDS), 806 fsparam_string ("delegate_maps", OPT_DELEGATE_MAPS), 807 fsparam_string ("delegate_progs", OPT_DELEGATE_PROGS), 808 fsparam_string ("delegate_attachs", OPT_DELEGATE_ATTACHS), 809 {} 810 }; 811 812 static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) 813 { 814 struct bpf_mount_opts *opts = fc->s_fs_info; 815 struct fs_parse_result result; 816 kuid_t uid; 817 kgid_t gid; 818 int opt, err; 819 820 opt = fs_parse(fc, bpf_fs_parameters, param, &result); 821 if (opt < 0) { 822 /* We might like to report bad mount options here, but 823 * traditionally we've ignored all mount options, so we'd 824 * better continue to ignore non-existing options for bpf. 825 */ 826 if (opt == -ENOPARAM) { 827 opt = vfs_parse_fs_param_source(fc, param); 828 if (opt != -ENOPARAM) 829 return opt; 830 831 return 0; 832 } 833 834 if (opt < 0) 835 return opt; 836 } 837 838 switch (opt) { 839 case OPT_UID: 840 uid = make_kuid(current_user_ns(), result.uint_32); 841 if (!uid_valid(uid)) 842 goto bad_value; 843 844 /* 845 * The requested uid must be representable in the 846 * filesystem's idmapping. 847 */ 848 if (!kuid_has_mapping(fc->user_ns, uid)) 849 goto bad_value; 850 851 opts->uid = uid; 852 break; 853 case OPT_GID: 854 gid = make_kgid(current_user_ns(), result.uint_32); 855 if (!gid_valid(gid)) 856 goto bad_value; 857 858 /* 859 * The requested gid must be representable in the 860 * filesystem's idmapping. 861 */ 862 if (!kgid_has_mapping(fc->user_ns, gid)) 863 goto bad_value; 864 865 opts->gid = gid; 866 break; 867 case OPT_MODE: 868 opts->mode = result.uint_32 & S_IALLUGO; 869 break; 870 case OPT_DELEGATE_CMDS: 871 case OPT_DELEGATE_MAPS: 872 case OPT_DELEGATE_PROGS: 873 case OPT_DELEGATE_ATTACHS: { 874 struct bpffs_btf_enums info; 875 const struct btf_type *enum_t; 876 const char *enum_pfx; 877 u64 *delegate_msk, msk = 0; 878 char *p, *str; 879 int val; 880 881 /* ignore errors, fallback to hex */ 882 (void)find_bpffs_btf_enums(&info); 883 884 switch (opt) { 885 case OPT_DELEGATE_CMDS: 886 delegate_msk = &opts->delegate_cmds; 887 enum_t = info.cmd_t; 888 enum_pfx = "BPF_"; 889 break; 890 case OPT_DELEGATE_MAPS: 891 delegate_msk = &opts->delegate_maps; 892 enum_t = info.map_t; 893 enum_pfx = "BPF_MAP_TYPE_"; 894 break; 895 case OPT_DELEGATE_PROGS: 896 delegate_msk = &opts->delegate_progs; 897 enum_t = info.prog_t; 898 enum_pfx = "BPF_PROG_TYPE_"; 899 break; 900 case OPT_DELEGATE_ATTACHS: 901 delegate_msk = &opts->delegate_attachs; 902 enum_t = info.attach_t; 903 enum_pfx = "BPF_"; 904 break; 905 default: 906 return -EINVAL; 907 } 908 909 str = param->string; 910 while ((p = strsep(&str, ":"))) { 911 if (strcmp(p, "any") == 0) { 912 msk |= ~0ULL; 913 } else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) { 914 msk |= 1ULL << val; 915 } else { 916 err = kstrtou64(p, 0, &msk); 917 if (err) 918 return err; 919 } 920 } 921 922 /* Setting delegation mount options requires privileges */ 923 if (msk && !capable(CAP_SYS_ADMIN)) 924 return -EPERM; 925 926 *delegate_msk |= msk; 927 break; 928 } 929 default: 930 /* ignore unknown mount options */ 931 break; 932 } 933 934 return 0; 935 bad_value: 936 return invalfc(fc, "Bad value for '%s'", param->key); 937 } 938 939 struct bpf_preload_ops *bpf_preload_ops; 940 EXPORT_SYMBOL_GPL(bpf_preload_ops); 941 942 static bool bpf_preload_mod_get(void) 943 { 944 /* If bpf_preload.ko wasn't loaded earlier then load it now. 945 * When bpf_preload is built into vmlinux the module's __init 946 * function will populate it. 947 */ 948 if (!bpf_preload_ops) { 949 request_module("bpf_preload"); 950 if (!bpf_preload_ops) 951 return false; 952 } 953 /* And grab the reference, so the module doesn't disappear while the 954 * kernel is interacting with the kernel module and its UMD. 955 */ 956 if (!try_module_get(bpf_preload_ops->owner)) { 957 pr_err("bpf_preload module get failed.\n"); 958 return false; 959 } 960 return true; 961 } 962 963 static void bpf_preload_mod_put(void) 964 { 965 if (bpf_preload_ops) 966 /* now user can "rmmod bpf_preload" if necessary */ 967 module_put(bpf_preload_ops->owner); 968 } 969 970 static DEFINE_MUTEX(bpf_preload_lock); 971 972 static int populate_bpffs(struct dentry *parent) 973 { 974 struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {}; 975 int err = 0, i; 976 977 /* grab the mutex to make sure the kernel interactions with bpf_preload 978 * are serialized 979 */ 980 mutex_lock(&bpf_preload_lock); 981 982 /* if bpf_preload.ko wasn't built into vmlinux then load it */ 983 if (!bpf_preload_mod_get()) 984 goto out; 985 986 err = bpf_preload_ops->preload(objs); 987 if (err) 988 goto out_put; 989 for (i = 0; i < BPF_PRELOAD_LINKS; i++) { 990 bpf_link_inc(objs[i].link); 991 err = bpf_iter_link_pin_kernel(parent, 992 objs[i].link_name, objs[i].link); 993 if (err) { 994 bpf_link_put(objs[i].link); 995 goto out_put; 996 } 997 } 998 out_put: 999 bpf_preload_mod_put(); 1000 out: 1001 mutex_unlock(&bpf_preload_lock); 1002 return err; 1003 } 1004 1005 static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) 1006 { 1007 static const struct tree_descr bpf_rfiles[] = { { "" } }; 1008 struct bpf_mount_opts *opts = sb->s_fs_info; 1009 struct inode *inode; 1010 int ret; 1011 1012 /* Mounting an instance of BPF FS requires privileges */ 1013 if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN)) 1014 return -EPERM; 1015 1016 ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); 1017 if (ret) 1018 return ret; 1019 1020 sb->s_op = &bpf_super_ops; 1021 1022 inode = sb->s_root->d_inode; 1023 inode->i_uid = opts->uid; 1024 inode->i_gid = opts->gid; 1025 inode->i_op = &bpf_dir_iops; 1026 inode->i_mode &= ~S_IALLUGO; 1027 populate_bpffs(sb->s_root); 1028 inode->i_mode |= S_ISVTX | opts->mode; 1029 return 0; 1030 } 1031 1032 static int bpf_get_tree(struct fs_context *fc) 1033 { 1034 return get_tree_nodev(fc, bpf_fill_super); 1035 } 1036 1037 static void bpf_free_fc(struct fs_context *fc) 1038 { 1039 kfree(fc->s_fs_info); 1040 } 1041 1042 static const struct fs_context_operations bpf_context_ops = { 1043 .free = bpf_free_fc, 1044 .parse_param = bpf_parse_param, 1045 .get_tree = bpf_get_tree, 1046 }; 1047 1048 /* 1049 * Set up the filesystem mount context. 1050 */ 1051 static int bpf_init_fs_context(struct fs_context *fc) 1052 { 1053 struct bpf_mount_opts *opts; 1054 1055 opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL); 1056 if (!opts) 1057 return -ENOMEM; 1058 1059 opts->mode = S_IRWXUGO; 1060 opts->uid = current_fsuid(); 1061 opts->gid = current_fsgid(); 1062 1063 /* start out with no BPF token delegation enabled */ 1064 opts->delegate_cmds = 0; 1065 opts->delegate_maps = 0; 1066 opts->delegate_progs = 0; 1067 opts->delegate_attachs = 0; 1068 1069 fc->s_fs_info = opts; 1070 fc->ops = &bpf_context_ops; 1071 return 0; 1072 } 1073 1074 static void bpf_kill_super(struct super_block *sb) 1075 { 1076 struct bpf_mount_opts *opts = sb->s_fs_info; 1077 1078 kill_anon_super(sb); 1079 kfree(opts); 1080 } 1081 1082 static struct file_system_type bpf_fs_type = { 1083 .owner = THIS_MODULE, 1084 .name = "bpf", 1085 .init_fs_context = bpf_init_fs_context, 1086 .parameters = bpf_fs_parameters, 1087 .kill_sb = bpf_kill_super, 1088 .fs_flags = FS_USERNS_MOUNT, 1089 }; 1090 1091 static int __init bpf_init(void) 1092 { 1093 int ret; 1094 1095 ret = sysfs_create_mount_point(fs_kobj, "bpf"); 1096 if (ret) 1097 return ret; 1098 1099 ret = register_filesystem(&bpf_fs_type); 1100 if (ret) 1101 sysfs_remove_mount_point(fs_kobj, "bpf"); 1102 1103 return ret; 1104 } 1105 fs_initcall(bpf_init); 1106