1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 */ 4 #include <crypto/sha2.h> 5 #include <linux/bpf.h> 6 #include <linux/bpf-cgroup.h> 7 #include <linux/bpf_trace.h> 8 #include <linux/bpf_lirc.h> 9 #include <linux/bpf_verifier.h> 10 #include <linux/bsearch.h> 11 #include <linux/btf.h> 12 #include <linux/syscalls.h> 13 #include <linux/slab.h> 14 #include <linux/sched/signal.h> 15 #include <linux/vmalloc.h> 16 #include <linux/mmzone.h> 17 #include <linux/anon_inodes.h> 18 #include <linux/fdtable.h> 19 #include <linux/file.h> 20 #include <linux/fs.h> 21 #include <linux/license.h> 22 #include <linux/filter.h> 23 #include <linux/kernel.h> 24 #include <linux/idr.h> 25 #include <linux/cred.h> 26 #include <linux/timekeeping.h> 27 #include <linux/ctype.h> 28 #include <linux/nospec.h> 29 #include <linux/audit.h> 30 #include <uapi/linux/btf.h> 31 #include <linux/pgtable.h> 32 #include <linux/bpf_lsm.h> 33 #include <linux/poll.h> 34 #include <linux/sort.h> 35 #include <linux/bpf-netns.h> 36 #include <linux/rcupdate_trace.h> 37 #include <linux/memcontrol.h> 38 #include <linux/trace_events.h> 39 #include <linux/tracepoint.h> 40 #include <linux/overflow.h> 41 #include <linux/cookie.h> 42 #include <linux/verification.h> 43 44 #include <net/netfilter/nf_bpf_link.h> 45 #include <net/netkit.h> 46 #include <net/tcx.h> 47 48 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ 49 (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ 50 (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 51 #define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY) 52 #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) 53 #define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \ 54 IS_FD_HASH(map)) 55 56 #define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) 57 58 DEFINE_PER_CPU(int, bpf_prog_active); 59 DEFINE_COOKIE(bpf_map_cookie); 60 static DEFINE_IDR(prog_idr); 61 static DEFINE_SPINLOCK(prog_idr_lock); 62 static DEFINE_IDR(map_idr); 63 static DEFINE_SPINLOCK(map_idr_lock); 64 static DEFINE_IDR(link_idr); 65 static DEFINE_SPINLOCK(link_idr_lock); 66 67 int sysctl_unprivileged_bpf_disabled __read_mostly = 68 IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; 69 70 static const struct bpf_map_ops * const bpf_map_types[] = { 71 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 72 #define BPF_MAP_TYPE(_id, _ops) \ 73 [_id] = &_ops, 74 #define BPF_LINK_TYPE(_id, _name) 75 #include <linux/bpf_types.h> 76 #undef BPF_PROG_TYPE 77 #undef BPF_MAP_TYPE 78 #undef BPF_LINK_TYPE 79 }; 80 81 /* 82 * If we're handed a bigger struct than we know of, ensure all the unknown bits 83 * are 0 - i.e. new user-space does not rely on any kernel feature extensions 84 * we don't know about yet. 85 * 86 * There is a ToCToU between this function call and the following 87 * copy_from_user() call. However, this is not a concern since this function is 88 * meant to be a future-proofing of bits. 89 */ 90 int bpf_check_uarg_tail_zero(bpfptr_t uaddr, 91 size_t expected_size, 92 size_t actual_size) 93 { 94 int res; 95 96 if (unlikely(actual_size > PAGE_SIZE)) /* silly large */ 97 return -E2BIG; 98 99 if (actual_size <= expected_size) 100 return 0; 101 102 if (uaddr.is_kernel) 103 res = memchr_inv(uaddr.kernel + expected_size, 0, 104 actual_size - expected_size) == NULL; 105 else 106 res = check_zeroed_user(uaddr.user + expected_size, 107 actual_size - expected_size); 108 if (res < 0) 109 return res; 110 return res ? 
0 : -E2BIG; 111 } 112 113 const struct bpf_map_ops bpf_map_offload_ops = { 114 .map_meta_equal = bpf_map_meta_equal, 115 .map_alloc = bpf_map_offload_map_alloc, 116 .map_free = bpf_map_offload_map_free, 117 .map_check_btf = map_check_no_btf, 118 .map_mem_usage = bpf_map_offload_map_mem_usage, 119 }; 120 121 static void bpf_map_write_active_inc(struct bpf_map *map) 122 { 123 atomic64_inc(&map->writecnt); 124 } 125 126 static void bpf_map_write_active_dec(struct bpf_map *map) 127 { 128 atomic64_dec(&map->writecnt); 129 } 130 131 bool bpf_map_write_active(const struct bpf_map *map) 132 { 133 return atomic64_read(&map->writecnt) != 0; 134 } 135 136 static u32 bpf_map_value_size(const struct bpf_map *map) 137 { 138 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 139 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 140 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || 141 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 142 return round_up(map->value_size, 8) * num_possible_cpus(); 143 else if (IS_FD_MAP(map)) 144 return sizeof(u32); 145 else 146 return map->value_size; 147 } 148 149 static void maybe_wait_bpf_programs(struct bpf_map *map) 150 { 151 /* Wait for any running non-sleepable BPF programs to complete so that 152 * userspace, when we return to it, knows that all non-sleepable 153 * programs that could be running use the new map value. For sleepable 154 * BPF programs, synchronize_rcu_tasks_trace() should be used to wait 155 * for the completions of these programs, but considering the waiting 156 * time can be very long and userspace may think it will hang forever, 157 * so don't handle sleepable BPF programs now. 158 */ 159 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || 160 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 161 synchronize_rcu(); 162 } 163 164 static void unpin_uptr_kaddr(void *kaddr) 165 { 166 if (kaddr) 167 unpin_user_page(virt_to_page(kaddr)); 168 } 169 170 static void __bpf_obj_unpin_uptrs(struct btf_record *rec, u32 cnt, void *obj) 171 { 172 const struct btf_field *field; 173 void **uptr_addr; 174 int i; 175 176 for (i = 0, field = rec->fields; i < cnt; i++, field++) { 177 if (field->type != BPF_UPTR) 178 continue; 179 180 uptr_addr = obj + field->offset; 181 unpin_uptr_kaddr(*uptr_addr); 182 } 183 } 184 185 static void bpf_obj_unpin_uptrs(struct btf_record *rec, void *obj) 186 { 187 if (!btf_record_has_field(rec, BPF_UPTR)) 188 return; 189 190 __bpf_obj_unpin_uptrs(rec, rec->cnt, obj); 191 } 192 193 static int bpf_obj_pin_uptrs(struct btf_record *rec, void *obj) 194 { 195 const struct btf_field *field; 196 const struct btf_type *t; 197 unsigned long start, end; 198 struct page *page; 199 void **uptr_addr; 200 int i, err; 201 202 if (!btf_record_has_field(rec, BPF_UPTR)) 203 return 0; 204 205 for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) { 206 if (field->type != BPF_UPTR) 207 continue; 208 209 uptr_addr = obj + field->offset; 210 start = *(unsigned long *)uptr_addr; 211 if (!start) 212 continue; 213 214 t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id); 215 /* t->size was checked for zero before */ 216 if (check_add_overflow(start, t->size - 1, &end)) { 217 err = -EFAULT; 218 goto unpin_all; 219 } 220 221 /* The uptr's struct cannot span across two pages */ 222 if ((start & PAGE_MASK) != (end & PAGE_MASK)) { 223 err = -EOPNOTSUPP; 224 goto unpin_all; 225 } 226 227 err = pin_user_pages_fast(start, 1, FOLL_LONGTERM | FOLL_WRITE, &page); 228 if (err != 1) 229 goto unpin_all; 230 231 if (PageHighMem(page)) { 232 err = -EOPNOTSUPP; 233 
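/* Highmem pages have no permanent kernel mapping, so page_address() could not provide a stable kaddr to record in the uptr; unpin the page and fail the update. */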
unpin_user_page(page); 234 goto unpin_all; 235 } 236 237 *uptr_addr = page_address(page) + offset_in_page(start); 238 } 239 240 return 0; 241 242 unpin_all: 243 __bpf_obj_unpin_uptrs(rec, i, obj); 244 return err; 245 } 246 247 static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, 248 void *key, void *value, __u64 flags) 249 { 250 int err; 251 252 /* Need to create a kthread, thus must support schedule */ 253 if (bpf_map_is_offloaded(map)) { 254 return bpf_map_offload_update_elem(map, key, value, flags); 255 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || 256 map->map_type == BPF_MAP_TYPE_ARENA || 257 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 258 return map->ops->map_update_elem(map, key, value, flags); 259 } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || 260 map->map_type == BPF_MAP_TYPE_SOCKMAP) { 261 return sock_map_update_elem_sys(map, key, value, flags); 262 } else if (IS_FD_PROG_ARRAY(map)) { 263 return bpf_fd_array_map_update_elem(map, map_file, key, value, 264 flags); 265 } 266 267 bpf_disable_instrumentation(); 268 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 269 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 270 err = bpf_percpu_hash_update(map, key, value, flags); 271 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 272 err = bpf_percpu_array_update(map, key, value, flags); 273 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 274 err = bpf_percpu_cgroup_storage_update(map, key, value, 275 flags); 276 } else if (IS_FD_ARRAY(map)) { 277 err = bpf_fd_array_map_update_elem(map, map_file, key, value, 278 flags); 279 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 280 err = bpf_fd_htab_map_update_elem(map, map_file, key, value, 281 flags); 282 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 283 /* rcu_read_lock() is not needed */ 284 err = bpf_fd_reuseport_array_update_elem(map, key, value, 285 flags); 286 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 287 map->map_type == BPF_MAP_TYPE_STACK || 288 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 289 err = map->ops->map_push_elem(map, value, flags); 290 } else { 291 err = bpf_obj_pin_uptrs(map->record, value); 292 if (!err) { 293 rcu_read_lock(); 294 err = map->ops->map_update_elem(map, key, value, flags); 295 rcu_read_unlock(); 296 if (err) 297 bpf_obj_unpin_uptrs(map->record, value); 298 } 299 } 300 bpf_enable_instrumentation(); 301 302 return err; 303 } 304 305 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, 306 __u64 flags) 307 { 308 void *ptr; 309 int err; 310 311 if (bpf_map_is_offloaded(map)) 312 return bpf_map_offload_lookup_elem(map, key, value); 313 314 bpf_disable_instrumentation(); 315 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 316 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 317 err = bpf_percpu_hash_copy(map, key, value); 318 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 319 err = bpf_percpu_array_copy(map, key, value); 320 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 321 err = bpf_percpu_cgroup_storage_copy(map, key, value); 322 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 323 err = bpf_stackmap_extract(map, key, value, false); 324 } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { 325 err = bpf_fd_array_map_lookup_elem(map, key, value); 326 } else if (IS_FD_HASH(map)) { 327 err = bpf_fd_htab_map_lookup_elem(map, key, value); 328 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 329 err = bpf_fd_reuseport_array_lookup_elem(map, key, 
value); 330 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 331 map->map_type == BPF_MAP_TYPE_STACK || 332 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 333 err = map->ops->map_peek_elem(map, value); 334 } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 335 /* struct_ops map requires directly updating "value" */ 336 err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); 337 } else { 338 rcu_read_lock(); 339 if (map->ops->map_lookup_elem_sys_only) 340 ptr = map->ops->map_lookup_elem_sys_only(map, key); 341 else 342 ptr = map->ops->map_lookup_elem(map, key); 343 if (IS_ERR(ptr)) { 344 err = PTR_ERR(ptr); 345 } else if (!ptr) { 346 err = -ENOENT; 347 } else { 348 err = 0; 349 if (flags & BPF_F_LOCK) 350 /* lock 'ptr' and copy everything but lock */ 351 copy_map_value_locked(map, value, ptr, true); 352 else 353 copy_map_value(map, value, ptr); 354 /* mask lock and timer, since value wasn't zero inited */ 355 check_and_init_map_value(map, value); 356 } 357 rcu_read_unlock(); 358 } 359 360 bpf_enable_instrumentation(); 361 362 return err; 363 } 364 365 /* Please, do not use this function outside from the map creation path 366 * (e.g. in map update path) without taking care of setting the active 367 * memory cgroup (see at bpf_map_kmalloc_node() for example). 368 */ 369 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) 370 { 371 /* We really just want to fail instead of triggering OOM killer 372 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, 373 * which is used for lower order allocation requests. 374 * 375 * It has been observed that higher order allocation requests done by 376 * vmalloc with __GFP_NORETRY being set might fail due to not trying 377 * to reclaim memory from the page cache, thus we set 378 * __GFP_RETRY_MAYFAIL to avoid such situations. 379 */ 380 381 gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO); 382 unsigned int flags = 0; 383 unsigned long align = 1; 384 void *area; 385 386 if (size >= SIZE_MAX) 387 return NULL; 388 389 /* kmalloc()'ed memory can't be mmap()'ed */ 390 if (mmapable) { 391 BUG_ON(!PAGE_ALIGNED(size)); 392 align = SHMLBA; 393 flags = VM_USERMAP; 394 } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 395 area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, 396 numa_node); 397 if (area != NULL) 398 return area; 399 } 400 401 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, 402 gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL, 403 flags, numa_node, __builtin_return_address(0)); 404 } 405 406 void *bpf_map_area_alloc(u64 size, int numa_node) 407 { 408 return __bpf_map_area_alloc(size, numa_node, false); 409 } 410 411 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node) 412 { 413 return __bpf_map_area_alloc(size, numa_node, true); 414 } 415 416 void bpf_map_area_free(void *area) 417 { 418 kvfree(area); 419 } 420 421 static u32 bpf_map_flags_retain_permanent(u32 flags) 422 { 423 /* Some map creation flags are not tied to the map object but 424 * rather to the map fd instead, so they have no meaning upon 425 * map object inspection since multiple file descriptors with 426 * different (access) properties can exist here. Thus, given 427 * this has zero meaning for the map itself, lets clear these 428 * from here. 
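* Concretely, only BPF_F_RDONLY and BPF_F_WRONLY are cleared here; every other flag passed at map creation time is kept.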
429 */ 430 return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); 431 } 432 433 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) 434 { 435 map->map_type = attr->map_type; 436 map->key_size = attr->key_size; 437 map->value_size = attr->value_size; 438 map->max_entries = attr->max_entries; 439 map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); 440 map->numa_node = bpf_map_attr_numa_node(attr); 441 map->map_extra = attr->map_extra; 442 } 443 444 static int bpf_map_alloc_id(struct bpf_map *map) 445 { 446 int id; 447 448 idr_preload(GFP_KERNEL); 449 spin_lock_bh(&map_idr_lock); 450 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); 451 if (id > 0) 452 map->id = id; 453 spin_unlock_bh(&map_idr_lock); 454 idr_preload_end(); 455 456 if (WARN_ON_ONCE(!id)) 457 return -ENOSPC; 458 459 return id > 0 ? 0 : id; 460 } 461 462 void bpf_map_free_id(struct bpf_map *map) 463 { 464 unsigned long flags; 465 466 /* Offloaded maps are removed from the IDR store when their device 467 * disappears - even if someone holds an fd to them they are unusable, 468 * the memory is gone, all ops will fail; they are simply waiting for 469 * refcnt to drop to be freed. 470 */ 471 if (!map->id) 472 return; 473 474 spin_lock_irqsave(&map_idr_lock, flags); 475 476 idr_remove(&map_idr, map->id); 477 map->id = 0; 478 479 spin_unlock_irqrestore(&map_idr_lock, flags); 480 } 481 482 #ifdef CONFIG_MEMCG 483 static void bpf_map_save_memcg(struct bpf_map *map) 484 { 485 /* Currently if a map is created by a process belonging to the root 486 * memory cgroup, get_obj_cgroup_from_current() will return NULL. 487 * So we have to check map->objcg for being NULL each time it's 488 * being used. 489 */ 490 if (memcg_bpf_enabled()) 491 map->objcg = get_obj_cgroup_from_current(); 492 } 493 494 static void bpf_map_release_memcg(struct bpf_map *map) 495 { 496 if (map->objcg) 497 obj_cgroup_put(map->objcg); 498 } 499 500 static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map) 501 { 502 if (map->objcg) 503 return get_mem_cgroup_from_objcg(map->objcg); 504 505 return root_mem_cgroup; 506 } 507 508 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, 509 int node) 510 { 511 struct mem_cgroup *memcg, *old_memcg; 512 void *ptr; 513 514 memcg = bpf_map_get_memcg(map); 515 old_memcg = set_active_memcg(memcg); 516 ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); 517 set_active_memcg(old_memcg); 518 mem_cgroup_put(memcg); 519 520 return ptr; 521 } 522 523 void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, 524 int node) 525 { 526 struct mem_cgroup *memcg, *old_memcg; 527 void *ptr; 528 529 memcg = bpf_map_get_memcg(map); 530 old_memcg = set_active_memcg(memcg); 531 ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node); 532 set_active_memcg(old_memcg); 533 mem_cgroup_put(memcg); 534 535 return ptr; 536 } 537 538 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) 539 { 540 struct mem_cgroup *memcg, *old_memcg; 541 void *ptr; 542 543 memcg = bpf_map_get_memcg(map); 544 old_memcg = set_active_memcg(memcg); 545 ptr = kzalloc(size, flags | __GFP_ACCOUNT); 546 set_active_memcg(old_memcg); 547 mem_cgroup_put(memcg); 548 549 return ptr; 550 } 551 552 void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, 553 gfp_t flags) 554 { 555 struct mem_cgroup *memcg, *old_memcg; 556 void *ptr; 557 558 memcg = bpf_map_get_memcg(map); 559 old_memcg = set_active_memcg(memcg); 560 ptr = kvcalloc(n, size, flags | 
__GFP_ACCOUNT); 561 set_active_memcg(old_memcg); 562 mem_cgroup_put(memcg); 563 564 return ptr; 565 } 566 567 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, 568 size_t align, gfp_t flags) 569 { 570 struct mem_cgroup *memcg, *old_memcg; 571 void __percpu *ptr; 572 573 memcg = bpf_map_get_memcg(map); 574 old_memcg = set_active_memcg(memcg); 575 ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT); 576 set_active_memcg(old_memcg); 577 mem_cgroup_put(memcg); 578 579 return ptr; 580 } 581 582 #else 583 static void bpf_map_save_memcg(struct bpf_map *map) 584 { 585 } 586 587 static void bpf_map_release_memcg(struct bpf_map *map) 588 { 589 } 590 #endif 591 592 static bool can_alloc_pages(void) 593 { 594 return preempt_count() == 0 && !irqs_disabled() && 595 !IS_ENABLED(CONFIG_PREEMPT_RT); 596 } 597 598 static struct page *__bpf_alloc_page(int nid) 599 { 600 if (!can_alloc_pages()) 601 return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0); 602 603 return alloc_pages_node(nid, 604 GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT 605 | __GFP_NOWARN, 606 0); 607 } 608 609 int bpf_map_alloc_pages(const struct bpf_map *map, int nid, 610 unsigned long nr_pages, struct page **pages) 611 { 612 unsigned long i, j; 613 struct page *pg; 614 int ret = 0; 615 #ifdef CONFIG_MEMCG 616 struct mem_cgroup *memcg, *old_memcg; 617 618 memcg = bpf_map_get_memcg(map); 619 old_memcg = set_active_memcg(memcg); 620 #endif 621 for (i = 0; i < nr_pages; i++) { 622 pg = __bpf_alloc_page(nid); 623 624 if (pg) { 625 pages[i] = pg; 626 continue; 627 } 628 for (j = 0; j < i; j++) 629 free_pages_nolock(pages[j], 0); 630 ret = -ENOMEM; 631 break; 632 } 633 634 #ifdef CONFIG_MEMCG 635 set_active_memcg(old_memcg); 636 mem_cgroup_put(memcg); 637 #endif 638 return ret; 639 } 640 641 642 static int btf_field_cmp(const void *a, const void *b) 643 { 644 const struct btf_field *f1 = a, *f2 = b; 645 646 if (f1->offset < f2->offset) 647 return -1; 648 else if (f1->offset > f2->offset) 649 return 1; 650 return 0; 651 } 652 653 struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset, 654 u32 field_mask) 655 { 656 struct btf_field *field; 657 658 if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & field_mask)) 659 return NULL; 660 field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp); 661 if (!field || !(field->type & field_mask)) 662 return NULL; 663 return field; 664 } 665 666 void btf_record_free(struct btf_record *rec) 667 { 668 int i; 669 670 if (IS_ERR_OR_NULL(rec)) 671 return; 672 for (i = 0; i < rec->cnt; i++) { 673 switch (rec->fields[i].type) { 674 case BPF_KPTR_UNREF: 675 case BPF_KPTR_REF: 676 case BPF_KPTR_PERCPU: 677 case BPF_UPTR: 678 if (rec->fields[i].kptr.module) 679 module_put(rec->fields[i].kptr.module); 680 if (btf_is_kernel(rec->fields[i].kptr.btf)) 681 btf_put(rec->fields[i].kptr.btf); 682 break; 683 case BPF_LIST_HEAD: 684 case BPF_LIST_NODE: 685 case BPF_RB_ROOT: 686 case BPF_RB_NODE: 687 case BPF_SPIN_LOCK: 688 case BPF_RES_SPIN_LOCK: 689 case BPF_TIMER: 690 case BPF_REFCOUNT: 691 case BPF_WORKQUEUE: 692 case BPF_TASK_WORK: 693 /* Nothing to release */ 694 break; 695 default: 696 WARN_ON_ONCE(1); 697 continue; 698 } 699 } 700 kfree(rec); 701 } 702 703 void bpf_map_free_record(struct bpf_map *map) 704 { 705 btf_record_free(map->record); 706 map->record = NULL; 707 } 708 709 struct btf_record *btf_record_dup(const struct btf_record *rec) 710 { 711 const struct btf_field *fields; 712 struct btf_record *new_rec; 713 int ret, size, i; 714 715 if 
(IS_ERR_OR_NULL(rec)) 716 return NULL; 717 size = struct_size(rec, fields, rec->cnt); 718 new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN); 719 if (!new_rec) 720 return ERR_PTR(-ENOMEM); 721 /* Do a deep copy of the btf_record */ 722 fields = rec->fields; 723 new_rec->cnt = 0; 724 for (i = 0; i < rec->cnt; i++) { 725 switch (fields[i].type) { 726 case BPF_KPTR_UNREF: 727 case BPF_KPTR_REF: 728 case BPF_KPTR_PERCPU: 729 case BPF_UPTR: 730 if (btf_is_kernel(fields[i].kptr.btf)) 731 btf_get(fields[i].kptr.btf); 732 if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { 733 ret = -ENXIO; 734 goto free; 735 } 736 break; 737 case BPF_LIST_HEAD: 738 case BPF_LIST_NODE: 739 case BPF_RB_ROOT: 740 case BPF_RB_NODE: 741 case BPF_SPIN_LOCK: 742 case BPF_RES_SPIN_LOCK: 743 case BPF_TIMER: 744 case BPF_REFCOUNT: 745 case BPF_WORKQUEUE: 746 case BPF_TASK_WORK: 747 /* Nothing to acquire */ 748 break; 749 default: 750 ret = -EFAULT; 751 WARN_ON_ONCE(1); 752 goto free; 753 } 754 new_rec->cnt++; 755 } 756 return new_rec; 757 free: 758 btf_record_free(new_rec); 759 return ERR_PTR(ret); 760 } 761 762 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b) 763 { 764 bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b); 765 int size; 766 767 if (!a_has_fields && !b_has_fields) 768 return true; 769 if (a_has_fields != b_has_fields) 770 return false; 771 if (rec_a->cnt != rec_b->cnt) 772 return false; 773 size = struct_size(rec_a, fields, rec_a->cnt); 774 /* btf_parse_fields uses kzalloc to allocate a btf_record, so unused 775 * members are zeroed out. So memcmp is safe to do without worrying 776 * about padding/unused fields. 777 * 778 * While spin_lock, timer, and kptr have no relation to map BTF, 779 * list_head metadata is specific to map BTF, the btf and value_rec 780 * members in particular. btf is the map BTF, while value_rec points to 781 * btf_record in that map BTF. 782 * 783 * So while by default, we don't rely on the map BTF (which the records 784 * were parsed from) matching for both records, which is not backwards 785 * compatible, in case list_head is part of it, we implicitly rely on 786 * that by way of depending on memcmp succeeding for it. 
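* Note that "size" covers only the rec_a->cnt fields actually present; equal cnt was already checked above, so comparing that many bytes is sufficient.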
787 */ 788 return !memcmp(rec_a, rec_b, size); 789 } 790 791 void bpf_obj_free_timer(const struct btf_record *rec, void *obj) 792 { 793 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER))) 794 return; 795 bpf_timer_cancel_and_free(obj + rec->timer_off); 796 } 797 798 void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj) 799 { 800 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_WORKQUEUE))) 801 return; 802 bpf_wq_cancel_and_free(obj + rec->wq_off); 803 } 804 805 void bpf_obj_free_task_work(const struct btf_record *rec, void *obj) 806 { 807 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK))) 808 return; 809 bpf_task_work_cancel_and_free(obj + rec->task_work_off); 810 } 811 812 void bpf_obj_free_fields(const struct btf_record *rec, void *obj) 813 { 814 const struct btf_field *fields; 815 int i; 816 817 if (IS_ERR_OR_NULL(rec)) 818 return; 819 fields = rec->fields; 820 for (i = 0; i < rec->cnt; i++) { 821 struct btf_struct_meta *pointee_struct_meta; 822 const struct btf_field *field = &fields[i]; 823 void *field_ptr = obj + field->offset; 824 void *xchgd_field; 825 826 switch (fields[i].type) { 827 case BPF_SPIN_LOCK: 828 case BPF_RES_SPIN_LOCK: 829 break; 830 case BPF_TIMER: 831 bpf_timer_cancel_and_free(field_ptr); 832 break; 833 case BPF_WORKQUEUE: 834 bpf_wq_cancel_and_free(field_ptr); 835 break; 836 case BPF_TASK_WORK: 837 bpf_task_work_cancel_and_free(field_ptr); 838 break; 839 case BPF_KPTR_UNREF: 840 WRITE_ONCE(*(u64 *)field_ptr, 0); 841 break; 842 case BPF_KPTR_REF: 843 case BPF_KPTR_PERCPU: 844 xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0); 845 if (!xchgd_field) 846 break; 847 848 if (!btf_is_kernel(field->kptr.btf)) { 849 pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, 850 field->kptr.btf_id); 851 __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? 852 pointee_struct_meta->record : NULL, 853 fields[i].type == BPF_KPTR_PERCPU); 854 } else { 855 field->kptr.dtor(xchgd_field); 856 } 857 break; 858 case BPF_UPTR: 859 /* The caller ensured that no one is using the uptr */ 860 unpin_uptr_kaddr(*(void **)field_ptr); 861 break; 862 case BPF_LIST_HEAD: 863 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 864 continue; 865 bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off); 866 break; 867 case BPF_RB_ROOT: 868 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 869 continue; 870 bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off); 871 break; 872 case BPF_LIST_NODE: 873 case BPF_RB_NODE: 874 case BPF_REFCOUNT: 875 break; 876 default: 877 WARN_ON_ONCE(1); 878 continue; 879 } 880 } 881 } 882 883 static void bpf_map_free(struct bpf_map *map) 884 { 885 struct btf_record *rec = map->record; 886 struct btf *btf = map->btf; 887 888 /* implementation dependent freeing. Disabling migration to simplify 889 * the free of values or special fields allocated from bpf memory 890 * allocator. 891 */ 892 kfree(map->excl_prog_sha); 893 migrate_disable(); 894 map->ops->map_free(map); 895 migrate_enable(); 896 897 /* Delay freeing of btf_record for maps, as map_free 898 * callback usually needs access to them. It is better to do it here 899 * than require each callback to do the free itself manually. 900 * 901 * Note that the btf_record stashed in map->inner_map_meta->record was 902 * already freed using the map_free callback for map in map case which 903 * eventually calls bpf_map_free_meta, since inner_map_meta is only a 904 * template bpf_map struct used during verification. 
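* Also note that 'rec' and 'btf' were saved in local variables above because, as a rule, ops->map_free() frees the bpf_map structure itself, so 'map' must not be dereferenced after that call.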
905 */ 906 btf_record_free(rec); 907 /* Delay freeing of btf for maps, as map_free callback may need 908 * struct_meta info which will be freed with btf_put(). 909 */ 910 btf_put(btf); 911 } 912 913 /* called from workqueue */ 914 static void bpf_map_free_deferred(struct work_struct *work) 915 { 916 struct bpf_map *map = container_of(work, struct bpf_map, work); 917 918 security_bpf_map_free(map); 919 bpf_map_release_memcg(map); 920 bpf_map_owner_free(map); 921 bpf_map_free(map); 922 } 923 924 static void bpf_map_put_uref(struct bpf_map *map) 925 { 926 if (atomic64_dec_and_test(&map->usercnt)) { 927 if (map->ops->map_release_uref) 928 map->ops->map_release_uref(map); 929 } 930 } 931 932 static void bpf_map_free_in_work(struct bpf_map *map) 933 { 934 INIT_WORK(&map->work, bpf_map_free_deferred); 935 /* Avoid spawning kworkers, since they all might contend 936 * for the same mutex like slab_mutex. 937 */ 938 queue_work(system_dfl_wq, &map->work); 939 } 940 941 static void bpf_map_free_rcu_gp(struct rcu_head *rcu) 942 { 943 bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu)); 944 } 945 946 static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu) 947 { 948 if (rcu_trace_implies_rcu_gp()) 949 bpf_map_free_rcu_gp(rcu); 950 else 951 call_rcu(rcu, bpf_map_free_rcu_gp); 952 } 953 954 /* decrement map refcnt and schedule it for freeing via workqueue 955 * (underlying map implementation ops->map_free() might sleep) 956 */ 957 void bpf_map_put(struct bpf_map *map) 958 { 959 if (atomic64_dec_and_test(&map->refcnt)) { 960 /* bpf_map_free_id() must be called first */ 961 bpf_map_free_id(map); 962 963 WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt)); 964 if (READ_ONCE(map->free_after_mult_rcu_gp)) 965 call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp); 966 else if (READ_ONCE(map->free_after_rcu_gp)) 967 call_rcu(&map->rcu, bpf_map_free_rcu_gp); 968 else 969 bpf_map_free_in_work(map); 970 } 971 } 972 EXPORT_SYMBOL_GPL(bpf_map_put); 973 974 void bpf_map_put_with_uref(struct bpf_map *map) 975 { 976 bpf_map_put_uref(map); 977 bpf_map_put(map); 978 } 979 980 static int bpf_map_release(struct inode *inode, struct file *filp) 981 { 982 struct bpf_map *map = filp->private_data; 983 984 if (map->ops->map_release) 985 map->ops->map_release(map, filp); 986 987 bpf_map_put_with_uref(map); 988 return 0; 989 } 990 991 static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) 992 { 993 fmode_t mode = fd_file(f)->f_mode; 994 995 /* Our file permissions may have been overridden by global 996 * map permissions facing syscall side. 
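* For example, once a map has been frozen via BPF_MAP_FREEZE, even an fd that was opened read-write loses FMODE_CAN_WRITE here.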
997 */ 998 if (READ_ONCE(map->frozen)) 999 mode &= ~FMODE_CAN_WRITE; 1000 return mode; 1001 } 1002 1003 #ifdef CONFIG_PROC_FS 1004 /* Show the memory usage of a bpf map */ 1005 static u64 bpf_map_memory_usage(const struct bpf_map *map) 1006 { 1007 return map->ops->map_mem_usage(map); 1008 } 1009 1010 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 1011 { 1012 struct bpf_map *map = filp->private_data; 1013 u32 type = 0, jited = 0; 1014 1015 spin_lock(&map->owner_lock); 1016 if (map->owner) { 1017 type = map->owner->type; 1018 jited = map->owner->jited; 1019 } 1020 spin_unlock(&map->owner_lock); 1021 1022 seq_printf(m, 1023 "map_type:\t%u\n" 1024 "key_size:\t%u\n" 1025 "value_size:\t%u\n" 1026 "max_entries:\t%u\n" 1027 "map_flags:\t%#x\n" 1028 "map_extra:\t%#llx\n" 1029 "memlock:\t%llu\n" 1030 "map_id:\t%u\n" 1031 "frozen:\t%u\n", 1032 map->map_type, 1033 map->key_size, 1034 map->value_size, 1035 map->max_entries, 1036 map->map_flags, 1037 (unsigned long long)map->map_extra, 1038 bpf_map_memory_usage(map), 1039 map->id, 1040 READ_ONCE(map->frozen)); 1041 if (type) { 1042 seq_printf(m, "owner_prog_type:\t%u\n", type); 1043 seq_printf(m, "owner_jited:\t%u\n", jited); 1044 } 1045 } 1046 #endif 1047 1048 static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, 1049 loff_t *ppos) 1050 { 1051 /* We need this handler such that alloc_file() enables 1052 * f_mode with FMODE_CAN_READ. 1053 */ 1054 return -EINVAL; 1055 } 1056 1057 static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, 1058 size_t siz, loff_t *ppos) 1059 { 1060 /* We need this handler such that alloc_file() enables 1061 * f_mode with FMODE_CAN_WRITE. 1062 */ 1063 return -EINVAL; 1064 } 1065 1066 /* called for any extra memory-mapped regions (except initial) */ 1067 static void bpf_map_mmap_open(struct vm_area_struct *vma) 1068 { 1069 struct bpf_map *map = vma->vm_file->private_data; 1070 1071 if (vma->vm_flags & VM_MAYWRITE) 1072 bpf_map_write_active_inc(map); 1073 } 1074 1075 /* called for all unmapped memory region (including initial) */ 1076 static void bpf_map_mmap_close(struct vm_area_struct *vma) 1077 { 1078 struct bpf_map *map = vma->vm_file->private_data; 1079 1080 if (vma->vm_flags & VM_MAYWRITE) 1081 bpf_map_write_active_dec(map); 1082 } 1083 1084 static const struct vm_operations_struct bpf_map_default_vmops = { 1085 .open = bpf_map_mmap_open, 1086 .close = bpf_map_mmap_close, 1087 }; 1088 1089 static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) 1090 { 1091 struct bpf_map *map = filp->private_data; 1092 int err = 0; 1093 1094 if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record)) 1095 return -ENOTSUPP; 1096 1097 if (!(vma->vm_flags & VM_SHARED)) 1098 return -EINVAL; 1099 1100 mutex_lock(&map->freeze_mutex); 1101 1102 if (vma->vm_flags & VM_WRITE) { 1103 if (map->frozen) { 1104 err = -EPERM; 1105 goto out; 1106 } 1107 /* map is meant to be read-only, so do not allow mapping as 1108 * writable, because it's possible to leak a writable page 1109 * reference and allows user-space to still modify it after 1110 * freezing, while verifier will assume contents do not change 1111 */ 1112 if (map->map_flags & BPF_F_RDONLY_PROG) { 1113 err = -EACCES; 1114 goto out; 1115 } 1116 bpf_map_write_active_inc(map); 1117 } 1118 out: 1119 mutex_unlock(&map->freeze_mutex); 1120 if (err) 1121 return err; 1122 1123 /* set default open/close callbacks */ 1124 vma->vm_ops = &bpf_map_default_vmops; 1125 vma->vm_private_data = map; 1126 vm_flags_clear(vma, VM_MAYEXEC); 
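/* Clearing VM_MAYEXEC above forbids turning this mapping executable later via mprotect(PROT_EXEC). */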
1127 /* If mapping is read-only, then disallow potentially re-mapping with 1128 * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing 1129 * means that as far as BPF map's memory-mapped VMAs are concerned, 1130 * VM_WRITE and VM_MAYWRITE and equivalent, if one of them is set, 1131 * both should be set, so we can forget about VM_MAYWRITE and always 1132 * check just VM_WRITE 1133 */ 1134 if (!(vma->vm_flags & VM_WRITE)) 1135 vm_flags_clear(vma, VM_MAYWRITE); 1136 1137 err = map->ops->map_mmap(map, vma); 1138 if (err) { 1139 if (vma->vm_flags & VM_WRITE) 1140 bpf_map_write_active_dec(map); 1141 } 1142 1143 return err; 1144 } 1145 1146 static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts) 1147 { 1148 struct bpf_map *map = filp->private_data; 1149 1150 if (map->ops->map_poll) 1151 return map->ops->map_poll(map, filp, pts); 1152 1153 return EPOLLERR; 1154 } 1155 1156 static unsigned long bpf_get_unmapped_area(struct file *filp, unsigned long addr, 1157 unsigned long len, unsigned long pgoff, 1158 unsigned long flags) 1159 { 1160 struct bpf_map *map = filp->private_data; 1161 1162 if (map->ops->map_get_unmapped_area) 1163 return map->ops->map_get_unmapped_area(filp, addr, len, pgoff, flags); 1164 #ifdef CONFIG_MMU 1165 return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags); 1166 #else 1167 return addr; 1168 #endif 1169 } 1170 1171 const struct file_operations bpf_map_fops = { 1172 #ifdef CONFIG_PROC_FS 1173 .show_fdinfo = bpf_map_show_fdinfo, 1174 #endif 1175 .release = bpf_map_release, 1176 .read = bpf_dummy_read, 1177 .write = bpf_dummy_write, 1178 .mmap = bpf_map_mmap, 1179 .poll = bpf_map_poll, 1180 .get_unmapped_area = bpf_get_unmapped_area, 1181 }; 1182 1183 int bpf_map_new_fd(struct bpf_map *map, int flags) 1184 { 1185 int ret; 1186 1187 ret = security_bpf_map(map, OPEN_FMODE(flags)); 1188 if (ret < 0) 1189 return ret; 1190 1191 return anon_inode_getfd("bpf-map", &bpf_map_fops, map, 1192 flags | O_CLOEXEC); 1193 } 1194 1195 int bpf_get_file_flag(int flags) 1196 { 1197 if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) 1198 return -EINVAL; 1199 if (flags & BPF_F_RDONLY) 1200 return O_RDONLY; 1201 if (flags & BPF_F_WRONLY) 1202 return O_WRONLY; 1203 return O_RDWR; 1204 } 1205 1206 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 1207 #define CHECK_ATTR(CMD) \ 1208 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 1209 sizeof(attr->CMD##_LAST_FIELD), 0, \ 1210 sizeof(*attr) - \ 1211 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 1212 sizeof(attr->CMD##_LAST_FIELD)) != NULL 1213 1214 /* dst and src must have at least "size" number of bytes. 1215 * Return strlen on success and < 0 on error. 1216 */ 1217 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size) 1218 { 1219 const char *end = src + size; 1220 const char *orig_src = src; 1221 1222 memset(dst, 0, size); 1223 /* Copy all isalnum(), '_' and '.' chars. 
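For example, "my_map.v2" is accepted, whereas a name containing a space or '-' fails with -EINVAL.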
*/ 1224 while (src < end && *src) { 1225 if (!isalnum(*src) && 1226 *src != '_' && *src != '.') 1227 return -EINVAL; 1228 *dst++ = *src++; 1229 } 1230 1231 /* No '\0' found in "size" number of bytes */ 1232 if (src == end) 1233 return -EINVAL; 1234 1235 return src - orig_src; 1236 } 1237 1238 int map_check_no_btf(const struct bpf_map *map, 1239 const struct btf *btf, 1240 const struct btf_type *key_type, 1241 const struct btf_type *value_type) 1242 { 1243 return -ENOTSUPP; 1244 } 1245 1246 static int map_check_btf(struct bpf_map *map, struct bpf_token *token, 1247 const struct btf *btf, u32 btf_key_id, u32 btf_value_id) 1248 { 1249 const struct btf_type *key_type, *value_type; 1250 u32 key_size, value_size; 1251 int ret = 0; 1252 1253 /* Some maps allow key to be unspecified. */ 1254 if (btf_key_id) { 1255 key_type = btf_type_id_size(btf, &btf_key_id, &key_size); 1256 if (!key_type || key_size != map->key_size) 1257 return -EINVAL; 1258 } else { 1259 key_type = btf_type_by_id(btf, 0); 1260 if (!map->ops->map_check_btf) 1261 return -EINVAL; 1262 } 1263 1264 value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 1265 if (!value_type || value_size != map->value_size) 1266 return -EINVAL; 1267 1268 map->record = btf_parse_fields(btf, value_type, 1269 BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | 1270 BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR | 1271 BPF_TASK_WORK, 1272 map->value_size); 1273 if (!IS_ERR_OR_NULL(map->record)) { 1274 int i; 1275 1276 if (!bpf_token_capable(token, CAP_BPF)) { 1277 ret = -EPERM; 1278 goto free_map_tab; 1279 } 1280 if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) { 1281 ret = -EACCES; 1282 goto free_map_tab; 1283 } 1284 for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) { 1285 switch (map->record->field_mask & (1 << i)) { 1286 case 0: 1287 continue; 1288 case BPF_SPIN_LOCK: 1289 case BPF_RES_SPIN_LOCK: 1290 if (map->map_type != BPF_MAP_TYPE_HASH && 1291 map->map_type != BPF_MAP_TYPE_ARRAY && 1292 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 1293 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1294 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1295 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1296 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1297 ret = -EOPNOTSUPP; 1298 goto free_map_tab; 1299 } 1300 break; 1301 case BPF_TIMER: 1302 case BPF_WORKQUEUE: 1303 case BPF_TASK_WORK: 1304 if (map->map_type != BPF_MAP_TYPE_HASH && 1305 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1306 map->map_type != BPF_MAP_TYPE_ARRAY) { 1307 ret = -EOPNOTSUPP; 1308 goto free_map_tab; 1309 } 1310 break; 1311 case BPF_KPTR_UNREF: 1312 case BPF_KPTR_REF: 1313 case BPF_KPTR_PERCPU: 1314 case BPF_REFCOUNT: 1315 if (map->map_type != BPF_MAP_TYPE_HASH && 1316 map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 1317 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1318 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH && 1319 map->map_type != BPF_MAP_TYPE_ARRAY && 1320 map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && 1321 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1322 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1323 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1324 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1325 ret = -EOPNOTSUPP; 1326 goto free_map_tab; 1327 } 1328 break; 1329 case BPF_UPTR: 1330 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) { 1331 ret = -EOPNOTSUPP; 1332 goto free_map_tab; 1333 } 1334 break; 1335 case BPF_LIST_HEAD: 1336 case BPF_RB_ROOT: 1337 if (map->map_type != BPF_MAP_TYPE_HASH && 1338 map->map_type != 
BPF_MAP_TYPE_LRU_HASH && 1339 map->map_type != BPF_MAP_TYPE_ARRAY) { 1340 ret = -EOPNOTSUPP; 1341 goto free_map_tab; 1342 } 1343 break; 1344 default: 1345 /* Fail if map_type checks are missing for a field type */ 1346 ret = -EOPNOTSUPP; 1347 goto free_map_tab; 1348 } 1349 } 1350 } 1351 1352 ret = btf_check_and_fixup_fields(btf, map->record); 1353 if (ret < 0) 1354 goto free_map_tab; 1355 1356 if (map->ops->map_check_btf) { 1357 ret = map->ops->map_check_btf(map, btf, key_type, value_type); 1358 if (ret < 0) 1359 goto free_map_tab; 1360 } 1361 1362 return ret; 1363 free_map_tab: 1364 bpf_map_free_record(map); 1365 return ret; 1366 } 1367 1368 static bool bpf_net_capable(void) 1369 { 1370 return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); 1371 } 1372 1373 #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size 1374 /* called via syscall */ 1375 static int map_create(union bpf_attr *attr, bpfptr_t uattr) 1376 { 1377 const struct bpf_map_ops *ops; 1378 struct bpf_token *token = NULL; 1379 int numa_node = bpf_map_attr_numa_node(attr); 1380 u32 map_type = attr->map_type; 1381 struct bpf_map *map; 1382 bool token_flag; 1383 int f_flags; 1384 int err; 1385 1386 err = CHECK_ATTR(BPF_MAP_CREATE); 1387 if (err) 1388 return -EINVAL; 1389 1390 /* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it 1391 * to avoid per-map type checks tripping on unknown flag 1392 */ 1393 token_flag = attr->map_flags & BPF_F_TOKEN_FD; 1394 attr->map_flags &= ~BPF_F_TOKEN_FD; 1395 1396 if (attr->btf_vmlinux_value_type_id) { 1397 if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || 1398 attr->btf_key_type_id || attr->btf_value_type_id) 1399 return -EINVAL; 1400 } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { 1401 return -EINVAL; 1402 } 1403 1404 if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && 1405 attr->map_type != BPF_MAP_TYPE_ARENA && 1406 attr->map_extra != 0) 1407 return -EINVAL; 1408 1409 f_flags = bpf_get_file_flag(attr->map_flags); 1410 if (f_flags < 0) 1411 return f_flags; 1412 1413 if (numa_node != NUMA_NO_NODE && 1414 ((unsigned int)numa_node >= nr_node_ids || 1415 !node_online(numa_node))) 1416 return -EINVAL; 1417 1418 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 1419 map_type = attr->map_type; 1420 if (map_type >= ARRAY_SIZE(bpf_map_types)) 1421 return -EINVAL; 1422 map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); 1423 ops = bpf_map_types[map_type]; 1424 if (!ops) 1425 return -EINVAL; 1426 1427 if (ops->map_alloc_check) { 1428 err = ops->map_alloc_check(attr); 1429 if (err) 1430 return err; 1431 } 1432 if (attr->map_ifindex) 1433 ops = &bpf_map_offload_ops; 1434 if (!ops->map_mem_usage) 1435 return -EINVAL; 1436 1437 if (token_flag) { 1438 token = bpf_token_get_from_fd(attr->map_token_fd); 1439 if (IS_ERR(token)) 1440 return PTR_ERR(token); 1441 1442 /* if current token doesn't grant map creation permissions, 1443 * then we can't use this token, so ignore it and rely on 1444 * system-wide capabilities checks 1445 */ 1446 if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) || 1447 !bpf_token_allow_map_type(token, attr->map_type)) { 1448 bpf_token_put(token); 1449 token = NULL; 1450 } 1451 } 1452 1453 err = -EPERM; 1454 1455 /* Intent here is for unprivileged_bpf_disabled to block BPF map 1456 * creation for unprivileged users; other actions depend 1457 * on fd availability and access to bpffs, so are dependent on 1458 * object creation success. Even with unprivileged BPF disabled, 1459 * capability checks are still carried out. 
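* In other words, the sysctl only blocks BPF_MAP_CREATE for callers that have neither CAP_BPF nor a token granting it; it places no extra restrictions on privileged callers.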
1460 */ 1461 if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF)) 1462 goto put_token; 1463 1464 /* check privileged map type permissions */ 1465 switch (map_type) { 1466 case BPF_MAP_TYPE_ARRAY: 1467 case BPF_MAP_TYPE_PERCPU_ARRAY: 1468 case BPF_MAP_TYPE_PROG_ARRAY: 1469 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 1470 case BPF_MAP_TYPE_CGROUP_ARRAY: 1471 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 1472 case BPF_MAP_TYPE_HASH: 1473 case BPF_MAP_TYPE_PERCPU_HASH: 1474 case BPF_MAP_TYPE_HASH_OF_MAPS: 1475 case BPF_MAP_TYPE_RINGBUF: 1476 case BPF_MAP_TYPE_USER_RINGBUF: 1477 case BPF_MAP_TYPE_CGROUP_STORAGE: 1478 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: 1479 /* unprivileged */ 1480 break; 1481 case BPF_MAP_TYPE_SK_STORAGE: 1482 case BPF_MAP_TYPE_INODE_STORAGE: 1483 case BPF_MAP_TYPE_TASK_STORAGE: 1484 case BPF_MAP_TYPE_CGRP_STORAGE: 1485 case BPF_MAP_TYPE_BLOOM_FILTER: 1486 case BPF_MAP_TYPE_LPM_TRIE: 1487 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: 1488 case BPF_MAP_TYPE_STACK_TRACE: 1489 case BPF_MAP_TYPE_QUEUE: 1490 case BPF_MAP_TYPE_STACK: 1491 case BPF_MAP_TYPE_LRU_HASH: 1492 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 1493 case BPF_MAP_TYPE_STRUCT_OPS: 1494 case BPF_MAP_TYPE_CPUMAP: 1495 case BPF_MAP_TYPE_ARENA: 1496 case BPF_MAP_TYPE_INSN_ARRAY: 1497 if (!bpf_token_capable(token, CAP_BPF)) 1498 goto put_token; 1499 break; 1500 case BPF_MAP_TYPE_SOCKMAP: 1501 case BPF_MAP_TYPE_SOCKHASH: 1502 case BPF_MAP_TYPE_DEVMAP: 1503 case BPF_MAP_TYPE_DEVMAP_HASH: 1504 case BPF_MAP_TYPE_XSKMAP: 1505 if (!bpf_token_capable(token, CAP_NET_ADMIN)) 1506 goto put_token; 1507 break; 1508 default: 1509 WARN(1, "unsupported map type %d", map_type); 1510 goto put_token; 1511 } 1512 1513 map = ops->map_alloc(attr); 1514 if (IS_ERR(map)) { 1515 err = PTR_ERR(map); 1516 goto put_token; 1517 } 1518 map->ops = ops; 1519 map->map_type = map_type; 1520 1521 err = bpf_obj_name_cpy(map->name, attr->map_name, 1522 sizeof(attr->map_name)); 1523 if (err < 0) 1524 goto free_map; 1525 1526 preempt_disable(); 1527 map->cookie = gen_cookie_next(&bpf_map_cookie); 1528 preempt_enable(); 1529 1530 atomic64_set(&map->refcnt, 1); 1531 atomic64_set(&map->usercnt, 1); 1532 mutex_init(&map->freeze_mutex); 1533 spin_lock_init(&map->owner_lock); 1534 1535 if (attr->btf_key_type_id || attr->btf_value_type_id || 1536 /* Even the map's value is a kernel's struct, 1537 * the bpf_prog.o must have BTF to begin with 1538 * to figure out the corresponding kernel's 1539 * counter part. Thus, attr->btf_fd has 1540 * to be valid also. 
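* (A kernel BTF object passed via btf_fd is rejected below with -EACCES; the map's BTF must be user-provided.)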
1541 */ 1542 attr->btf_vmlinux_value_type_id) { 1543 struct btf *btf; 1544 1545 btf = btf_get_by_fd(attr->btf_fd); 1546 if (IS_ERR(btf)) { 1547 err = PTR_ERR(btf); 1548 goto free_map; 1549 } 1550 if (btf_is_kernel(btf)) { 1551 btf_put(btf); 1552 err = -EACCES; 1553 goto free_map; 1554 } 1555 map->btf = btf; 1556 1557 if (attr->btf_value_type_id) { 1558 err = map_check_btf(map, token, btf, attr->btf_key_type_id, 1559 attr->btf_value_type_id); 1560 if (err) 1561 goto free_map; 1562 } 1563 1564 map->btf_key_type_id = attr->btf_key_type_id; 1565 map->btf_value_type_id = attr->btf_value_type_id; 1566 map->btf_vmlinux_value_type_id = 1567 attr->btf_vmlinux_value_type_id; 1568 } 1569 1570 if (attr->excl_prog_hash) { 1571 bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel); 1572 1573 if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) { 1574 err = -EINVAL; 1575 goto free_map; 1576 } 1577 1578 map->excl_prog_sha = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); 1579 if (!map->excl_prog_sha) { 1580 err = -ENOMEM; 1581 goto free_map; 1582 } 1583 1584 if (copy_from_bpfptr(map->excl_prog_sha, uprog_hash, SHA256_DIGEST_SIZE)) { 1585 err = -EFAULT; 1586 goto free_map; 1587 } 1588 } else if (attr->excl_prog_hash_size) { 1589 return -EINVAL; 1590 } 1591 1592 err = security_bpf_map_create(map, attr, token, uattr.is_kernel); 1593 if (err) 1594 goto free_map_sec; 1595 1596 err = bpf_map_alloc_id(map); 1597 if (err) 1598 goto free_map_sec; 1599 1600 bpf_map_save_memcg(map); 1601 bpf_token_put(token); 1602 1603 err = bpf_map_new_fd(map, f_flags); 1604 if (err < 0) { 1605 /* failed to allocate fd. 1606 * bpf_map_put_with_uref() is needed because the above 1607 * bpf_map_alloc_id() has published the map 1608 * to the userspace and the userspace may 1609 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 1610 */ 1611 bpf_map_put_with_uref(map); 1612 return err; 1613 } 1614 1615 return err; 1616 1617 free_map_sec: 1618 security_bpf_map_free(map); 1619 free_map: 1620 bpf_map_free(map); 1621 put_token: 1622 bpf_token_put(token); 1623 return err; 1624 } 1625 1626 void bpf_map_inc(struct bpf_map *map) 1627 { 1628 atomic64_inc(&map->refcnt); 1629 } 1630 EXPORT_SYMBOL_GPL(bpf_map_inc); 1631 1632 void bpf_map_inc_with_uref(struct bpf_map *map) 1633 { 1634 atomic64_inc(&map->refcnt); 1635 atomic64_inc(&map->usercnt); 1636 } 1637 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); 1638 1639 struct bpf_map *bpf_map_get(u32 ufd) 1640 { 1641 CLASS(fd, f)(ufd); 1642 struct bpf_map *map = __bpf_map_get(f); 1643 1644 if (!IS_ERR(map)) 1645 bpf_map_inc(map); 1646 1647 return map; 1648 } 1649 EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL"); 1650 1651 struct bpf_map *bpf_map_get_with_uref(u32 ufd) 1652 { 1653 CLASS(fd, f)(ufd); 1654 struct bpf_map *map = __bpf_map_get(f); 1655 1656 if (!IS_ERR(map)) 1657 bpf_map_inc_with_uref(map); 1658 1659 return map; 1660 } 1661 1662 /* map_idr_lock should have been held or the map should have been 1663 * protected by rcu read lock. 
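* Either guarantee keeps the map from being freed while its refcount is inspected; atomic64_fetch_add_unless() below then only takes a new reference if refcnt has not already dropped to zero.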
1664 */ 1665 struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) 1666 { 1667 int refold; 1668 1669 refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); 1670 if (!refold) 1671 return ERR_PTR(-ENOENT); 1672 if (uref) 1673 atomic64_inc(&map->usercnt); 1674 1675 return map; 1676 } 1677 1678 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) 1679 { 1680 lockdep_assert(rcu_read_lock_held()); 1681 return __bpf_map_inc_not_zero(map, false); 1682 } 1683 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); 1684 1685 int __weak bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, 1686 bool delete) 1687 { 1688 return -ENOTSUPP; 1689 } 1690 1691 static void *__bpf_copy_key(void __user *ukey, u64 key_size) 1692 { 1693 if (key_size) 1694 return vmemdup_user(ukey, key_size); 1695 1696 if (ukey) 1697 return ERR_PTR(-EINVAL); 1698 1699 return NULL; 1700 } 1701 1702 static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size) 1703 { 1704 if (key_size) 1705 return kvmemdup_bpfptr(ukey, key_size); 1706 1707 if (!bpfptr_is_null(ukey)) 1708 return ERR_PTR(-EINVAL); 1709 1710 return NULL; 1711 } 1712 1713 /* last field in 'union bpf_attr' used by this command */ 1714 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags 1715 1716 static int map_lookup_elem(union bpf_attr *attr) 1717 { 1718 void __user *ukey = u64_to_user_ptr(attr->key); 1719 void __user *uvalue = u64_to_user_ptr(attr->value); 1720 struct bpf_map *map; 1721 void *key, *value; 1722 u32 value_size; 1723 int err; 1724 1725 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 1726 return -EINVAL; 1727 1728 if (attr->flags & ~BPF_F_LOCK) 1729 return -EINVAL; 1730 1731 CLASS(fd, f)(attr->map_fd); 1732 map = __bpf_map_get(f); 1733 if (IS_ERR(map)) 1734 return PTR_ERR(map); 1735 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) 1736 return -EPERM; 1737 1738 if ((attr->flags & BPF_F_LOCK) && 1739 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) 1740 return -EINVAL; 1741 1742 key = __bpf_copy_key(ukey, map->key_size); 1743 if (IS_ERR(key)) 1744 return PTR_ERR(key); 1745 1746 value_size = bpf_map_value_size(map); 1747 1748 err = -ENOMEM; 1749 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1750 if (!value) 1751 goto free_key; 1752 1753 if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 1754 if (copy_from_user(value, uvalue, value_size)) 1755 err = -EFAULT; 1756 else 1757 err = bpf_map_copy_value(map, key, value, attr->flags); 1758 goto free_value; 1759 } 1760 1761 err = bpf_map_copy_value(map, key, value, attr->flags); 1762 if (err) 1763 goto free_value; 1764 1765 err = -EFAULT; 1766 if (copy_to_user(uvalue, value, value_size) != 0) 1767 goto free_value; 1768 1769 err = 0; 1770 1771 free_value: 1772 kvfree(value); 1773 free_key: 1774 kvfree(key); 1775 return err; 1776 } 1777 1778 1779 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 1780 1781 static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) 1782 { 1783 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1784 bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel); 1785 struct bpf_map *map; 1786 void *key, *value; 1787 u32 value_size; 1788 int err; 1789 1790 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 1791 return -EINVAL; 1792 1793 CLASS(fd, f)(attr->map_fd); 1794 map = __bpf_map_get(f); 1795 if (IS_ERR(map)) 1796 return PTR_ERR(map); 1797 bpf_map_write_active_inc(map); 1798 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1799 err = -EPERM; 1800 goto err_put; 1801 } 1802 1803 if ((attr->flags & BPF_F_LOCK) && 1804 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1805 err = 
-EINVAL; 1806 goto err_put; 1807 } 1808 1809 key = ___bpf_copy_key(ukey, map->key_size); 1810 if (IS_ERR(key)) { 1811 err = PTR_ERR(key); 1812 goto err_put; 1813 } 1814 1815 value_size = bpf_map_value_size(map); 1816 value = kvmemdup_bpfptr(uvalue, value_size); 1817 if (IS_ERR(value)) { 1818 err = PTR_ERR(value); 1819 goto free_key; 1820 } 1821 1822 err = bpf_map_update_value(map, fd_file(f), key, value, attr->flags); 1823 if (!err) 1824 maybe_wait_bpf_programs(map); 1825 1826 kvfree(value); 1827 free_key: 1828 kvfree(key); 1829 err_put: 1830 bpf_map_write_active_dec(map); 1831 return err; 1832 } 1833 1834 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 1835 1836 static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr) 1837 { 1838 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1839 struct bpf_map *map; 1840 void *key; 1841 int err; 1842 1843 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 1844 return -EINVAL; 1845 1846 CLASS(fd, f)(attr->map_fd); 1847 map = __bpf_map_get(f); 1848 if (IS_ERR(map)) 1849 return PTR_ERR(map); 1850 bpf_map_write_active_inc(map); 1851 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1852 err = -EPERM; 1853 goto err_put; 1854 } 1855 1856 key = ___bpf_copy_key(ukey, map->key_size); 1857 if (IS_ERR(key)) { 1858 err = PTR_ERR(key); 1859 goto err_put; 1860 } 1861 1862 if (bpf_map_is_offloaded(map)) { 1863 err = bpf_map_offload_delete_elem(map, key); 1864 goto out; 1865 } else if (IS_FD_PROG_ARRAY(map) || 1866 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 1867 /* These maps require sleepable context */ 1868 err = map->ops->map_delete_elem(map, key); 1869 goto out; 1870 } 1871 1872 bpf_disable_instrumentation(); 1873 rcu_read_lock(); 1874 err = map->ops->map_delete_elem(map, key); 1875 rcu_read_unlock(); 1876 bpf_enable_instrumentation(); 1877 if (!err) 1878 maybe_wait_bpf_programs(map); 1879 out: 1880 kvfree(key); 1881 err_put: 1882 bpf_map_write_active_dec(map); 1883 return err; 1884 } 1885 1886 /* last field in 'union bpf_attr' used by this command */ 1887 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 1888 1889 static int map_get_next_key(union bpf_attr *attr) 1890 { 1891 void __user *ukey = u64_to_user_ptr(attr->key); 1892 void __user *unext_key = u64_to_user_ptr(attr->next_key); 1893 struct bpf_map *map; 1894 void *key, *next_key; 1895 int err; 1896 1897 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 1898 return -EINVAL; 1899 1900 CLASS(fd, f)(attr->map_fd); 1901 map = __bpf_map_get(f); 1902 if (IS_ERR(map)) 1903 return PTR_ERR(map); 1904 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) 1905 return -EPERM; 1906 1907 if (ukey) { 1908 key = __bpf_copy_key(ukey, map->key_size); 1909 if (IS_ERR(key)) 1910 return PTR_ERR(key); 1911 } else { 1912 key = NULL; 1913 } 1914 1915 err = -ENOMEM; 1916 next_key = kvmalloc(map->key_size, GFP_USER); 1917 if (!next_key) 1918 goto free_key; 1919 1920 if (bpf_map_is_offloaded(map)) { 1921 err = bpf_map_offload_get_next_key(map, key, next_key); 1922 goto out; 1923 } 1924 1925 rcu_read_lock(); 1926 err = map->ops->map_get_next_key(map, key, next_key); 1927 rcu_read_unlock(); 1928 out: 1929 if (err) 1930 goto free_next_key; 1931 1932 err = -EFAULT; 1933 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 1934 goto free_next_key; 1935 1936 err = 0; 1937 1938 free_next_key: 1939 kvfree(next_key); 1940 free_key: 1941 kvfree(key); 1942 return err; 1943 } 1944 1945 int generic_map_delete_batch(struct bpf_map *map, 1946 const union bpf_attr *attr, 1947 union bpf_attr __user *uattr) 1948 { 1949 void __user *keys = 
u64_to_user_ptr(attr->batch.keys); 1950 u32 cp, max_count; 1951 int err = 0; 1952 void *key; 1953 1954 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1955 return -EINVAL; 1956 1957 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1958 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1959 return -EINVAL; 1960 } 1961 1962 max_count = attr->batch.count; 1963 if (!max_count) 1964 return 0; 1965 1966 if (put_user(0, &uattr->batch.count)) 1967 return -EFAULT; 1968 1969 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1970 if (!key) 1971 return -ENOMEM; 1972 1973 for (cp = 0; cp < max_count; cp++) { 1974 err = -EFAULT; 1975 if (copy_from_user(key, keys + cp * map->key_size, 1976 map->key_size)) 1977 break; 1978 1979 if (bpf_map_is_offloaded(map)) { 1980 err = bpf_map_offload_delete_elem(map, key); 1981 break; 1982 } 1983 1984 bpf_disable_instrumentation(); 1985 rcu_read_lock(); 1986 err = map->ops->map_delete_elem(map, key); 1987 rcu_read_unlock(); 1988 bpf_enable_instrumentation(); 1989 if (err) 1990 break; 1991 cond_resched(); 1992 } 1993 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1994 err = -EFAULT; 1995 1996 kvfree(key); 1997 1998 return err; 1999 } 2000 2001 int generic_map_update_batch(struct bpf_map *map, struct file *map_file, 2002 const union bpf_attr *attr, 2003 union bpf_attr __user *uattr) 2004 { 2005 void __user *values = u64_to_user_ptr(attr->batch.values); 2006 void __user *keys = u64_to_user_ptr(attr->batch.keys); 2007 u32 value_size, cp, max_count; 2008 void *key, *value; 2009 int err = 0; 2010 2011 if (attr->batch.elem_flags & ~BPF_F_LOCK) 2012 return -EINVAL; 2013 2014 if ((attr->batch.elem_flags & BPF_F_LOCK) && 2015 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 2016 return -EINVAL; 2017 } 2018 2019 value_size = bpf_map_value_size(map); 2020 2021 max_count = attr->batch.count; 2022 if (!max_count) 2023 return 0; 2024 2025 if (put_user(0, &uattr->batch.count)) 2026 return -EFAULT; 2027 2028 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 2029 if (!key) 2030 return -ENOMEM; 2031 2032 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 2033 if (!value) { 2034 kvfree(key); 2035 return -ENOMEM; 2036 } 2037 2038 for (cp = 0; cp < max_count; cp++) { 2039 err = -EFAULT; 2040 if (copy_from_user(key, keys + cp * map->key_size, 2041 map->key_size) || 2042 copy_from_user(value, values + cp * value_size, value_size)) 2043 break; 2044 2045 err = bpf_map_update_value(map, map_file, key, value, 2046 attr->batch.elem_flags); 2047 2048 if (err) 2049 break; 2050 cond_resched(); 2051 } 2052 2053 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 2054 err = -EFAULT; 2055 2056 kvfree(value); 2057 kvfree(key); 2058 2059 return err; 2060 } 2061 2062 int generic_map_lookup_batch(struct bpf_map *map, 2063 const union bpf_attr *attr, 2064 union bpf_attr __user *uattr) 2065 { 2066 void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); 2067 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 2068 void __user *values = u64_to_user_ptr(attr->batch.values); 2069 void __user *keys = u64_to_user_ptr(attr->batch.keys); 2070 void *buf, *buf_prevkey, *prev_key, *key, *value; 2071 u32 value_size, cp, max_count; 2072 int err; 2073 2074 if (attr->batch.elem_flags & ~BPF_F_LOCK) 2075 return -EINVAL; 2076 2077 if ((attr->batch.elem_flags & BPF_F_LOCK) && 2078 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) 2079 return -EINVAL; 2080 2081 value_size = bpf_map_value_size(map); 2082 2083 max_count = attr->batch.count; 2084 if (!max_count) 2085 return 0; 2086 2087 
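/* Publish a count of 0 up front so userspace never reads a stale element count if we bail out early with an error. */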
if (put_user(0, &uattr->batch.count)) 2088 return -EFAULT; 2089 2090 buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 2091 if (!buf_prevkey) 2092 return -ENOMEM; 2093 2094 buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN); 2095 if (!buf) { 2096 kvfree(buf_prevkey); 2097 return -ENOMEM; 2098 } 2099 2100 err = -EFAULT; 2101 prev_key = NULL; 2102 if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) 2103 goto free_buf; 2104 key = buf; 2105 value = key + map->key_size; 2106 if (ubatch) 2107 prev_key = buf_prevkey; 2108 2109 for (cp = 0; cp < max_count;) { 2110 rcu_read_lock(); 2111 err = map->ops->map_get_next_key(map, prev_key, key); 2112 rcu_read_unlock(); 2113 if (err) 2114 break; 2115 err = bpf_map_copy_value(map, key, value, 2116 attr->batch.elem_flags); 2117 2118 if (err == -ENOENT) 2119 goto next_key; 2120 2121 if (err) 2122 goto free_buf; 2123 2124 if (copy_to_user(keys + cp * map->key_size, key, 2125 map->key_size)) { 2126 err = -EFAULT; 2127 goto free_buf; 2128 } 2129 if (copy_to_user(values + cp * value_size, value, value_size)) { 2130 err = -EFAULT; 2131 goto free_buf; 2132 } 2133 2134 cp++; 2135 next_key: 2136 if (!prev_key) 2137 prev_key = buf_prevkey; 2138 2139 swap(prev_key, key); 2140 cond_resched(); 2141 } 2142 2143 if (err == -EFAULT) 2144 goto free_buf; 2145 2146 if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || 2147 (cp && copy_to_user(uobatch, prev_key, map->key_size)))) 2148 err = -EFAULT; 2149 2150 free_buf: 2151 kvfree(buf_prevkey); 2152 kvfree(buf); 2153 return err; 2154 } 2155 2156 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags 2157 2158 static int map_lookup_and_delete_elem(union bpf_attr *attr) 2159 { 2160 void __user *ukey = u64_to_user_ptr(attr->key); 2161 void __user *uvalue = u64_to_user_ptr(attr->value); 2162 struct bpf_map *map; 2163 void *key, *value; 2164 u32 value_size; 2165 int err; 2166 2167 if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) 2168 return -EINVAL; 2169 2170 if (attr->flags & ~BPF_F_LOCK) 2171 return -EINVAL; 2172 2173 CLASS(fd, f)(attr->map_fd); 2174 map = __bpf_map_get(f); 2175 if (IS_ERR(map)) 2176 return PTR_ERR(map); 2177 bpf_map_write_active_inc(map); 2178 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || 2179 !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 2180 err = -EPERM; 2181 goto err_put; 2182 } 2183 2184 if (attr->flags && 2185 (map->map_type == BPF_MAP_TYPE_QUEUE || 2186 map->map_type == BPF_MAP_TYPE_STACK)) { 2187 err = -EINVAL; 2188 goto err_put; 2189 } 2190 2191 if ((attr->flags & BPF_F_LOCK) && 2192 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 2193 err = -EINVAL; 2194 goto err_put; 2195 } 2196 2197 key = __bpf_copy_key(ukey, map->key_size); 2198 if (IS_ERR(key)) { 2199 err = PTR_ERR(key); 2200 goto err_put; 2201 } 2202 2203 value_size = bpf_map_value_size(map); 2204 2205 err = -ENOMEM; 2206 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 2207 if (!value) 2208 goto free_key; 2209 2210 err = -ENOTSUPP; 2211 if (map->map_type == BPF_MAP_TYPE_QUEUE || 2212 map->map_type == BPF_MAP_TYPE_STACK) { 2213 err = map->ops->map_pop_elem(map, value); 2214 } else if (map->map_type == BPF_MAP_TYPE_HASH || 2215 map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 2216 map->map_type == BPF_MAP_TYPE_LRU_HASH || 2217 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 2219 if (!bpf_map_is_offloaded(map)) { 2220 bpf_disable_instrumentation(); 2221 rcu_read_lock(); 2222 err = map->ops->map_lookup_and_delete_elem(map, key,
value, attr->flags); 2223 rcu_read_unlock(); 2224 bpf_enable_instrumentation(); 2225 } 2226 } 2227 2228 if (err) 2229 goto free_value; 2230 2231 if (copy_to_user(uvalue, value, value_size) != 0) { 2232 err = -EFAULT; 2233 goto free_value; 2234 } 2235 2236 err = 0; 2237 2238 free_value: 2239 kvfree(value); 2240 free_key: 2241 kvfree(key); 2242 err_put: 2243 bpf_map_write_active_dec(map); 2244 return err; 2245 } 2246 2247 #define BPF_MAP_FREEZE_LAST_FIELD map_fd 2248 2249 static int map_freeze(const union bpf_attr *attr) 2250 { 2251 int err = 0; 2252 struct bpf_map *map; 2253 2254 if (CHECK_ATTR(BPF_MAP_FREEZE)) 2255 return -EINVAL; 2256 2257 CLASS(fd, f)(attr->map_fd); 2258 map = __bpf_map_get(f); 2259 if (IS_ERR(map)) 2260 return PTR_ERR(map); 2261 2262 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) 2263 return -ENOTSUPP; 2264 2265 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) 2266 return -EPERM; 2267 2268 mutex_lock(&map->freeze_mutex); 2269 if (bpf_map_write_active(map)) { 2270 err = -EBUSY; 2271 goto err_put; 2272 } 2273 if (READ_ONCE(map->frozen)) { 2274 err = -EBUSY; 2275 goto err_put; 2276 } 2277 2278 WRITE_ONCE(map->frozen, true); 2279 err_put: 2280 mutex_unlock(&map->freeze_mutex); 2281 return err; 2282 } 2283 2284 static const struct bpf_prog_ops * const bpf_prog_types[] = { 2285 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 2286 [_id] = & _name ## _prog_ops, 2287 #define BPF_MAP_TYPE(_id, _ops) 2288 #define BPF_LINK_TYPE(_id, _name) 2289 #include <linux/bpf_types.h> 2290 #undef BPF_PROG_TYPE 2291 #undef BPF_MAP_TYPE 2292 #undef BPF_LINK_TYPE 2293 }; 2294 2295 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 2296 { 2297 const struct bpf_prog_ops *ops; 2298 2299 if (type >= ARRAY_SIZE(bpf_prog_types)) 2300 return -EINVAL; 2301 type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types)); 2302 ops = bpf_prog_types[type]; 2303 if (!ops) 2304 return -EINVAL; 2305 2306 if (!bpf_prog_is_offloaded(prog->aux)) 2307 prog->aux->ops = ops; 2308 else 2309 prog->aux->ops = &bpf_offload_prog_ops; 2310 prog->type = type; 2311 return 0; 2312 } 2313 2314 enum bpf_audit { 2315 BPF_AUDIT_LOAD, 2316 BPF_AUDIT_UNLOAD, 2317 BPF_AUDIT_MAX, 2318 }; 2319 2320 static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { 2321 [BPF_AUDIT_LOAD] = "LOAD", 2322 [BPF_AUDIT_UNLOAD] = "UNLOAD", 2323 }; 2324 2325 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) 2326 { 2327 struct audit_context *ctx = NULL; 2328 struct audit_buffer *ab; 2329 2330 if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) 2331 return; 2332 if (audit_enabled == AUDIT_OFF) 2333 return; 2334 if (!in_irq() && !irqs_disabled()) 2335 ctx = audit_context(); 2336 ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); 2337 if (unlikely(!ab)) 2338 return; 2339 audit_log_format(ab, "prog-id=%u op=%s", 2340 prog->aux->id, bpf_audit_str[op]); 2341 audit_log_end(ab); 2342 } 2343 2344 static int bpf_prog_alloc_id(struct bpf_prog *prog) 2345 { 2346 int id; 2347 2348 idr_preload(GFP_KERNEL); 2349 spin_lock_bh(&prog_idr_lock); 2350 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 2351 if (id > 0) 2352 prog->aux->id = id; 2353 spin_unlock_bh(&prog_idr_lock); 2354 idr_preload_end(); 2355 2356 /* id is in [1, INT_MAX) */ 2357 if (WARN_ON_ONCE(!id)) 2358 return -ENOSPC; 2359 2360 return id > 0 ? 
0 : id; 2361 } 2362 2363 void bpf_prog_free_id(struct bpf_prog *prog) 2364 { 2365 unsigned long flags; 2366 2367 /* cBPF to eBPF migrations are currently not in the idr store. 2368 * Offloaded programs are removed from the store when their device 2369 * disappears - even if someone grabs an fd to them they are unusable, 2370 * simply waiting for refcnt to drop to be freed. 2371 */ 2372 if (!prog->aux->id) 2373 return; 2374 2375 spin_lock_irqsave(&prog_idr_lock, flags); 2376 idr_remove(&prog_idr, prog->aux->id); 2377 prog->aux->id = 0; 2378 spin_unlock_irqrestore(&prog_idr_lock, flags); 2379 } 2380 2381 static void __bpf_prog_put_rcu(struct rcu_head *rcu) 2382 { 2383 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 2384 2385 kvfree(aux->func_info); 2386 kfree(aux->func_info_aux); 2387 free_uid(aux->user); 2388 security_bpf_prog_free(aux->prog); 2389 bpf_prog_free(aux->prog); 2390 } 2391 2392 static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) 2393 { 2394 bpf_prog_kallsyms_del_all(prog); 2395 btf_put(prog->aux->btf); 2396 module_put(prog->aux->mod); 2397 kvfree(prog->aux->jited_linfo); 2398 kvfree(prog->aux->linfo); 2399 kfree(prog->aux->kfunc_tab); 2400 kfree(prog->aux->ctx_arg_info); 2401 if (prog->aux->attach_btf) 2402 btf_put(prog->aux->attach_btf); 2403 2404 if (deferred) { 2405 if (prog->sleepable) 2406 call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); 2407 else 2408 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 2409 } else { 2410 __bpf_prog_put_rcu(&prog->aux->rcu); 2411 } 2412 } 2413 2414 static void bpf_prog_put_deferred(struct work_struct *work) 2415 { 2416 struct bpf_prog_aux *aux; 2417 struct bpf_prog *prog; 2418 2419 aux = container_of(work, struct bpf_prog_aux, work); 2420 prog = aux->prog; 2421 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 2422 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); 2423 bpf_prog_free_id(prog); 2424 __bpf_prog_put_noref(prog, true); 2425 } 2426 2427 static void __bpf_prog_put(struct bpf_prog *prog) 2428 { 2429 struct bpf_prog_aux *aux = prog->aux; 2430 2431 if (atomic64_dec_and_test(&aux->refcnt)) { 2432 if (in_irq() || irqs_disabled()) { 2433 INIT_WORK(&aux->work, bpf_prog_put_deferred); 2434 schedule_work(&aux->work); 2435 } else { 2436 bpf_prog_put_deferred(&aux->work); 2437 } 2438 } 2439 } 2440 2441 void bpf_prog_put(struct bpf_prog *prog) 2442 { 2443 __bpf_prog_put(prog); 2444 } 2445 EXPORT_SYMBOL_GPL(bpf_prog_put); 2446 2447 static int bpf_prog_release(struct inode *inode, struct file *filp) 2448 { 2449 struct bpf_prog *prog = filp->private_data; 2450 2451 bpf_prog_put(prog); 2452 return 0; 2453 } 2454 2455 struct bpf_prog_kstats { 2456 u64 nsecs; 2457 u64 cnt; 2458 u64 misses; 2459 }; 2460 2461 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog) 2462 { 2463 struct bpf_prog_stats *stats; 2464 unsigned int flags; 2465 2466 if (unlikely(!prog->stats)) 2467 return; 2468 2469 stats = this_cpu_ptr(prog->stats); 2470 flags = u64_stats_update_begin_irqsave(&stats->syncp); 2471 u64_stats_inc(&stats->misses); 2472 u64_stats_update_end_irqrestore(&stats->syncp, flags); 2473 } 2474 2475 static void bpf_prog_get_stats(const struct bpf_prog *prog, 2476 struct bpf_prog_kstats *stats) 2477 { 2478 u64 nsecs = 0, cnt = 0, misses = 0; 2479 int cpu; 2480 2481 for_each_possible_cpu(cpu) { 2482 const struct bpf_prog_stats *st; 2483 unsigned int start; 2484 u64 tnsecs, tcnt, tmisses; 2485 2486 st = per_cpu_ptr(prog->stats, cpu); 2487 do { 2488 start = u64_stats_fetch_begin(&st->syncp); 2489 tnsecs = 
u64_stats_read(&st->nsecs); 2490 tcnt = u64_stats_read(&st->cnt); 2491 tmisses = u64_stats_read(&st->misses); 2492 } while (u64_stats_fetch_retry(&st->syncp, start)); 2493 nsecs += tnsecs; 2494 cnt += tcnt; 2495 misses += tmisses; 2496 } 2497 stats->nsecs = nsecs; 2498 stats->cnt = cnt; 2499 stats->misses = misses; 2500 } 2501 2502 #ifdef CONFIG_PROC_FS 2503 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 2504 { 2505 const struct bpf_prog *prog = filp->private_data; 2506 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2507 struct bpf_prog_kstats stats; 2508 2509 bpf_prog_get_stats(prog, &stats); 2510 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2511 seq_printf(m, 2512 "prog_type:\t%u\n" 2513 "prog_jited:\t%u\n" 2514 "prog_tag:\t%s\n" 2515 "memlock:\t%llu\n" 2516 "prog_id:\t%u\n" 2517 "run_time_ns:\t%llu\n" 2518 "run_cnt:\t%llu\n" 2519 "recursion_misses:\t%llu\n" 2520 "verified_insns:\t%u\n", 2521 prog->type, 2522 prog->jited, 2523 prog_tag, 2524 prog->pages * 1ULL << PAGE_SHIFT, 2525 prog->aux->id, 2526 stats.nsecs, 2527 stats.cnt, 2528 stats.misses, 2529 prog->aux->verified_insns); 2530 } 2531 #endif 2532 2533 const struct file_operations bpf_prog_fops = { 2534 #ifdef CONFIG_PROC_FS 2535 .show_fdinfo = bpf_prog_show_fdinfo, 2536 #endif 2537 .release = bpf_prog_release, 2538 .read = bpf_dummy_read, 2539 .write = bpf_dummy_write, 2540 }; 2541 2542 int bpf_prog_new_fd(struct bpf_prog *prog) 2543 { 2544 int ret; 2545 2546 ret = security_bpf_prog(prog); 2547 if (ret < 0) 2548 return ret; 2549 2550 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 2551 O_RDWR | O_CLOEXEC); 2552 } 2553 2554 void bpf_prog_add(struct bpf_prog *prog, int i) 2555 { 2556 atomic64_add(i, &prog->aux->refcnt); 2557 } 2558 EXPORT_SYMBOL_GPL(bpf_prog_add); 2559 2560 void bpf_prog_sub(struct bpf_prog *prog, int i) 2561 { 2562 /* Only to be used for undoing previous bpf_prog_add() in some 2563 * error path. We still know that another entity in our call 2564 * path holds a reference to the program, thus atomic_sub() can 2565 * be safely used in such cases! 
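	 * A zero result would mean this undo path dropped the last reference,
	 * which must never happen here; the WARN_ON below catches that case.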
2566 */ 2567 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 2568 } 2569 EXPORT_SYMBOL_GPL(bpf_prog_sub); 2570 2571 void bpf_prog_inc(struct bpf_prog *prog) 2572 { 2573 atomic64_inc(&prog->aux->refcnt); 2574 } 2575 EXPORT_SYMBOL_GPL(bpf_prog_inc); 2576 2577 /* prog_idr_lock should have been held */ 2578 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 2579 { 2580 int refold; 2581 2582 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 2583 2584 if (!refold) 2585 return ERR_PTR(-ENOENT); 2586 2587 return prog; 2588 } 2589 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 2590 2591 bool bpf_prog_get_ok(struct bpf_prog *prog, 2592 enum bpf_prog_type *attach_type, bool attach_drv) 2593 { 2594 /* not an attachment, just a refcount inc, always allow */ 2595 if (!attach_type) 2596 return true; 2597 2598 if (prog->type != *attach_type) 2599 return false; 2600 if (bpf_prog_is_offloaded(prog->aux) && !attach_drv) 2601 return false; 2602 2603 return true; 2604 } 2605 2606 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, 2607 bool attach_drv) 2608 { 2609 CLASS(fd, f)(ufd); 2610 struct bpf_prog *prog; 2611 2612 if (fd_empty(f)) 2613 return ERR_PTR(-EBADF); 2614 if (fd_file(f)->f_op != &bpf_prog_fops) 2615 return ERR_PTR(-EINVAL); 2616 2617 prog = fd_file(f)->private_data; 2618 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) 2619 return ERR_PTR(-EINVAL); 2620 2621 bpf_prog_inc(prog); 2622 return prog; 2623 } 2624 2625 struct bpf_prog *bpf_prog_get(u32 ufd) 2626 { 2627 return __bpf_prog_get(ufd, NULL, false); 2628 } 2629 2630 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, 2631 bool attach_drv) 2632 { 2633 return __bpf_prog_get(ufd, &type, attach_drv); 2634 } 2635 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 2636 2637 /* Initially all BPF programs could be loaded w/o specifying 2638 * expected_attach_type. Later for some of them specifying expected_attach_type 2639 * at load time became required so that program could be validated properly. 2640 * Programs of types that are allowed to be loaded both w/ and w/o (for 2641 * backward compatibility) expected_attach_type, should have the default attach 2642 * type assigned to expected_attach_type for the latter case, so that it can be 2643 * validated later at attach time. 2644 * 2645 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if 2646 * prog type requires it but has some attach types that have to be backward 2647 * compatible. 2648 */ 2649 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) 2650 { 2651 switch (attr->prog_type) { 2652 case BPF_PROG_TYPE_CGROUP_SOCK: 2653 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't 2654 * exist so checking for non-zero is the way to go here. 
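		 * Programs loaded without an expected_attach_type therefore
		 * default to BPF_CGROUP_INET_SOCK_CREATE below, keeping old
		 * loaders working while still giving attach time a concrete
		 * type to validate against.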
2655 */ 2656 if (!attr->expected_attach_type) 2657 attr->expected_attach_type = 2658 BPF_CGROUP_INET_SOCK_CREATE; 2659 break; 2660 case BPF_PROG_TYPE_SK_REUSEPORT: 2661 if (!attr->expected_attach_type) 2662 attr->expected_attach_type = 2663 BPF_SK_REUSEPORT_SELECT; 2664 break; 2665 } 2666 } 2667 2668 static int 2669 bpf_prog_load_check_attach(enum bpf_prog_type prog_type, 2670 enum bpf_attach_type expected_attach_type, 2671 struct btf *attach_btf, u32 btf_id, 2672 struct bpf_prog *dst_prog) 2673 { 2674 if (btf_id) { 2675 if (btf_id > BTF_MAX_TYPE) 2676 return -EINVAL; 2677 2678 if (!attach_btf && !dst_prog) 2679 return -EINVAL; 2680 2681 switch (prog_type) { 2682 case BPF_PROG_TYPE_TRACING: 2683 case BPF_PROG_TYPE_LSM: 2684 case BPF_PROG_TYPE_STRUCT_OPS: 2685 case BPF_PROG_TYPE_EXT: 2686 break; 2687 default: 2688 return -EINVAL; 2689 } 2690 } 2691 2692 if (attach_btf && (!btf_id || dst_prog)) 2693 return -EINVAL; 2694 2695 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && 2696 prog_type != BPF_PROG_TYPE_EXT) 2697 return -EINVAL; 2698 2699 switch (prog_type) { 2700 case BPF_PROG_TYPE_CGROUP_SOCK: 2701 switch (expected_attach_type) { 2702 case BPF_CGROUP_INET_SOCK_CREATE: 2703 case BPF_CGROUP_INET_SOCK_RELEASE: 2704 case BPF_CGROUP_INET4_POST_BIND: 2705 case BPF_CGROUP_INET6_POST_BIND: 2706 return 0; 2707 default: 2708 return -EINVAL; 2709 } 2710 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2711 switch (expected_attach_type) { 2712 case BPF_CGROUP_INET4_BIND: 2713 case BPF_CGROUP_INET6_BIND: 2714 case BPF_CGROUP_INET4_CONNECT: 2715 case BPF_CGROUP_INET6_CONNECT: 2716 case BPF_CGROUP_UNIX_CONNECT: 2717 case BPF_CGROUP_INET4_GETPEERNAME: 2718 case BPF_CGROUP_INET6_GETPEERNAME: 2719 case BPF_CGROUP_UNIX_GETPEERNAME: 2720 case BPF_CGROUP_INET4_GETSOCKNAME: 2721 case BPF_CGROUP_INET6_GETSOCKNAME: 2722 case BPF_CGROUP_UNIX_GETSOCKNAME: 2723 case BPF_CGROUP_UDP4_SENDMSG: 2724 case BPF_CGROUP_UDP6_SENDMSG: 2725 case BPF_CGROUP_UNIX_SENDMSG: 2726 case BPF_CGROUP_UDP4_RECVMSG: 2727 case BPF_CGROUP_UDP6_RECVMSG: 2728 case BPF_CGROUP_UNIX_RECVMSG: 2729 return 0; 2730 default: 2731 return -EINVAL; 2732 } 2733 case BPF_PROG_TYPE_CGROUP_SKB: 2734 switch (expected_attach_type) { 2735 case BPF_CGROUP_INET_INGRESS: 2736 case BPF_CGROUP_INET_EGRESS: 2737 return 0; 2738 default: 2739 return -EINVAL; 2740 } 2741 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2742 switch (expected_attach_type) { 2743 case BPF_CGROUP_SETSOCKOPT: 2744 case BPF_CGROUP_GETSOCKOPT: 2745 return 0; 2746 default: 2747 return -EINVAL; 2748 } 2749 case BPF_PROG_TYPE_SK_LOOKUP: 2750 if (expected_attach_type == BPF_SK_LOOKUP) 2751 return 0; 2752 return -EINVAL; 2753 case BPF_PROG_TYPE_SK_REUSEPORT: 2754 switch (expected_attach_type) { 2755 case BPF_SK_REUSEPORT_SELECT: 2756 case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: 2757 return 0; 2758 default: 2759 return -EINVAL; 2760 } 2761 case BPF_PROG_TYPE_NETFILTER: 2762 if (expected_attach_type == BPF_NETFILTER) 2763 return 0; 2764 return -EINVAL; 2765 case BPF_PROG_TYPE_SYSCALL: 2766 case BPF_PROG_TYPE_EXT: 2767 if (expected_attach_type) 2768 return -EINVAL; 2769 fallthrough; 2770 default: 2771 return 0; 2772 } 2773 } 2774 2775 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) 2776 { 2777 switch (prog_type) { 2778 case BPF_PROG_TYPE_SCHED_CLS: 2779 case BPF_PROG_TYPE_SCHED_ACT: 2780 case BPF_PROG_TYPE_XDP: 2781 case BPF_PROG_TYPE_LWT_IN: 2782 case BPF_PROG_TYPE_LWT_OUT: 2783 case BPF_PROG_TYPE_LWT_XMIT: 2784 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2785 case BPF_PROG_TYPE_SK_SKB: 2786 case 
BPF_PROG_TYPE_SK_MSG: 2787 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2788 case BPF_PROG_TYPE_CGROUP_DEVICE: 2789 case BPF_PROG_TYPE_CGROUP_SOCK: 2790 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2791 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2792 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2793 case BPF_PROG_TYPE_SOCK_OPS: 2794 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2795 case BPF_PROG_TYPE_NETFILTER: 2796 return true; 2797 case BPF_PROG_TYPE_CGROUP_SKB: 2798 /* always unpriv */ 2799 case BPF_PROG_TYPE_SK_REUSEPORT: 2800 /* equivalent to SOCKET_FILTER. need CAP_BPF only */ 2801 default: 2802 return false; 2803 } 2804 } 2805 2806 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) 2807 { 2808 switch (prog_type) { 2809 case BPF_PROG_TYPE_KPROBE: 2810 case BPF_PROG_TYPE_TRACEPOINT: 2811 case BPF_PROG_TYPE_PERF_EVENT: 2812 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2813 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2814 case BPF_PROG_TYPE_TRACING: 2815 case BPF_PROG_TYPE_LSM: 2816 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ 2817 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2818 return true; 2819 default: 2820 return false; 2821 } 2822 } 2823 2824 static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr, 2825 bool is_kernel) 2826 { 2827 bpfptr_t usig = make_bpfptr(attr->signature, is_kernel); 2828 struct bpf_dynptr_kern sig_ptr, insns_ptr; 2829 struct bpf_key *key = NULL; 2830 void *sig; 2831 int err = 0; 2832 2833 if (system_keyring_id_check(attr->keyring_id) == 0) 2834 key = bpf_lookup_system_key(attr->keyring_id); 2835 else 2836 key = bpf_lookup_user_key(attr->keyring_id, 0); 2837 2838 if (!key) 2839 return -EINVAL; 2840 2841 sig = kvmemdup_bpfptr(usig, attr->signature_size); 2842 if (IS_ERR(sig)) { 2843 bpf_key_put(key); 2844 return -ENOMEM; 2845 } 2846 2847 bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0, 2848 attr->signature_size); 2849 bpf_dynptr_init(&insns_ptr, prog->insnsi, BPF_DYNPTR_TYPE_LOCAL, 0, 2850 prog->len * sizeof(struct bpf_insn)); 2851 2852 err = bpf_verify_pkcs7_signature((struct bpf_dynptr *)&insns_ptr, 2853 (struct bpf_dynptr *)&sig_ptr, key); 2854 2855 bpf_key_put(key); 2856 kvfree(sig); 2857 return err; 2858 } 2859 2860 static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog) 2861 { 2862 int err; 2863 int i; 2864 2865 for (i = 0; i < prog->aux->used_map_cnt; i++) { 2866 if (prog->aux->used_maps[i]->map_type != BPF_MAP_TYPE_INSN_ARRAY) 2867 continue; 2868 2869 err = bpf_insn_array_ready(prog->aux->used_maps[i]); 2870 if (err) 2871 return err; 2872 } 2873 2874 return 0; 2875 } 2876 2877 /* last field in 'union bpf_attr' used by this command */ 2878 #define BPF_PROG_LOAD_LAST_FIELD keyring_id 2879 2880 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) 2881 { 2882 enum bpf_prog_type type = attr->prog_type; 2883 struct bpf_prog *prog, *dst_prog = NULL; 2884 struct btf *attach_btf = NULL; 2885 struct bpf_token *token = NULL; 2886 bool bpf_cap; 2887 int err; 2888 char license[128]; 2889 2890 if (CHECK_ATTR(BPF_PROG_LOAD)) 2891 return -EINVAL; 2892 2893 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | 2894 BPF_F_ANY_ALIGNMENT | 2895 BPF_F_TEST_STATE_FREQ | 2896 BPF_F_SLEEPABLE | 2897 BPF_F_TEST_RND_HI32 | 2898 BPF_F_XDP_HAS_FRAGS | 2899 BPF_F_XDP_DEV_BOUND_ONLY | 2900 BPF_F_TEST_REG_INVARIANTS | 2901 BPF_F_TOKEN_FD)) 2902 return -EINVAL; 2903 2904 bpf_prog_load_fixup_attach_type(attr); 2905 2906 if (attr->prog_flags & BPF_F_TOKEN_FD) { 2907 token = bpf_token_get_from_fd(attr->prog_token_fd); 2908 if 
(IS_ERR(token)) 2909 return PTR_ERR(token); 2910 /* if current token doesn't grant prog loading permissions, 2911 * then we can't use this token, so ignore it and rely on 2912 * system-wide capabilities checks 2913 */ 2914 if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || 2915 !bpf_token_allow_prog_type(token, attr->prog_type, 2916 attr->expected_attach_type)) { 2917 bpf_token_put(token); 2918 token = NULL; 2919 } 2920 } 2921 2922 bpf_cap = bpf_token_capable(token, CAP_BPF); 2923 err = -EPERM; 2924 2925 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 2926 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && 2927 !bpf_cap) 2928 goto put_token; 2929 2930 /* Intent here is for unprivileged_bpf_disabled to block BPF program 2931 * creation for unprivileged users; other actions depend 2932 * on fd availability and access to bpffs, so are dependent on 2933 * object creation success. Even with unprivileged BPF disabled, 2934 * capability checks are still carried out for these 2935 * and other operations. 2936 */ 2937 if (sysctl_unprivileged_bpf_disabled && !bpf_cap) 2938 goto put_token; 2939 2940 if (attr->insn_cnt == 0 || 2941 attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { 2942 err = -E2BIG; 2943 goto put_token; 2944 } 2945 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 2946 type != BPF_PROG_TYPE_CGROUP_SKB && 2947 !bpf_cap) 2948 goto put_token; 2949 2950 if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN)) 2951 goto put_token; 2952 if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) 2953 goto put_token; 2954 2955 /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog 2956 * or btf, we need to check which one it is 2957 */ 2958 if (attr->attach_prog_fd) { 2959 dst_prog = bpf_prog_get(attr->attach_prog_fd); 2960 if (IS_ERR(dst_prog)) { 2961 dst_prog = NULL; 2962 attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); 2963 if (IS_ERR(attach_btf)) { 2964 err = -EINVAL; 2965 goto put_token; 2966 } 2967 if (!btf_is_kernel(attach_btf)) { 2968 /* attaching through specifying bpf_prog's BTF 2969 * objects directly might be supported eventually 2970 */ 2971 btf_put(attach_btf); 2972 err = -ENOTSUPP; 2973 goto put_token; 2974 } 2975 } 2976 } else if (attr->attach_btf_id) { 2977 /* fall back to vmlinux BTF, if BTF type ID is specified */ 2978 attach_btf = bpf_get_btf_vmlinux(); 2979 if (IS_ERR(attach_btf)) { 2980 err = PTR_ERR(attach_btf); 2981 goto put_token; 2982 } 2983 if (!attach_btf) { 2984 err = -EINVAL; 2985 goto put_token; 2986 } 2987 btf_get(attach_btf); 2988 } 2989 2990 if (bpf_prog_load_check_attach(type, attr->expected_attach_type, 2991 attach_btf, attr->attach_btf_id, 2992 dst_prog)) { 2993 if (dst_prog) 2994 bpf_prog_put(dst_prog); 2995 if (attach_btf) 2996 btf_put(attach_btf); 2997 err = -EINVAL; 2998 goto put_token; 2999 } 3000 3001 /* plain bpf_prog allocation */ 3002 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 3003 if (!prog) { 3004 if (dst_prog) 3005 bpf_prog_put(dst_prog); 3006 if (attach_btf) 3007 btf_put(attach_btf); 3008 err = -EINVAL; 3009 goto put_token; 3010 } 3011 3012 prog->expected_attach_type = attr->expected_attach_type; 3013 prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE); 3014 prog->aux->attach_btf = attach_btf; 3015 prog->aux->attach_btf_id = attr->attach_btf_id; 3016 prog->aux->dst_prog = dst_prog; 3017 prog->aux->dev_bound = !!attr->prog_ifindex; 3018 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; 3019 3020 /* move token into prog->aux, 
reuse taken refcnt */ 3021 prog->aux->token = token; 3022 token = NULL; 3023 3024 prog->aux->user = get_current_user(); 3025 prog->len = attr->insn_cnt; 3026 3027 err = -EFAULT; 3028 if (copy_from_bpfptr(prog->insns, 3029 make_bpfptr(attr->insns, uattr.is_kernel), 3030 bpf_prog_insn_size(prog)) != 0) 3031 goto free_prog; 3032 /* copy eBPF program license from user space */ 3033 if (strncpy_from_bpfptr(license, 3034 make_bpfptr(attr->license, uattr.is_kernel), 3035 sizeof(license) - 1) < 0) 3036 goto free_prog; 3037 license[sizeof(license) - 1] = 0; 3038 3039 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 3040 prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0; 3041 3042 if (attr->signature) { 3043 err = bpf_prog_verify_signature(prog, attr, uattr.is_kernel); 3044 if (err) 3045 goto free_prog; 3046 } 3047 3048 prog->orig_prog = NULL; 3049 prog->jited = 0; 3050 3051 atomic64_set(&prog->aux->refcnt, 1); 3052 3053 if (bpf_prog_is_dev_bound(prog->aux)) { 3054 err = bpf_prog_dev_bound_init(prog, attr); 3055 if (err) 3056 goto free_prog; 3057 } 3058 3059 if (type == BPF_PROG_TYPE_EXT && dst_prog && 3060 bpf_prog_is_dev_bound(dst_prog->aux)) { 3061 err = bpf_prog_dev_bound_inherit(prog, dst_prog); 3062 if (err) 3063 goto free_prog; 3064 } 3065 3066 /* 3067 * Bookkeeping for managing the program attachment chain. 3068 * 3069 * It might be tempting to set attach_tracing_prog flag at the attachment 3070 * time, but this will not prevent from loading bunch of tracing prog 3071 * first, then attach them one to another. 3072 * 3073 * The flag attach_tracing_prog is set for the whole program lifecycle, and 3074 * doesn't have to be cleared in bpf_tracing_link_release, since tracing 3075 * programs cannot change attachment target. 3076 */ 3077 if (type == BPF_PROG_TYPE_TRACING && dst_prog && 3078 dst_prog->type == BPF_PROG_TYPE_TRACING) { 3079 prog->aux->attach_tracing_prog = true; 3080 } 3081 3082 /* find program type: socket_filter vs tracing_filter */ 3083 err = find_prog_type(type, prog); 3084 if (err < 0) 3085 goto free_prog; 3086 3087 prog->aux->load_time = ktime_get_boottime_ns(); 3088 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, 3089 sizeof(attr->prog_name)); 3090 if (err < 0) 3091 goto free_prog; 3092 3093 err = security_bpf_prog_load(prog, attr, token, uattr.is_kernel); 3094 if (err) 3095 goto free_prog_sec; 3096 3097 /* run eBPF verifier */ 3098 err = bpf_check(&prog, attr, uattr, uattr_size); 3099 if (err < 0) 3100 goto free_used_maps; 3101 3102 prog = bpf_prog_select_runtime(prog, &err); 3103 if (err < 0) 3104 goto free_used_maps; 3105 3106 err = bpf_prog_mark_insn_arrays_ready(prog); 3107 if (err < 0) 3108 goto free_used_maps; 3109 3110 err = bpf_prog_alloc_id(prog); 3111 if (err) 3112 goto free_used_maps; 3113 3114 /* Upon success of bpf_prog_alloc_id(), the BPF prog is 3115 * effectively publicly exposed. However, retrieving via 3116 * bpf_prog_get_fd_by_id() will take another reference, 3117 * therefore it cannot be gone underneath us. 3118 * 3119 * Only for the time /after/ successful bpf_prog_new_fd() 3120 * and before returning to userspace, we might just hold 3121 * one reference and any parallel close on that fd could 3122 * rip everything out. Hence, below notifications must 3123 * happen before bpf_prog_new_fd(). 3124 * 3125 * Also, any failure handling from this point onwards must 3126 * be using bpf_prog_put() given the program is exposed. 
3127 */ 3128 bpf_prog_kallsyms_add(prog); 3129 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); 3130 bpf_audit_prog(prog, BPF_AUDIT_LOAD); 3131 3132 err = bpf_prog_new_fd(prog); 3133 if (err < 0) 3134 bpf_prog_put(prog); 3135 return err; 3136 3137 free_used_maps: 3138 /* In case we have subprogs, we need to wait for a grace 3139 * period before we can tear down JIT memory since symbols 3140 * are already exposed under kallsyms. 3141 */ 3142 __bpf_prog_put_noref(prog, prog->aux->real_func_cnt); 3143 return err; 3144 3145 free_prog_sec: 3146 security_bpf_prog_free(prog); 3147 free_prog: 3148 free_uid(prog->aux->user); 3149 if (prog->aux->attach_btf) 3150 btf_put(prog->aux->attach_btf); 3151 bpf_prog_free(prog); 3152 put_token: 3153 bpf_token_put(token); 3154 return err; 3155 } 3156 3157 #define BPF_OBJ_LAST_FIELD path_fd 3158 3159 static int bpf_obj_pin(const union bpf_attr *attr) 3160 { 3161 int path_fd; 3162 3163 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD) 3164 return -EINVAL; 3165 3166 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3167 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3168 return -EINVAL; 3169 3170 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 3171 return bpf_obj_pin_user(attr->bpf_fd, path_fd, 3172 u64_to_user_ptr(attr->pathname)); 3173 } 3174 3175 static int bpf_obj_get(const union bpf_attr *attr) 3176 { 3177 int path_fd; 3178 3179 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 3180 attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD)) 3181 return -EINVAL; 3182 3183 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3184 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3185 return -EINVAL; 3186 3187 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 3188 return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname), 3189 attr->file_flags); 3190 } 3191 3192 /* bpf_link_init_sleepable() allows to specify whether BPF link itself has 3193 * "sleepable" semantics, which normally would mean that BPF link's attach 3194 * hook can dereference link or link's underlying program for some time after 3195 * detachment due to RCU Tasks Trace-based lifetime protection scheme. 3196 * BPF program itself can be non-sleepable, yet, because it's transitively 3197 * reachable through BPF link, its freeing has to be delayed until after RCU 3198 * Tasks Trace GP. 3199 */ 3200 void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, 3201 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3202 enum bpf_attach_type attach_type, bool sleepable) 3203 { 3204 WARN_ON(ops->dealloc && ops->dealloc_deferred); 3205 atomic64_set(&link->refcnt, 1); 3206 link->type = type; 3207 link->sleepable = sleepable; 3208 link->id = 0; 3209 link->ops = ops; 3210 link->prog = prog; 3211 link->attach_type = attach_type; 3212 } 3213 3214 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, 3215 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3216 enum bpf_attach_type attach_type) 3217 { 3218 bpf_link_init_sleepable(link, type, ops, prog, attach_type, false); 3219 } 3220 3221 static void bpf_link_free_id(int id) 3222 { 3223 if (!id) 3224 return; 3225 3226 spin_lock_bh(&link_idr_lock); 3227 idr_remove(&link_idr, id); 3228 spin_unlock_bh(&link_idr_lock); 3229 } 3230 3231 /* Clean up bpf_link and corresponding anon_inode file and FD. After 3232 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 3233 * anon_inode's release() call. 
This helper marks bpf_link as 3234 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 3235 * is not decremented, it's the responsibility of a calling code that failed 3236 * to complete bpf_link initialization. 3237 * This helper eventually calls link's dealloc callback, but does not call 3238 * link's release callback. 3239 */ 3240 void bpf_link_cleanup(struct bpf_link_primer *primer) 3241 { 3242 primer->link->prog = NULL; 3243 bpf_link_free_id(primer->id); 3244 fput(primer->file); 3245 put_unused_fd(primer->fd); 3246 } 3247 3248 void bpf_link_inc(struct bpf_link *link) 3249 { 3250 atomic64_inc(&link->refcnt); 3251 } 3252 3253 static void bpf_link_dealloc(struct bpf_link *link) 3254 { 3255 /* now that we know that bpf_link itself can't be reached, put underlying BPF program */ 3256 if (link->prog) 3257 bpf_prog_put(link->prog); 3258 3259 /* free bpf_link and its containing memory */ 3260 if (link->ops->dealloc_deferred) 3261 link->ops->dealloc_deferred(link); 3262 else 3263 link->ops->dealloc(link); 3264 } 3265 3266 static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) 3267 { 3268 struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); 3269 3270 bpf_link_dealloc(link); 3271 } 3272 3273 static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) 3274 { 3275 if (rcu_trace_implies_rcu_gp()) 3276 bpf_link_defer_dealloc_rcu_gp(rcu); 3277 else 3278 call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); 3279 } 3280 3281 /* bpf_link_free is guaranteed to be called from process context */ 3282 static void bpf_link_free(struct bpf_link *link) 3283 { 3284 const struct bpf_link_ops *ops = link->ops; 3285 3286 bpf_link_free_id(link->id); 3287 /* detach BPF program, clean up used resources */ 3288 if (link->prog) 3289 ops->release(link); 3290 if (ops->dealloc_deferred) { 3291 /* Schedule BPF link deallocation, which will only then 3292 * trigger putting BPF program refcount. 3293 * If underlying BPF program is sleepable or BPF link's target 3294 * attach hookpoint is sleepable or otherwise requires RCU GPs 3295 * to ensure link and its underlying BPF program is not 3296 * reachable anymore, we need to first wait for RCU tasks 3297 * trace sync, and then go through "classic" RCU grace period 3298 */ 3299 if (link->sleepable || (link->prog && link->prog->sleepable)) 3300 call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); 3301 else 3302 call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); 3303 } else if (ops->dealloc) { 3304 bpf_link_dealloc(link); 3305 } 3306 } 3307 3308 static void bpf_link_put_deferred(struct work_struct *work) 3309 { 3310 struct bpf_link *link = container_of(work, struct bpf_link, work); 3311 3312 bpf_link_free(link); 3313 } 3314 3315 /* bpf_link_put might be called from atomic context. It needs to be called 3316 * from sleepable context in order to acquire sleeping locks during the process. 
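 * Deferring the actual free to a workqueue below is what keeps atomic-context
 * callers safe; bpf_link_put_direct() is the variant for callers that are
 * known to run in sleepable context.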
3317 */ 3318 void bpf_link_put(struct bpf_link *link) 3319 { 3320 if (!atomic64_dec_and_test(&link->refcnt)) 3321 return; 3322 3323 INIT_WORK(&link->work, bpf_link_put_deferred); 3324 schedule_work(&link->work); 3325 } 3326 EXPORT_SYMBOL(bpf_link_put); 3327 3328 static void bpf_link_put_direct(struct bpf_link *link) 3329 { 3330 if (!atomic64_dec_and_test(&link->refcnt)) 3331 return; 3332 bpf_link_free(link); 3333 } 3334 3335 static int bpf_link_release(struct inode *inode, struct file *filp) 3336 { 3337 struct bpf_link *link = filp->private_data; 3338 3339 bpf_link_put_direct(link); 3340 return 0; 3341 } 3342 3343 #ifdef CONFIG_PROC_FS 3344 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 3345 #define BPF_MAP_TYPE(_id, _ops) 3346 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name, 3347 static const char *bpf_link_type_strs[] = { 3348 [BPF_LINK_TYPE_UNSPEC] = "<invalid>", 3349 #include <linux/bpf_types.h> 3350 }; 3351 #undef BPF_PROG_TYPE 3352 #undef BPF_MAP_TYPE 3353 #undef BPF_LINK_TYPE 3354 3355 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) 3356 { 3357 const struct bpf_link *link = filp->private_data; 3358 const struct bpf_prog *prog = link->prog; 3359 enum bpf_link_type type = link->type; 3360 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 3361 3362 if (type < ARRAY_SIZE(bpf_link_type_strs) && bpf_link_type_strs[type]) { 3363 if (link->type == BPF_LINK_TYPE_KPROBE_MULTI) 3364 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_KPROBE_MULTI_RETURN ? 3365 "kretprobe_multi" : "kprobe_multi"); 3366 else if (link->type == BPF_LINK_TYPE_UPROBE_MULTI) 3367 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_UPROBE_MULTI_RETURN ? 3368 "uretprobe_multi" : "uprobe_multi"); 3369 else 3370 seq_printf(m, "link_type:\t%s\n", bpf_link_type_strs[type]); 3371 } else { 3372 WARN_ONCE(1, "missing BPF_LINK_TYPE(...) for link type %u\n", type); 3373 seq_printf(m, "link_type:\t<%u>\n", type); 3374 } 3375 seq_printf(m, "link_id:\t%u\n", link->id); 3376 3377 if (prog) { 3378 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 3379 seq_printf(m, 3380 "prog_tag:\t%s\n" 3381 "prog_id:\t%u\n", 3382 prog_tag, 3383 prog->aux->id); 3384 } 3385 if (link->ops->show_fdinfo) 3386 link->ops->show_fdinfo(link, m); 3387 } 3388 #endif 3389 3390 static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts) 3391 { 3392 struct bpf_link *link = file->private_data; 3393 3394 return link->ops->poll(file, pts); 3395 } 3396 3397 static const struct file_operations bpf_link_fops = { 3398 #ifdef CONFIG_PROC_FS 3399 .show_fdinfo = bpf_link_show_fdinfo, 3400 #endif 3401 .release = bpf_link_release, 3402 .read = bpf_dummy_read, 3403 .write = bpf_dummy_write, 3404 }; 3405 3406 static const struct file_operations bpf_link_fops_poll = { 3407 #ifdef CONFIG_PROC_FS 3408 .show_fdinfo = bpf_link_show_fdinfo, 3409 #endif 3410 .release = bpf_link_release, 3411 .read = bpf_dummy_read, 3412 .write = bpf_dummy_write, 3413 .poll = bpf_link_poll, 3414 }; 3415 3416 static int bpf_link_alloc_id(struct bpf_link *link) 3417 { 3418 int id; 3419 3420 idr_preload(GFP_KERNEL); 3421 spin_lock_bh(&link_idr_lock); 3422 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); 3423 spin_unlock_bh(&link_idr_lock); 3424 idr_preload_end(); 3425 3426 return id; 3427 } 3428 3429 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, 3430 * reserving unused FD and allocating ID from link_idr. 
This is to be paired 3431 * with bpf_link_settle() to install FD and ID and expose bpf_link to 3432 * user-space, if bpf_link is successfully attached. If not, bpf_link and 3433 * pre-allocated resources are to be freed with bpf_link_cleanup() call. All the 3434 * transient state is passed around in struct bpf_link_primer. 3435 * This is the preferred way to create and initialize bpf_link, especially when 3436 * there are complicated and expensive operations in between creating bpf_link 3437 * itself and attaching it to BPF hook. By using bpf_link_prime() and 3438 * bpf_link_settle() kernel code using bpf_link doesn't have to perform 3439 * expensive (and potentially failing) roll back operations in the rare case 3440 * that file, FD, or ID can't be allocated. 3441 */ 3442 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) 3443 { 3444 struct file *file; 3445 int fd, id; 3446 3447 fd = get_unused_fd_flags(O_CLOEXEC); 3448 if (fd < 0) 3449 return fd; 3450 3451 3452 id = bpf_link_alloc_id(link); 3453 if (id < 0) { 3454 put_unused_fd(fd); 3455 return id; 3456 } 3457 3458 file = anon_inode_getfile("bpf_link", 3459 link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops, 3460 link, O_CLOEXEC); 3461 if (IS_ERR(file)) { 3462 bpf_link_free_id(id); 3463 put_unused_fd(fd); 3464 return PTR_ERR(file); 3465 } 3466 3467 primer->link = link; 3468 primer->file = file; 3469 primer->fd = fd; 3470 primer->id = id; 3471 return 0; 3472 } 3473 3474 int bpf_link_settle(struct bpf_link_primer *primer) 3475 { 3476 /* make bpf_link fetchable by ID */ 3477 spin_lock_bh(&link_idr_lock); 3478 primer->link->id = primer->id; 3479 spin_unlock_bh(&link_idr_lock); 3480 /* make bpf_link fetchable by FD */ 3481 fd_install(primer->fd, primer->file); 3482 /* pass through installed FD */ 3483 return primer->fd; 3484 } 3485 3486 int bpf_link_new_fd(struct bpf_link *link) 3487 { 3488 return anon_inode_getfd("bpf-link", 3489 link->ops->poll ?
&bpf_link_fops_poll : &bpf_link_fops, 3490 link, O_CLOEXEC); 3491 } 3492 3493 struct bpf_link *bpf_link_get_from_fd(u32 ufd) 3494 { 3495 CLASS(fd, f)(ufd); 3496 struct bpf_link *link; 3497 3498 if (fd_empty(f)) 3499 return ERR_PTR(-EBADF); 3500 if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) 3501 return ERR_PTR(-EINVAL); 3502 3503 link = fd_file(f)->private_data; 3504 bpf_link_inc(link); 3505 return link; 3506 } 3507 EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL"); 3508 3509 static void bpf_tracing_link_release(struct bpf_link *link) 3510 { 3511 struct bpf_tracing_link *tr_link = 3512 container_of(link, struct bpf_tracing_link, link.link); 3513 3514 WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, 3515 tr_link->trampoline, 3516 tr_link->tgt_prog)); 3517 3518 bpf_trampoline_put(tr_link->trampoline); 3519 3520 /* tgt_prog is NULL if target is a kernel function */ 3521 if (tr_link->tgt_prog) 3522 bpf_prog_put(tr_link->tgt_prog); 3523 } 3524 3525 static void bpf_tracing_link_dealloc(struct bpf_link *link) 3526 { 3527 struct bpf_tracing_link *tr_link = 3528 container_of(link, struct bpf_tracing_link, link.link); 3529 3530 kfree(tr_link); 3531 } 3532 3533 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, 3534 struct seq_file *seq) 3535 { 3536 struct bpf_tracing_link *tr_link = 3537 container_of(link, struct bpf_tracing_link, link.link); 3538 u32 target_btf_id, target_obj_id; 3539 3540 bpf_trampoline_unpack_key(tr_link->trampoline->key, 3541 &target_obj_id, &target_btf_id); 3542 seq_printf(seq, 3543 "attach_type:\t%d\n" 3544 "target_obj_id:\t%u\n" 3545 "target_btf_id:\t%u\n" 3546 "cookie:\t%llu\n", 3547 link->attach_type, 3548 target_obj_id, 3549 target_btf_id, 3550 tr_link->link.cookie); 3551 } 3552 3553 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, 3554 struct bpf_link_info *info) 3555 { 3556 struct bpf_tracing_link *tr_link = 3557 container_of(link, struct bpf_tracing_link, link.link); 3558 3559 info->tracing.attach_type = link->attach_type; 3560 info->tracing.cookie = tr_link->link.cookie; 3561 bpf_trampoline_unpack_key(tr_link->trampoline->key, 3562 &info->tracing.target_obj_id, 3563 &info->tracing.target_btf_id); 3564 3565 return 0; 3566 } 3567 3568 static const struct bpf_link_ops bpf_tracing_link_lops = { 3569 .release = bpf_tracing_link_release, 3570 .dealloc = bpf_tracing_link_dealloc, 3571 .show_fdinfo = bpf_tracing_link_show_fdinfo, 3572 .fill_link_info = bpf_tracing_link_fill_link_info, 3573 }; 3574 3575 static int bpf_tracing_prog_attach(struct bpf_prog *prog, 3576 int tgt_prog_fd, 3577 u32 btf_id, 3578 u64 bpf_cookie, 3579 enum bpf_attach_type attach_type) 3580 { 3581 struct bpf_link_primer link_primer; 3582 struct bpf_prog *tgt_prog = NULL; 3583 struct bpf_trampoline *tr = NULL; 3584 struct bpf_tracing_link *link; 3585 u64 key = 0; 3586 int err; 3587 3588 switch (prog->type) { 3589 case BPF_PROG_TYPE_TRACING: 3590 if (prog->expected_attach_type != BPF_TRACE_FENTRY && 3591 prog->expected_attach_type != BPF_TRACE_FEXIT && 3592 prog->expected_attach_type != BPF_MODIFY_RETURN) { 3593 err = -EINVAL; 3594 goto out_put_prog; 3595 } 3596 break; 3597 case BPF_PROG_TYPE_EXT: 3598 if (prog->expected_attach_type != 0) { 3599 err = -EINVAL; 3600 goto out_put_prog; 3601 } 3602 break; 3603 case BPF_PROG_TYPE_LSM: 3604 if (prog->expected_attach_type != BPF_LSM_MAC) { 3605 err = -EINVAL; 3606 goto out_put_prog; 3607 } 3608 break; 3609 default: 3610 err = -EINVAL; 3611 goto out_put_prog; 3612 } 3613 3614 
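	/* tgt_prog_fd and btf_id describe the attach target; they must be
	 * supplied together (link_create path) or both omitted, in which case
	 * the target recorded at load time is reused further below.
	 */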
if (!!tgt_prog_fd != !!btf_id) { 3615 err = -EINVAL; 3616 goto out_put_prog; 3617 } 3618 3619 if (tgt_prog_fd) { 3620 /* 3621 * For now we only allow new targets for BPF_PROG_TYPE_EXT. If this 3622 * part would be changed to implement the same for 3623 * BPF_PROG_TYPE_TRACING, do not forget to update the way how 3624 * attach_tracing_prog flag is set. 3625 */ 3626 if (prog->type != BPF_PROG_TYPE_EXT) { 3627 err = -EINVAL; 3628 goto out_put_prog; 3629 } 3630 3631 tgt_prog = bpf_prog_get(tgt_prog_fd); 3632 if (IS_ERR(tgt_prog)) { 3633 err = PTR_ERR(tgt_prog); 3634 tgt_prog = NULL; 3635 goto out_put_prog; 3636 } 3637 3638 key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id); 3639 } 3640 3641 link = kzalloc(sizeof(*link), GFP_USER); 3642 if (!link) { 3643 err = -ENOMEM; 3644 goto out_put_prog; 3645 } 3646 bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, 3647 &bpf_tracing_link_lops, prog, attach_type); 3648 3649 link->link.cookie = bpf_cookie; 3650 3651 mutex_lock(&prog->aux->dst_mutex); 3652 3653 /* There are a few possible cases here: 3654 * 3655 * - if prog->aux->dst_trampoline is set, the program was just loaded 3656 * and not yet attached to anything, so we can use the values stored 3657 * in prog->aux 3658 * 3659 * - if prog->aux->dst_trampoline is NULL, the program has already been 3660 * attached to a target and its initial target was cleared (below) 3661 * 3662 * - if tgt_prog != NULL, the caller specified tgt_prog_fd + 3663 * target_btf_id using the link_create API. 3664 * 3665 * - if tgt_prog == NULL when this function was called using the old 3666 * raw_tracepoint_open API, and we need a target from prog->aux 3667 * 3668 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program 3669 * was detached and is going for re-attachment. 3670 * 3671 * - if prog->aux->dst_trampoline is NULL and tgt_prog and prog->aux->attach_btf 3672 * are NULL, then program was already attached and user did not provide 3673 * tgt_prog_fd so we have no way to find out or create trampoline 3674 */ 3675 if (!prog->aux->dst_trampoline && !tgt_prog) { 3676 /* 3677 * Allow re-attach for TRACING and LSM programs. If it's 3678 * currently linked, bpf_trampoline_link_prog will fail. 3679 * EXT programs need to specify tgt_prog_fd, so they 3680 * re-attach in separate code path. 3681 */ 3682 if (prog->type != BPF_PROG_TYPE_TRACING && 3683 prog->type != BPF_PROG_TYPE_LSM) { 3684 err = -EINVAL; 3685 goto out_unlock; 3686 } 3687 /* We can allow re-attach only if we have valid attach_btf. 
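	 * A valid attach_btf lets the trampoline key be recomputed below from
	 * the attach_btf + attach_btf_id recorded at load time.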
*/ 3688 if (!prog->aux->attach_btf) { 3689 err = -EINVAL; 3690 goto out_unlock; 3691 } 3692 btf_id = prog->aux->attach_btf_id; 3693 key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id); 3694 } 3695 3696 if (!prog->aux->dst_trampoline || 3697 (key && key != prog->aux->dst_trampoline->key)) { 3698 /* If there is no saved target, or the specified target is 3699 * different from the destination specified at load time, we 3700 * need a new trampoline and a check for compatibility 3701 */ 3702 struct bpf_attach_target_info tgt_info = {}; 3703 3704 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, 3705 &tgt_info); 3706 if (err) 3707 goto out_unlock; 3708 3709 if (tgt_info.tgt_mod) { 3710 module_put(prog->aux->mod); 3711 prog->aux->mod = tgt_info.tgt_mod; 3712 } 3713 3714 tr = bpf_trampoline_get(key, &tgt_info); 3715 if (!tr) { 3716 err = -ENOMEM; 3717 goto out_unlock; 3718 } 3719 } else { 3720 /* The caller didn't specify a target, or the target was the 3721 * same as the destination supplied during program load. This 3722 * means we can reuse the trampoline and reference from program 3723 * load time, and there is no need to allocate a new one. This 3724 * can only happen once for any program, as the saved values in 3725 * prog->aux are cleared below. 3726 */ 3727 tr = prog->aux->dst_trampoline; 3728 tgt_prog = prog->aux->dst_prog; 3729 } 3730 3731 err = bpf_link_prime(&link->link.link, &link_primer); 3732 if (err) 3733 goto out_unlock; 3734 3735 err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog); 3736 if (err) { 3737 bpf_link_cleanup(&link_primer); 3738 link = NULL; 3739 goto out_unlock; 3740 } 3741 3742 link->tgt_prog = tgt_prog; 3743 link->trampoline = tr; 3744 3745 /* Always clear the trampoline and target prog from prog->aux to make 3746 * sure the original attach destination is not kept alive after a 3747 * program is (re-)attached to another target. 
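	 * The load-time references are dropped below only when they are not
	 * being reused by this attachment.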
3748 */ 3749 if (prog->aux->dst_prog && 3750 (tgt_prog_fd || tr != prog->aux->dst_trampoline)) 3751 /* got extra prog ref from syscall, or attaching to different prog */ 3752 bpf_prog_put(prog->aux->dst_prog); 3753 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) 3754 /* we allocated a new trampoline, so free the old one */ 3755 bpf_trampoline_put(prog->aux->dst_trampoline); 3756 3757 prog->aux->dst_prog = NULL; 3758 prog->aux->dst_trampoline = NULL; 3759 mutex_unlock(&prog->aux->dst_mutex); 3760 3761 return bpf_link_settle(&link_primer); 3762 out_unlock: 3763 if (tr && tr != prog->aux->dst_trampoline) 3764 bpf_trampoline_put(tr); 3765 mutex_unlock(&prog->aux->dst_mutex); 3766 kfree(link); 3767 out_put_prog: 3768 if (tgt_prog_fd && tgt_prog) 3769 bpf_prog_put(tgt_prog); 3770 return err; 3771 } 3772 3773 static void bpf_raw_tp_link_release(struct bpf_link *link) 3774 { 3775 struct bpf_raw_tp_link *raw_tp = 3776 container_of(link, struct bpf_raw_tp_link, link); 3777 3778 bpf_probe_unregister(raw_tp->btp, raw_tp); 3779 bpf_put_raw_tracepoint(raw_tp->btp); 3780 } 3781 3782 static void bpf_raw_tp_link_dealloc(struct bpf_link *link) 3783 { 3784 struct bpf_raw_tp_link *raw_tp = 3785 container_of(link, struct bpf_raw_tp_link, link); 3786 3787 kfree(raw_tp); 3788 } 3789 3790 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, 3791 struct seq_file *seq) 3792 { 3793 struct bpf_raw_tp_link *raw_tp_link = 3794 container_of(link, struct bpf_raw_tp_link, link); 3795 3796 seq_printf(seq, 3797 "tp_name:\t%s\n" 3798 "cookie:\t%llu\n", 3799 raw_tp_link->btp->tp->name, 3800 raw_tp_link->cookie); 3801 } 3802 3803 static int bpf_copy_to_user(char __user *ubuf, const char *buf, u32 ulen, 3804 u32 len) 3805 { 3806 if (ulen >= len + 1) { 3807 if (copy_to_user(ubuf, buf, len + 1)) 3808 return -EFAULT; 3809 } else { 3810 char zero = '\0'; 3811 3812 if (copy_to_user(ubuf, buf, ulen - 1)) 3813 return -EFAULT; 3814 if (put_user(zero, ubuf + ulen - 1)) 3815 return -EFAULT; 3816 return -ENOSPC; 3817 } 3818 3819 return 0; 3820 } 3821 3822 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, 3823 struct bpf_link_info *info) 3824 { 3825 struct bpf_raw_tp_link *raw_tp_link = 3826 container_of(link, struct bpf_raw_tp_link, link); 3827 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); 3828 const char *tp_name = raw_tp_link->btp->tp->name; 3829 u32 ulen = info->raw_tracepoint.tp_name_len; 3830 size_t tp_len = strlen(tp_name); 3831 3832 if (!ulen ^ !ubuf) 3833 return -EINVAL; 3834 3835 info->raw_tracepoint.tp_name_len = tp_len + 1; 3836 info->raw_tracepoint.cookie = raw_tp_link->cookie; 3837 3838 if (!ubuf) 3839 return 0; 3840 3841 return bpf_copy_to_user(ubuf, tp_name, ulen, tp_len); 3842 } 3843 3844 static const struct bpf_link_ops bpf_raw_tp_link_lops = { 3845 .release = bpf_raw_tp_link_release, 3846 .dealloc_deferred = bpf_raw_tp_link_dealloc, 3847 .show_fdinfo = bpf_raw_tp_link_show_fdinfo, 3848 .fill_link_info = bpf_raw_tp_link_fill_link_info, 3849 }; 3850 3851 #ifdef CONFIG_PERF_EVENTS 3852 struct bpf_perf_link { 3853 struct bpf_link link; 3854 struct file *perf_file; 3855 }; 3856 3857 static void bpf_perf_link_release(struct bpf_link *link) 3858 { 3859 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3860 struct perf_event *event = perf_link->perf_file->private_data; 3861 3862 perf_event_free_bpf_prog(event); 3863 fput(perf_link->perf_file); 3864 } 3865 3866 static void bpf_perf_link_dealloc(struct bpf_link *link) 3867 { 
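	/* Runs once the link is unreachable; the perf_file reference was
	 * already dropped in bpf_perf_link_release().
	 */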
3868 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3869 3870 kfree(perf_link); 3871 } 3872 3873 static int bpf_perf_link_fill_common(const struct perf_event *event, 3874 char __user *uname, u32 *ulenp, 3875 u64 *probe_offset, u64 *probe_addr, 3876 u32 *fd_type, unsigned long *missed) 3877 { 3878 const char *buf; 3879 u32 prog_id, ulen; 3880 size_t len; 3881 int err; 3882 3883 ulen = *ulenp; 3884 if (!ulen ^ !uname) 3885 return -EINVAL; 3886 3887 err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, 3888 probe_offset, probe_addr, missed); 3889 if (err) 3890 return err; 3891 3892 if (buf) { 3893 len = strlen(buf); 3894 *ulenp = len + 1; 3895 } else { 3896 *ulenp = 1; 3897 } 3898 if (!uname) 3899 return 0; 3900 3901 if (buf) { 3902 err = bpf_copy_to_user(uname, buf, ulen, len); 3903 if (err) 3904 return err; 3905 } else { 3906 char zero = '\0'; 3907 3908 if (put_user(zero, uname)) 3909 return -EFAULT; 3910 } 3911 return 0; 3912 } 3913 3914 #ifdef CONFIG_KPROBE_EVENTS 3915 static int bpf_perf_link_fill_kprobe(const struct perf_event *event, 3916 struct bpf_link_info *info) 3917 { 3918 unsigned long missed; 3919 char __user *uname; 3920 u64 addr, offset; 3921 u32 ulen, type; 3922 int err; 3923 3924 uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); 3925 ulen = info->perf_event.kprobe.name_len; 3926 err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr, 3927 &type, &missed); 3928 if (err) 3929 return err; 3930 if (type == BPF_FD_TYPE_KRETPROBE) 3931 info->perf_event.type = BPF_PERF_EVENT_KRETPROBE; 3932 else 3933 info->perf_event.type = BPF_PERF_EVENT_KPROBE; 3934 info->perf_event.kprobe.name_len = ulen; 3935 info->perf_event.kprobe.offset = offset; 3936 info->perf_event.kprobe.missed = missed; 3937 if (!kallsyms_show_value(current_cred())) 3938 addr = 0; 3939 info->perf_event.kprobe.addr = addr; 3940 info->perf_event.kprobe.cookie = event->bpf_cookie; 3941 return 0; 3942 } 3943 3944 static void bpf_perf_link_fdinfo_kprobe(const struct perf_event *event, 3945 struct seq_file *seq) 3946 { 3947 const char *name; 3948 int err; 3949 u32 prog_id, type; 3950 u64 offset, addr; 3951 unsigned long missed; 3952 3953 err = bpf_get_perf_event_info(event, &prog_id, &type, &name, 3954 &offset, &addr, &missed); 3955 if (err) 3956 return; 3957 3958 seq_printf(seq, 3959 "name:\t%s\n" 3960 "offset:\t%#llx\n" 3961 "missed:\t%lu\n" 3962 "addr:\t%#llx\n" 3963 "event_type:\t%s\n" 3964 "cookie:\t%llu\n", 3965 name, offset, missed, addr, 3966 type == BPF_FD_TYPE_KRETPROBE ? 
"kretprobe" : "kprobe", 3967 event->bpf_cookie); 3968 } 3969 #endif 3970 3971 #ifdef CONFIG_UPROBE_EVENTS 3972 static int bpf_perf_link_fill_uprobe(const struct perf_event *event, 3973 struct bpf_link_info *info) 3974 { 3975 u64 ref_ctr_offset, offset; 3976 char __user *uname; 3977 u32 ulen, type; 3978 int err; 3979 3980 uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); 3981 ulen = info->perf_event.uprobe.name_len; 3982 err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &ref_ctr_offset, 3983 &type, NULL); 3984 if (err) 3985 return err; 3986 3987 if (type == BPF_FD_TYPE_URETPROBE) 3988 info->perf_event.type = BPF_PERF_EVENT_URETPROBE; 3989 else 3990 info->perf_event.type = BPF_PERF_EVENT_UPROBE; 3991 info->perf_event.uprobe.name_len = ulen; 3992 info->perf_event.uprobe.offset = offset; 3993 info->perf_event.uprobe.cookie = event->bpf_cookie; 3994 info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset; 3995 return 0; 3996 } 3997 3998 static void bpf_perf_link_fdinfo_uprobe(const struct perf_event *event, 3999 struct seq_file *seq) 4000 { 4001 const char *name; 4002 int err; 4003 u32 prog_id, type; 4004 u64 offset, ref_ctr_offset; 4005 unsigned long missed; 4006 4007 err = bpf_get_perf_event_info(event, &prog_id, &type, &name, 4008 &offset, &ref_ctr_offset, &missed); 4009 if (err) 4010 return; 4011 4012 seq_printf(seq, 4013 "name:\t%s\n" 4014 "offset:\t%#llx\n" 4015 "ref_ctr_offset:\t%#llx\n" 4016 "event_type:\t%s\n" 4017 "cookie:\t%llu\n", 4018 name, offset, ref_ctr_offset, 4019 type == BPF_FD_TYPE_URETPROBE ? "uretprobe" : "uprobe", 4020 event->bpf_cookie); 4021 } 4022 #endif 4023 4024 static int bpf_perf_link_fill_probe(const struct perf_event *event, 4025 struct bpf_link_info *info) 4026 { 4027 #ifdef CONFIG_KPROBE_EVENTS 4028 if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) 4029 return bpf_perf_link_fill_kprobe(event, info); 4030 #endif 4031 #ifdef CONFIG_UPROBE_EVENTS 4032 if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE) 4033 return bpf_perf_link_fill_uprobe(event, info); 4034 #endif 4035 return -EOPNOTSUPP; 4036 } 4037 4038 static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, 4039 struct bpf_link_info *info) 4040 { 4041 char __user *uname; 4042 u32 ulen; 4043 int err; 4044 4045 uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); 4046 ulen = info->perf_event.tracepoint.name_len; 4047 err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, NULL); 4048 if (err) 4049 return err; 4050 4051 info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; 4052 info->perf_event.tracepoint.name_len = ulen; 4053 info->perf_event.tracepoint.cookie = event->bpf_cookie; 4054 return 0; 4055 } 4056 4057 static int bpf_perf_link_fill_perf_event(const struct perf_event *event, 4058 struct bpf_link_info *info) 4059 { 4060 info->perf_event.event.type = event->attr.type; 4061 info->perf_event.event.config = event->attr.config; 4062 info->perf_event.event.cookie = event->bpf_cookie; 4063 info->perf_event.type = BPF_PERF_EVENT_EVENT; 4064 return 0; 4065 } 4066 4067 static int bpf_perf_link_fill_link_info(const struct bpf_link *link, 4068 struct bpf_link_info *info) 4069 { 4070 struct bpf_perf_link *perf_link; 4071 const struct perf_event *event; 4072 4073 perf_link = container_of(link, struct bpf_perf_link, link); 4074 event = perf_get_event(perf_link->perf_file); 4075 if (IS_ERR(event)) 4076 return PTR_ERR(event); 4077 4078 switch (event->prog->type) { 4079 case BPF_PROG_TYPE_PERF_EVENT: 4080 return bpf_perf_link_fill_perf_event(event, info); 4081 
case BPF_PROG_TYPE_TRACEPOINT: 4082 return bpf_perf_link_fill_tracepoint(event, info); 4083 case BPF_PROG_TYPE_KPROBE: 4084 return bpf_perf_link_fill_probe(event, info); 4085 default: 4086 return -EOPNOTSUPP; 4087 } 4088 } 4089 4090 static void bpf_perf_event_link_show_fdinfo(const struct perf_event *event, 4091 struct seq_file *seq) 4092 { 4093 seq_printf(seq, 4094 "type:\t%u\n" 4095 "config:\t%llu\n" 4096 "event_type:\t%s\n" 4097 "cookie:\t%llu\n", 4098 event->attr.type, event->attr.config, 4099 "event", event->bpf_cookie); 4100 } 4101 4102 static void bpf_tracepoint_link_show_fdinfo(const struct perf_event *event, 4103 struct seq_file *seq) 4104 { 4105 int err; 4106 const char *name; 4107 u32 prog_id; 4108 4109 err = bpf_get_perf_event_info(event, &prog_id, NULL, &name, NULL, 4110 NULL, NULL); 4111 if (err) 4112 return; 4113 4114 seq_printf(seq, 4115 "tp_name:\t%s\n" 4116 "event_type:\t%s\n" 4117 "cookie:\t%llu\n", 4118 name, "tracepoint", event->bpf_cookie); 4119 } 4120 4121 static void bpf_probe_link_show_fdinfo(const struct perf_event *event, 4122 struct seq_file *seq) 4123 { 4124 #ifdef CONFIG_KPROBE_EVENTS 4125 if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) 4126 return bpf_perf_link_fdinfo_kprobe(event, seq); 4127 #endif 4128 4129 #ifdef CONFIG_UPROBE_EVENTS 4130 if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE) 4131 return bpf_perf_link_fdinfo_uprobe(event, seq); 4132 #endif 4133 } 4134 4135 static void bpf_perf_link_show_fdinfo(const struct bpf_link *link, 4136 struct seq_file *seq) 4137 { 4138 struct bpf_perf_link *perf_link; 4139 const struct perf_event *event; 4140 4141 perf_link = container_of(link, struct bpf_perf_link, link); 4142 event = perf_get_event(perf_link->perf_file); 4143 if (IS_ERR(event)) 4144 return; 4145 4146 switch (event->prog->type) { 4147 case BPF_PROG_TYPE_PERF_EVENT: 4148 return bpf_perf_event_link_show_fdinfo(event, seq); 4149 case BPF_PROG_TYPE_TRACEPOINT: 4150 return bpf_tracepoint_link_show_fdinfo(event, seq); 4151 case BPF_PROG_TYPE_KPROBE: 4152 return bpf_probe_link_show_fdinfo(event, seq); 4153 default: 4154 return; 4155 } 4156 } 4157 4158 static const struct bpf_link_ops bpf_perf_link_lops = { 4159 .release = bpf_perf_link_release, 4160 .dealloc = bpf_perf_link_dealloc, 4161 .fill_link_info = bpf_perf_link_fill_link_info, 4162 .show_fdinfo = bpf_perf_link_show_fdinfo, 4163 }; 4164 4165 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 4166 { 4167 struct bpf_link_primer link_primer; 4168 struct bpf_perf_link *link; 4169 struct perf_event *event; 4170 struct file *perf_file; 4171 int err; 4172 4173 if (attr->link_create.flags) 4174 return -EINVAL; 4175 4176 perf_file = perf_event_get(attr->link_create.target_fd); 4177 if (IS_ERR(perf_file)) 4178 return PTR_ERR(perf_file); 4179 4180 link = kzalloc(sizeof(*link), GFP_USER); 4181 if (!link) { 4182 err = -ENOMEM; 4183 goto out_put_file; 4184 } 4185 bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog, 4186 attr->link_create.attach_type); 4187 link->perf_file = perf_file; 4188 4189 err = bpf_link_prime(&link->link, &link_primer); 4190 if (err) { 4191 kfree(link); 4192 goto out_put_file; 4193 } 4194 4195 event = perf_file->private_data; 4196 err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie); 4197 if (err) { 4198 bpf_link_cleanup(&link_primer); 4199 goto out_put_file; 4200 } 4201 /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */ 4202 bpf_prog_inc(prog); 4203 4204 return 
bpf_link_settle(&link_primer); 4205 4206 out_put_file: 4207 fput(perf_file); 4208 return err; 4209 } 4210 #else 4211 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 4212 { 4213 return -EOPNOTSUPP; 4214 } 4215 #endif /* CONFIG_PERF_EVENTS */ 4216 4217 static int bpf_raw_tp_link_attach(struct bpf_prog *prog, 4218 const char __user *user_tp_name, u64 cookie, 4219 enum bpf_attach_type attach_type) 4220 { 4221 struct bpf_link_primer link_primer; 4222 struct bpf_raw_tp_link *link; 4223 struct bpf_raw_event_map *btp; 4224 const char *tp_name; 4225 char buf[128]; 4226 int err; 4227 4228 switch (prog->type) { 4229 case BPF_PROG_TYPE_TRACING: 4230 case BPF_PROG_TYPE_EXT: 4231 case BPF_PROG_TYPE_LSM: 4232 if (user_tp_name) 4233 /* The attach point for this category of programs 4234 * should be specified via btf_id during program load. 4235 */ 4236 return -EINVAL; 4237 if (prog->type == BPF_PROG_TYPE_TRACING && 4238 prog->expected_attach_type == BPF_TRACE_RAW_TP) { 4239 tp_name = prog->aux->attach_func_name; 4240 break; 4241 } 4242 return bpf_tracing_prog_attach(prog, 0, 0, 0, attach_type); 4243 case BPF_PROG_TYPE_RAW_TRACEPOINT: 4244 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 4245 if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) 4246 return -EFAULT; 4247 buf[sizeof(buf) - 1] = 0; 4248 tp_name = buf; 4249 break; 4250 default: 4251 return -EINVAL; 4252 } 4253 4254 btp = bpf_get_raw_tracepoint(tp_name); 4255 if (!btp) 4256 return -ENOENT; 4257 4258 link = kzalloc(sizeof(*link), GFP_USER); 4259 if (!link) { 4260 err = -ENOMEM; 4261 goto out_put_btp; 4262 } 4263 bpf_link_init_sleepable(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, 4264 &bpf_raw_tp_link_lops, prog, attach_type, 4265 tracepoint_is_faultable(btp->tp)); 4266 link->btp = btp; 4267 link->cookie = cookie; 4268 4269 err = bpf_link_prime(&link->link, &link_primer); 4270 if (err) { 4271 kfree(link); 4272 goto out_put_btp; 4273 } 4274 4275 err = bpf_probe_register(link->btp, link); 4276 if (err) { 4277 bpf_link_cleanup(&link_primer); 4278 goto out_put_btp; 4279 } 4280 4281 return bpf_link_settle(&link_primer); 4282 4283 out_put_btp: 4284 bpf_put_raw_tracepoint(btp); 4285 return err; 4286 } 4287 4288 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.cookie 4289 4290 static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 4291 { 4292 struct bpf_prog *prog; 4293 void __user *tp_name; 4294 __u64 cookie; 4295 int fd; 4296 4297 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) 4298 return -EINVAL; 4299 4300 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 4301 if (IS_ERR(prog)) 4302 return PTR_ERR(prog); 4303 4304 tp_name = u64_to_user_ptr(attr->raw_tracepoint.name); 4305 cookie = attr->raw_tracepoint.cookie; 4306 fd = bpf_raw_tp_link_attach(prog, tp_name, cookie, prog->expected_attach_type); 4307 if (fd < 0) 4308 bpf_prog_put(prog); 4309 return fd; 4310 } 4311 4312 static enum bpf_prog_type 4313 attach_type_to_prog_type(enum bpf_attach_type attach_type) 4314 { 4315 switch (attach_type) { 4316 case BPF_CGROUP_INET_INGRESS: 4317 case BPF_CGROUP_INET_EGRESS: 4318 return BPF_PROG_TYPE_CGROUP_SKB; 4319 case BPF_CGROUP_INET_SOCK_CREATE: 4320 case BPF_CGROUP_INET_SOCK_RELEASE: 4321 case BPF_CGROUP_INET4_POST_BIND: 4322 case BPF_CGROUP_INET6_POST_BIND: 4323 return BPF_PROG_TYPE_CGROUP_SOCK; 4324 case BPF_CGROUP_INET4_BIND: 4325 case BPF_CGROUP_INET6_BIND: 4326 case BPF_CGROUP_INET4_CONNECT: 4327 case BPF_CGROUP_INET6_CONNECT: 4328 case BPF_CGROUP_UNIX_CONNECT: 4329 case BPF_CGROUP_INET4_GETPEERNAME: 4330 
case BPF_CGROUP_INET6_GETPEERNAME: 4331 case BPF_CGROUP_UNIX_GETPEERNAME: 4332 case BPF_CGROUP_INET4_GETSOCKNAME: 4333 case BPF_CGROUP_INET6_GETSOCKNAME: 4334 case BPF_CGROUP_UNIX_GETSOCKNAME: 4335 case BPF_CGROUP_UDP4_SENDMSG: 4336 case BPF_CGROUP_UDP6_SENDMSG: 4337 case BPF_CGROUP_UNIX_SENDMSG: 4338 case BPF_CGROUP_UDP4_RECVMSG: 4339 case BPF_CGROUP_UDP6_RECVMSG: 4340 case BPF_CGROUP_UNIX_RECVMSG: 4341 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 4342 case BPF_CGROUP_SOCK_OPS: 4343 return BPF_PROG_TYPE_SOCK_OPS; 4344 case BPF_CGROUP_DEVICE: 4345 return BPF_PROG_TYPE_CGROUP_DEVICE; 4346 case BPF_SK_MSG_VERDICT: 4347 return BPF_PROG_TYPE_SK_MSG; 4348 case BPF_SK_SKB_STREAM_PARSER: 4349 case BPF_SK_SKB_STREAM_VERDICT: 4350 case BPF_SK_SKB_VERDICT: 4351 return BPF_PROG_TYPE_SK_SKB; 4352 case BPF_LIRC_MODE2: 4353 return BPF_PROG_TYPE_LIRC_MODE2; 4354 case BPF_FLOW_DISSECTOR: 4355 return BPF_PROG_TYPE_FLOW_DISSECTOR; 4356 case BPF_CGROUP_SYSCTL: 4357 return BPF_PROG_TYPE_CGROUP_SYSCTL; 4358 case BPF_CGROUP_GETSOCKOPT: 4359 case BPF_CGROUP_SETSOCKOPT: 4360 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 4361 case BPF_TRACE_ITER: 4362 case BPF_TRACE_RAW_TP: 4363 case BPF_TRACE_FENTRY: 4364 case BPF_TRACE_FEXIT: 4365 case BPF_MODIFY_RETURN: 4366 return BPF_PROG_TYPE_TRACING; 4367 case BPF_LSM_MAC: 4368 return BPF_PROG_TYPE_LSM; 4369 case BPF_SK_LOOKUP: 4370 return BPF_PROG_TYPE_SK_LOOKUP; 4371 case BPF_XDP: 4372 return BPF_PROG_TYPE_XDP; 4373 case BPF_LSM_CGROUP: 4374 return BPF_PROG_TYPE_LSM; 4375 case BPF_TCX_INGRESS: 4376 case BPF_TCX_EGRESS: 4377 case BPF_NETKIT_PRIMARY: 4378 case BPF_NETKIT_PEER: 4379 return BPF_PROG_TYPE_SCHED_CLS; 4380 default: 4381 return BPF_PROG_TYPE_UNSPEC; 4382 } 4383 } 4384 4385 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 4386 enum bpf_attach_type attach_type) 4387 { 4388 enum bpf_prog_type ptype; 4389 4390 switch (prog->type) { 4391 case BPF_PROG_TYPE_CGROUP_SOCK: 4392 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4393 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4394 case BPF_PROG_TYPE_SK_LOOKUP: 4395 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 4396 case BPF_PROG_TYPE_CGROUP_SKB: 4397 if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN)) 4398 /* cg-skb progs can be loaded by unpriv user. 4399 * check permissions at attach time. 4400 */ 4401 return -EPERM; 4402 4403 ptype = attach_type_to_prog_type(attach_type); 4404 if (prog->type != ptype) 4405 return -EINVAL; 4406 4407 return prog->enforce_expected_attach_type && 4408 prog->expected_attach_type != attach_type ? 
4409 -EINVAL : 0; 4410 case BPF_PROG_TYPE_EXT: 4411 return 0; 4412 case BPF_PROG_TYPE_NETFILTER: 4413 if (attach_type != BPF_NETFILTER) 4414 return -EINVAL; 4415 return 0; 4416 case BPF_PROG_TYPE_PERF_EVENT: 4417 case BPF_PROG_TYPE_TRACEPOINT: 4418 if (attach_type != BPF_PERF_EVENT) 4419 return -EINVAL; 4420 return 0; 4421 case BPF_PROG_TYPE_KPROBE: 4422 if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && 4423 attach_type != BPF_TRACE_KPROBE_MULTI) 4424 return -EINVAL; 4425 if (prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION && 4426 attach_type != BPF_TRACE_KPROBE_SESSION) 4427 return -EINVAL; 4428 if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI && 4429 attach_type != BPF_TRACE_UPROBE_MULTI) 4430 return -EINVAL; 4431 if (prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION && 4432 attach_type != BPF_TRACE_UPROBE_SESSION) 4433 return -EINVAL; 4434 if (attach_type != BPF_PERF_EVENT && 4435 attach_type != BPF_TRACE_KPROBE_MULTI && 4436 attach_type != BPF_TRACE_KPROBE_SESSION && 4437 attach_type != BPF_TRACE_UPROBE_MULTI && 4438 attach_type != BPF_TRACE_UPROBE_SESSION) 4439 return -EINVAL; 4440 return 0; 4441 case BPF_PROG_TYPE_SCHED_CLS: 4442 if (attach_type != BPF_TCX_INGRESS && 4443 attach_type != BPF_TCX_EGRESS && 4444 attach_type != BPF_NETKIT_PRIMARY && 4445 attach_type != BPF_NETKIT_PEER) 4446 return -EINVAL; 4447 return 0; 4448 default: 4449 ptype = attach_type_to_prog_type(attach_type); 4450 if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) 4451 return -EINVAL; 4452 return 0; 4453 } 4454 } 4455 4456 static bool is_cgroup_prog_type(enum bpf_prog_type ptype, enum bpf_attach_type atype, 4457 bool check_atype) 4458 { 4459 switch (ptype) { 4460 case BPF_PROG_TYPE_CGROUP_DEVICE: 4461 case BPF_PROG_TYPE_CGROUP_SKB: 4462 case BPF_PROG_TYPE_CGROUP_SOCK: 4463 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4464 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4465 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4466 case BPF_PROG_TYPE_SOCK_OPS: 4467 return true; 4468 case BPF_PROG_TYPE_LSM: 4469 return check_atype ? 
atype == BPF_LSM_CGROUP : true; 4470 default: 4471 return false; 4472 } 4473 } 4474 4475 #define BPF_PROG_ATTACH_LAST_FIELD expected_revision 4476 4477 #define BPF_F_ATTACH_MASK_BASE \ 4478 (BPF_F_ALLOW_OVERRIDE | \ 4479 BPF_F_ALLOW_MULTI | \ 4480 BPF_F_REPLACE | \ 4481 BPF_F_PREORDER) 4482 4483 #define BPF_F_ATTACH_MASK_MPROG \ 4484 (BPF_F_REPLACE | \ 4485 BPF_F_BEFORE | \ 4486 BPF_F_AFTER | \ 4487 BPF_F_ID | \ 4488 BPF_F_LINK) 4489 4490 static int bpf_prog_attach(const union bpf_attr *attr) 4491 { 4492 enum bpf_prog_type ptype; 4493 struct bpf_prog *prog; 4494 int ret; 4495 4496 if (CHECK_ATTR(BPF_PROG_ATTACH)) 4497 return -EINVAL; 4498 4499 ptype = attach_type_to_prog_type(attr->attach_type); 4500 if (ptype == BPF_PROG_TYPE_UNSPEC) 4501 return -EINVAL; 4502 if (bpf_mprog_supported(ptype)) { 4503 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) 4504 return -EINVAL; 4505 } else if (is_cgroup_prog_type(ptype, 0, false)) { 4506 if (attr->attach_flags & ~(BPF_F_ATTACH_MASK_BASE | BPF_F_ATTACH_MASK_MPROG)) 4507 return -EINVAL; 4508 } else { 4509 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE) 4510 return -EINVAL; 4511 if (attr->relative_fd || 4512 attr->expected_revision) 4513 return -EINVAL; 4514 } 4515 4516 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 4517 if (IS_ERR(prog)) 4518 return PTR_ERR(prog); 4519 4520 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) { 4521 bpf_prog_put(prog); 4522 return -EINVAL; 4523 } 4524 4525 if (is_cgroup_prog_type(ptype, prog->expected_attach_type, true)) { 4526 ret = cgroup_bpf_prog_attach(attr, ptype, prog); 4527 goto out; 4528 } 4529 4530 switch (ptype) { 4531 case BPF_PROG_TYPE_SK_SKB: 4532 case BPF_PROG_TYPE_SK_MSG: 4533 ret = sock_map_get_from_fd(attr, prog); 4534 break; 4535 case BPF_PROG_TYPE_LIRC_MODE2: 4536 ret = lirc_prog_attach(attr, prog); 4537 break; 4538 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4539 ret = netns_bpf_prog_attach(attr, prog); 4540 break; 4541 case BPF_PROG_TYPE_SCHED_CLS: 4542 if (attr->attach_type == BPF_TCX_INGRESS || 4543 attr->attach_type == BPF_TCX_EGRESS) 4544 ret = tcx_prog_attach(attr, prog); 4545 else 4546 ret = netkit_prog_attach(attr, prog); 4547 break; 4548 default: 4549 ret = -EINVAL; 4550 } 4551 out: 4552 if (ret) 4553 bpf_prog_put(prog); 4554 return ret; 4555 } 4556 4557 #define BPF_PROG_DETACH_LAST_FIELD expected_revision 4558 4559 static int bpf_prog_detach(const union bpf_attr *attr) 4560 { 4561 struct bpf_prog *prog = NULL; 4562 enum bpf_prog_type ptype; 4563 int ret; 4564 4565 if (CHECK_ATTR(BPF_PROG_DETACH)) 4566 return -EINVAL; 4567 4568 ptype = attach_type_to_prog_type(attr->attach_type); 4569 if (bpf_mprog_supported(ptype)) { 4570 if (ptype == BPF_PROG_TYPE_UNSPEC) 4571 return -EINVAL; 4572 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) 4573 return -EINVAL; 4574 if (attr->attach_bpf_fd) { 4575 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 4576 if (IS_ERR(prog)) 4577 return PTR_ERR(prog); 4578 } 4579 } else if (is_cgroup_prog_type(ptype, 0, false)) { 4580 if (attr->attach_flags || attr->relative_fd) 4581 return -EINVAL; 4582 } else if (attr->attach_flags || 4583 attr->relative_fd || 4584 attr->expected_revision) { 4585 return -EINVAL; 4586 } 4587 4588 switch (ptype) { 4589 case BPF_PROG_TYPE_SK_MSG: 4590 case BPF_PROG_TYPE_SK_SKB: 4591 ret = sock_map_prog_detach(attr, ptype); 4592 break; 4593 case BPF_PROG_TYPE_LIRC_MODE2: 4594 ret = lirc_prog_detach(attr); 4595 break; 4596 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4597 ret = netns_bpf_prog_detach(attr, ptype); 4598 break; 4599 
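	/* The cgroup-attached program types below all share the common
	 * cgroup detach path.
	 */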
case BPF_PROG_TYPE_CGROUP_DEVICE: 4600 case BPF_PROG_TYPE_CGROUP_SKB: 4601 case BPF_PROG_TYPE_CGROUP_SOCK: 4602 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4603 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4604 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4605 case BPF_PROG_TYPE_SOCK_OPS: 4606 case BPF_PROG_TYPE_LSM: 4607 ret = cgroup_bpf_prog_detach(attr, ptype); 4608 break; 4609 case BPF_PROG_TYPE_SCHED_CLS: 4610 if (attr->attach_type == BPF_TCX_INGRESS || 4611 attr->attach_type == BPF_TCX_EGRESS) 4612 ret = tcx_prog_detach(attr, prog); 4613 else 4614 ret = netkit_prog_detach(attr, prog); 4615 break; 4616 default: 4617 ret = -EINVAL; 4618 } 4619 4620 if (prog) 4621 bpf_prog_put(prog); 4622 return ret; 4623 } 4624 4625 #define BPF_PROG_QUERY_LAST_FIELD query.revision 4626 4627 static int bpf_prog_query(const union bpf_attr *attr, 4628 union bpf_attr __user *uattr) 4629 { 4630 if (!bpf_net_capable()) 4631 return -EPERM; 4632 if (CHECK_ATTR(BPF_PROG_QUERY)) 4633 return -EINVAL; 4634 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) 4635 return -EINVAL; 4636 4637 switch (attr->query.attach_type) { 4638 case BPF_CGROUP_INET_INGRESS: 4639 case BPF_CGROUP_INET_EGRESS: 4640 case BPF_CGROUP_INET_SOCK_CREATE: 4641 case BPF_CGROUP_INET_SOCK_RELEASE: 4642 case BPF_CGROUP_INET4_BIND: 4643 case BPF_CGROUP_INET6_BIND: 4644 case BPF_CGROUP_INET4_POST_BIND: 4645 case BPF_CGROUP_INET6_POST_BIND: 4646 case BPF_CGROUP_INET4_CONNECT: 4647 case BPF_CGROUP_INET6_CONNECT: 4648 case BPF_CGROUP_UNIX_CONNECT: 4649 case BPF_CGROUP_INET4_GETPEERNAME: 4650 case BPF_CGROUP_INET6_GETPEERNAME: 4651 case BPF_CGROUP_UNIX_GETPEERNAME: 4652 case BPF_CGROUP_INET4_GETSOCKNAME: 4653 case BPF_CGROUP_INET6_GETSOCKNAME: 4654 case BPF_CGROUP_UNIX_GETSOCKNAME: 4655 case BPF_CGROUP_UDP4_SENDMSG: 4656 case BPF_CGROUP_UDP6_SENDMSG: 4657 case BPF_CGROUP_UNIX_SENDMSG: 4658 case BPF_CGROUP_UDP4_RECVMSG: 4659 case BPF_CGROUP_UDP6_RECVMSG: 4660 case BPF_CGROUP_UNIX_RECVMSG: 4661 case BPF_CGROUP_SOCK_OPS: 4662 case BPF_CGROUP_DEVICE: 4663 case BPF_CGROUP_SYSCTL: 4664 case BPF_CGROUP_GETSOCKOPT: 4665 case BPF_CGROUP_SETSOCKOPT: 4666 case BPF_LSM_CGROUP: 4667 return cgroup_bpf_prog_query(attr, uattr); 4668 case BPF_LIRC_MODE2: 4669 return lirc_prog_query(attr, uattr); 4670 case BPF_FLOW_DISSECTOR: 4671 case BPF_SK_LOOKUP: 4672 return netns_bpf_prog_query(attr, uattr); 4673 case BPF_SK_SKB_STREAM_PARSER: 4674 case BPF_SK_SKB_STREAM_VERDICT: 4675 case BPF_SK_MSG_VERDICT: 4676 case BPF_SK_SKB_VERDICT: 4677 return sock_map_bpf_prog_query(attr, uattr); 4678 case BPF_TCX_INGRESS: 4679 case BPF_TCX_EGRESS: 4680 return tcx_prog_query(attr, uattr); 4681 case BPF_NETKIT_PRIMARY: 4682 case BPF_NETKIT_PEER: 4683 return netkit_prog_query(attr, uattr); 4684 default: 4685 return -EINVAL; 4686 } 4687 } 4688 4689 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size 4690 4691 static int bpf_prog_test_run(const union bpf_attr *attr, 4692 union bpf_attr __user *uattr) 4693 { 4694 struct bpf_prog *prog; 4695 int ret = -ENOTSUPP; 4696 4697 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 4698 return -EINVAL; 4699 4700 if ((attr->test.ctx_size_in && !attr->test.ctx_in) || 4701 (!attr->test.ctx_size_in && attr->test.ctx_in)) 4702 return -EINVAL; 4703 4704 if ((attr->test.ctx_size_out && !attr->test.ctx_out) || 4705 (!attr->test.ctx_size_out && attr->test.ctx_out)) 4706 return -EINVAL; 4707 4708 prog = bpf_prog_get(attr->test.prog_fd); 4709 if (IS_ERR(prog)) 4710 return PTR_ERR(prog); 4711 4712 if (prog->aux->ops->test_run) 4713 ret = prog->aux->ops->test_run(prog, attr, uattr); 4714 4715 
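	/* ret stays -ENOTSUPP when the program type provides no test_run
	 * callback.
	 */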
bpf_prog_put(prog); 4716 return ret; 4717 } 4718 4719 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 4720 4721 static int bpf_obj_get_next_id(const union bpf_attr *attr, 4722 union bpf_attr __user *uattr, 4723 struct idr *idr, 4724 spinlock_t *lock) 4725 { 4726 u32 next_id = attr->start_id; 4727 int err = 0; 4728 4729 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 4730 return -EINVAL; 4731 4732 if (!capable(CAP_SYS_ADMIN)) 4733 return -EPERM; 4734 4735 next_id++; 4736 spin_lock_bh(lock); 4737 if (!idr_get_next(idr, &next_id)) 4738 err = -ENOENT; 4739 spin_unlock_bh(lock); 4740 4741 if (!err) 4742 err = put_user(next_id, &uattr->next_id); 4743 4744 return err; 4745 } 4746 4747 struct bpf_map *bpf_map_get_curr_or_next(u32 *id) 4748 { 4749 struct bpf_map *map; 4750 4751 spin_lock_bh(&map_idr_lock); 4752 again: 4753 map = idr_get_next(&map_idr, id); 4754 if (map) { 4755 map = __bpf_map_inc_not_zero(map, false); 4756 if (IS_ERR(map)) { 4757 (*id)++; 4758 goto again; 4759 } 4760 } 4761 spin_unlock_bh(&map_idr_lock); 4762 4763 return map; 4764 } 4765 4766 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) 4767 { 4768 struct bpf_prog *prog; 4769 4770 spin_lock_bh(&prog_idr_lock); 4771 again: 4772 prog = idr_get_next(&prog_idr, id); 4773 if (prog) { 4774 prog = bpf_prog_inc_not_zero(prog); 4775 if (IS_ERR(prog)) { 4776 (*id)++; 4777 goto again; 4778 } 4779 } 4780 spin_unlock_bh(&prog_idr_lock); 4781 4782 return prog; 4783 } 4784 4785 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 4786 4787 struct bpf_prog *bpf_prog_by_id(u32 id) 4788 { 4789 struct bpf_prog *prog; 4790 4791 if (!id) 4792 return ERR_PTR(-ENOENT); 4793 4794 spin_lock_bh(&prog_idr_lock); 4795 prog = idr_find(&prog_idr, id); 4796 if (prog) 4797 prog = bpf_prog_inc_not_zero(prog); 4798 else 4799 prog = ERR_PTR(-ENOENT); 4800 spin_unlock_bh(&prog_idr_lock); 4801 return prog; 4802 } 4803 4804 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 4805 { 4806 struct bpf_prog *prog; 4807 u32 id = attr->prog_id; 4808 int fd; 4809 4810 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 4811 return -EINVAL; 4812 4813 if (!capable(CAP_SYS_ADMIN)) 4814 return -EPERM; 4815 4816 prog = bpf_prog_by_id(id); 4817 if (IS_ERR(prog)) 4818 return PTR_ERR(prog); 4819 4820 fd = bpf_prog_new_fd(prog); 4821 if (fd < 0) 4822 bpf_prog_put(prog); 4823 4824 return fd; 4825 } 4826 4827 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 4828 4829 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 4830 { 4831 struct bpf_map *map; 4832 u32 id = attr->map_id; 4833 int f_flags; 4834 int fd; 4835 4836 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || 4837 attr->open_flags & ~BPF_OBJ_FLAG_MASK) 4838 return -EINVAL; 4839 4840 if (!capable(CAP_SYS_ADMIN)) 4841 return -EPERM; 4842 4843 f_flags = bpf_get_file_flag(attr->open_flags); 4844 if (f_flags < 0) 4845 return f_flags; 4846 4847 spin_lock_bh(&map_idr_lock); 4848 map = idr_find(&map_idr, id); 4849 if (map) 4850 map = __bpf_map_inc_not_zero(map, true); 4851 else 4852 map = ERR_PTR(-ENOENT); 4853 spin_unlock_bh(&map_idr_lock); 4854 4855 if (IS_ERR(map)) 4856 return PTR_ERR(map); 4857 4858 fd = bpf_map_new_fd(map, f_flags); 4859 if (fd < 0) 4860 bpf_map_put_with_uref(map); 4861 4862 return fd; 4863 } 4864 4865 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 4866 unsigned long addr, u32 *off, 4867 u32 *type) 4868 { 4869 const struct bpf_map *map; 4870 int i; 4871 4872 mutex_lock(&prog->aux->used_maps_mutex); 4873 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { 4874 map = 
prog->aux->used_maps[i]; 4875 if (map == (void *)addr) { 4876 *type = BPF_PSEUDO_MAP_FD; 4877 goto out; 4878 } 4879 if (!map->ops->map_direct_value_meta) 4880 continue; 4881 if (!map->ops->map_direct_value_meta(map, addr, off)) { 4882 *type = BPF_PSEUDO_MAP_VALUE; 4883 goto out; 4884 } 4885 } 4886 map = NULL; 4887 4888 out: 4889 mutex_unlock(&prog->aux->used_maps_mutex); 4890 return map; 4891 } 4892 4893 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, 4894 const struct cred *f_cred) 4895 { 4896 const struct bpf_map *map; 4897 struct bpf_insn *insns; 4898 u32 off, type; 4899 u64 imm; 4900 u8 code; 4901 int i; 4902 4903 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), 4904 GFP_USER); 4905 if (!insns) 4906 return insns; 4907 4908 for (i = 0; i < prog->len; i++) { 4909 code = insns[i].code; 4910 4911 if (code == (BPF_JMP | BPF_TAIL_CALL)) { 4912 insns[i].code = BPF_JMP | BPF_CALL; 4913 insns[i].imm = BPF_FUNC_tail_call; 4914 /* fall-through */ 4915 } 4916 if (code == (BPF_JMP | BPF_CALL) || 4917 code == (BPF_JMP | BPF_CALL_ARGS)) { 4918 if (code == (BPF_JMP | BPF_CALL_ARGS)) 4919 insns[i].code = BPF_JMP | BPF_CALL; 4920 if (!bpf_dump_raw_ok(f_cred)) 4921 insns[i].imm = 0; 4922 continue; 4923 } 4924 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { 4925 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; 4926 continue; 4927 } 4928 4929 if ((BPF_CLASS(code) == BPF_LDX || BPF_CLASS(code) == BPF_STX || 4930 BPF_CLASS(code) == BPF_ST) && BPF_MODE(code) == BPF_PROBE_MEM32) { 4931 insns[i].code = BPF_CLASS(code) | BPF_SIZE(code) | BPF_MEM; 4932 continue; 4933 } 4934 4935 if (code != (BPF_LD | BPF_IMM | BPF_DW)) 4936 continue; 4937 4938 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 4939 map = bpf_map_from_imm(prog, imm, &off, &type); 4940 if (map) { 4941 insns[i].src_reg = type; 4942 insns[i].imm = map->id; 4943 insns[i + 1].imm = off; 4944 continue; 4945 } 4946 } 4947 4948 return insns; 4949 } 4950 4951 static int set_info_rec_size(struct bpf_prog_info *info) 4952 { 4953 /* 4954 * Ensure info.*_rec_size is the same as kernel expected size 4955 * 4956 * or 4957 * 4958 * Only allow zero *_rec_size if both _rec_size and _cnt are 4959 * zero. In this case, the kernel will set the expected 4960 * _rec_size back to the info. 
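	 *
	 * Either way, the _rec_size values written back here are the ones
	 * user space receives when the info struct is copied back out.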
4961 */ 4962 4963 if ((info->nr_func_info || info->func_info_rec_size) && 4964 info->func_info_rec_size != sizeof(struct bpf_func_info)) 4965 return -EINVAL; 4966 4967 if ((info->nr_line_info || info->line_info_rec_size) && 4968 info->line_info_rec_size != sizeof(struct bpf_line_info)) 4969 return -EINVAL; 4970 4971 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && 4972 info->jited_line_info_rec_size != sizeof(__u64)) 4973 return -EINVAL; 4974 4975 info->func_info_rec_size = sizeof(struct bpf_func_info); 4976 info->line_info_rec_size = sizeof(struct bpf_line_info); 4977 info->jited_line_info_rec_size = sizeof(__u64); 4978 4979 return 0; 4980 } 4981 4982 static int bpf_prog_get_info_by_fd(struct file *file, 4983 struct bpf_prog *prog, 4984 const union bpf_attr *attr, 4985 union bpf_attr __user *uattr) 4986 { 4987 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 4988 struct btf *attach_btf = bpf_prog_get_target_btf(prog); 4989 struct bpf_prog_info info; 4990 u32 info_len = attr->info.info_len; 4991 struct bpf_prog_kstats stats; 4992 char __user *uinsns; 4993 u32 ulen; 4994 int err; 4995 4996 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 4997 if (err) 4998 return err; 4999 info_len = min_t(u32, sizeof(info), info_len); 5000 5001 memset(&info, 0, sizeof(info)); 5002 if (copy_from_user(&info, uinfo, info_len)) 5003 return -EFAULT; 5004 5005 info.type = prog->type; 5006 info.id = prog->aux->id; 5007 info.load_time = prog->aux->load_time; 5008 info.created_by_uid = from_kuid_munged(current_user_ns(), 5009 prog->aux->user->uid); 5010 info.gpl_compatible = prog->gpl_compatible; 5011 5012 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 5013 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 5014 5015 mutex_lock(&prog->aux->used_maps_mutex); 5016 ulen = info.nr_map_ids; 5017 info.nr_map_ids = prog->aux->used_map_cnt; 5018 ulen = min_t(u32, info.nr_map_ids, ulen); 5019 if (ulen) { 5020 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 5021 u32 i; 5022 5023 for (i = 0; i < ulen; i++) 5024 if (put_user(prog->aux->used_maps[i]->id, 5025 &user_map_ids[i])) { 5026 mutex_unlock(&prog->aux->used_maps_mutex); 5027 return -EFAULT; 5028 } 5029 } 5030 mutex_unlock(&prog->aux->used_maps_mutex); 5031 5032 err = set_info_rec_size(&info); 5033 if (err) 5034 return err; 5035 5036 bpf_prog_get_stats(prog, &stats); 5037 info.run_time_ns = stats.nsecs; 5038 info.run_cnt = stats.cnt; 5039 info.recursion_misses = stats.misses; 5040 5041 info.verified_insns = prog->aux->verified_insns; 5042 if (prog->aux->btf) 5043 info.btf_id = btf_obj_id(prog->aux->btf); 5044 5045 if (!bpf_capable()) { 5046 info.jited_prog_len = 0; 5047 info.xlated_prog_len = 0; 5048 info.nr_jited_ksyms = 0; 5049 info.nr_jited_func_lens = 0; 5050 info.nr_func_info = 0; 5051 info.nr_line_info = 0; 5052 info.nr_jited_line_info = 0; 5053 goto done; 5054 } 5055 5056 ulen = info.xlated_prog_len; 5057 info.xlated_prog_len = bpf_prog_insn_size(prog); 5058 if (info.xlated_prog_len && ulen) { 5059 struct bpf_insn *insns_sanitized; 5060 bool fault; 5061 5062 if (!prog->blinded || bpf_dump_raw_ok(file->f_cred)) { 5063 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); 5064 if (!insns_sanitized) 5065 return -ENOMEM; 5066 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 5067 ulen = min_t(u32, info.xlated_prog_len, ulen); 5068 fault = copy_to_user(uinsns, insns_sanitized, ulen); 5069 kfree(insns_sanitized); 5070 if (fault) 5071 return -EFAULT; 5072 } else { 5073 
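			/* Constant blinding is active and the caller may not
			 * dump raw instructions: clear the pointer so no
			 * xlated image is exposed.
			 */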
info.xlated_prog_insns = 0; 5074 } 5075 } 5076 5077 if (bpf_prog_is_offloaded(prog->aux)) { 5078 err = bpf_prog_offload_info_fill(&info, prog); 5079 if (err) 5080 return err; 5081 goto done; 5082 } 5083 5084 /* NOTE: the following code is supposed to be skipped for offload. 5085 * bpf_prog_offload_info_fill() is the place to fill similar fields 5086 * for offload. 5087 */ 5088 ulen = info.jited_prog_len; 5089 if (prog->aux->func_cnt) { 5090 u32 i; 5091 5092 info.jited_prog_len = 0; 5093 for (i = 0; i < prog->aux->func_cnt; i++) 5094 info.jited_prog_len += prog->aux->func[i]->jited_len; 5095 } else { 5096 info.jited_prog_len = prog->jited_len; 5097 } 5098 5099 if (info.jited_prog_len && ulen) { 5100 if (bpf_dump_raw_ok(file->f_cred)) { 5101 uinsns = u64_to_user_ptr(info.jited_prog_insns); 5102 ulen = min_t(u32, info.jited_prog_len, ulen); 5103 5104 /* for multi-function programs, copy the JITed 5105 * instructions for all the functions 5106 */ 5107 if (prog->aux->func_cnt) { 5108 u32 len, free, i; 5109 u8 *img; 5110 5111 free = ulen; 5112 for (i = 0; i < prog->aux->func_cnt; i++) { 5113 len = prog->aux->func[i]->jited_len; 5114 len = min_t(u32, len, free); 5115 img = (u8 *) prog->aux->func[i]->bpf_func; 5116 if (copy_to_user(uinsns, img, len)) 5117 return -EFAULT; 5118 uinsns += len; 5119 free -= len; 5120 if (!free) 5121 break; 5122 } 5123 } else { 5124 if (copy_to_user(uinsns, prog->bpf_func, ulen)) 5125 return -EFAULT; 5126 } 5127 } else { 5128 info.jited_prog_insns = 0; 5129 } 5130 } 5131 5132 ulen = info.nr_jited_ksyms; 5133 info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; 5134 if (ulen) { 5135 if (bpf_dump_raw_ok(file->f_cred)) { 5136 unsigned long ksym_addr; 5137 u64 __user *user_ksyms; 5138 u32 i; 5139 5140 /* copy the address of the kernel symbol 5141 * corresponding to each function 5142 */ 5143 ulen = min_t(u32, info.nr_jited_ksyms, ulen); 5144 user_ksyms = u64_to_user_ptr(info.jited_ksyms); 5145 if (prog->aux->func_cnt) { 5146 for (i = 0; i < ulen; i++) { 5147 ksym_addr = (unsigned long) 5148 prog->aux->func[i]->bpf_func; 5149 if (put_user((u64) ksym_addr, 5150 &user_ksyms[i])) 5151 return -EFAULT; 5152 } 5153 } else { 5154 ksym_addr = (unsigned long) prog->bpf_func; 5155 if (put_user((u64) ksym_addr, &user_ksyms[0])) 5156 return -EFAULT; 5157 } 5158 } else { 5159 info.jited_ksyms = 0; 5160 } 5161 } 5162 5163 ulen = info.nr_jited_func_lens; 5164 info.nr_jited_func_lens = prog->aux->func_cnt ? 
: 1; 5165 if (ulen) { 5166 if (bpf_dump_raw_ok(file->f_cred)) { 5167 u32 __user *user_lens; 5168 u32 func_len, i; 5169 5170 /* copy the JITed image lengths for each function */ 5171 ulen = min_t(u32, info.nr_jited_func_lens, ulen); 5172 user_lens = u64_to_user_ptr(info.jited_func_lens); 5173 if (prog->aux->func_cnt) { 5174 for (i = 0; i < ulen; i++) { 5175 func_len = 5176 prog->aux->func[i]->jited_len; 5177 if (put_user(func_len, &user_lens[i])) 5178 return -EFAULT; 5179 } 5180 } else { 5181 func_len = prog->jited_len; 5182 if (put_user(func_len, &user_lens[0])) 5183 return -EFAULT; 5184 } 5185 } else { 5186 info.jited_func_lens = 0; 5187 } 5188 } 5189 5190 info.attach_btf_id = prog->aux->attach_btf_id; 5191 if (attach_btf) 5192 info.attach_btf_obj_id = btf_obj_id(attach_btf); 5193 5194 ulen = info.nr_func_info; 5195 info.nr_func_info = prog->aux->func_info_cnt; 5196 if (info.nr_func_info && ulen) { 5197 char __user *user_finfo; 5198 5199 user_finfo = u64_to_user_ptr(info.func_info); 5200 ulen = min_t(u32, info.nr_func_info, ulen); 5201 if (copy_to_user(user_finfo, prog->aux->func_info, 5202 info.func_info_rec_size * ulen)) 5203 return -EFAULT; 5204 } 5205 5206 ulen = info.nr_line_info; 5207 info.nr_line_info = prog->aux->nr_linfo; 5208 if (info.nr_line_info && ulen) { 5209 __u8 __user *user_linfo; 5210 5211 user_linfo = u64_to_user_ptr(info.line_info); 5212 ulen = min_t(u32, info.nr_line_info, ulen); 5213 if (copy_to_user(user_linfo, prog->aux->linfo, 5214 info.line_info_rec_size * ulen)) 5215 return -EFAULT; 5216 } 5217 5218 ulen = info.nr_jited_line_info; 5219 if (prog->aux->jited_linfo) 5220 info.nr_jited_line_info = prog->aux->nr_linfo; 5221 else 5222 info.nr_jited_line_info = 0; 5223 if (info.nr_jited_line_info && ulen) { 5224 if (bpf_dump_raw_ok(file->f_cred)) { 5225 unsigned long line_addr; 5226 __u64 __user *user_linfo; 5227 u32 i; 5228 5229 user_linfo = u64_to_user_ptr(info.jited_line_info); 5230 ulen = min_t(u32, info.nr_jited_line_info, ulen); 5231 for (i = 0; i < ulen; i++) { 5232 line_addr = (unsigned long)prog->aux->jited_linfo[i]; 5233 if (put_user((__u64)line_addr, &user_linfo[i])) 5234 return -EFAULT; 5235 } 5236 } else { 5237 info.jited_line_info = 0; 5238 } 5239 } 5240 5241 ulen = info.nr_prog_tags; 5242 info.nr_prog_tags = prog->aux->func_cnt ? 
: 1; 5243 if (ulen) { 5244 __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; 5245 u32 i; 5246 5247 user_prog_tags = u64_to_user_ptr(info.prog_tags); 5248 ulen = min_t(u32, info.nr_prog_tags, ulen); 5249 if (prog->aux->func_cnt) { 5250 for (i = 0; i < ulen; i++) { 5251 if (copy_to_user(user_prog_tags[i], 5252 prog->aux->func[i]->tag, 5253 BPF_TAG_SIZE)) 5254 return -EFAULT; 5255 } 5256 } else { 5257 if (copy_to_user(user_prog_tags[0], 5258 prog->tag, BPF_TAG_SIZE)) 5259 return -EFAULT; 5260 } 5261 } 5262 5263 done: 5264 if (copy_to_user(uinfo, &info, info_len) || 5265 put_user(info_len, &uattr->info.info_len)) 5266 return -EFAULT; 5267 5268 return 0; 5269 } 5270 5271 static int bpf_map_get_info_by_fd(struct file *file, 5272 struct bpf_map *map, 5273 const union bpf_attr *attr, 5274 union bpf_attr __user *uattr) 5275 { 5276 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5277 struct bpf_map_info info; 5278 u32 info_len = attr->info.info_len; 5279 int err; 5280 5281 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 5282 if (err) 5283 return err; 5284 info_len = min_t(u32, sizeof(info), info_len); 5285 5286 memset(&info, 0, sizeof(info)); 5287 if (copy_from_user(&info, uinfo, info_len)) 5288 return -EFAULT; 5289 5290 info.type = map->map_type; 5291 info.id = map->id; 5292 info.key_size = map->key_size; 5293 info.value_size = map->value_size; 5294 info.max_entries = map->max_entries; 5295 info.map_flags = map->map_flags; 5296 info.map_extra = map->map_extra; 5297 memcpy(info.name, map->name, sizeof(map->name)); 5298 5299 if (map->btf) { 5300 info.btf_id = btf_obj_id(map->btf); 5301 info.btf_key_type_id = map->btf_key_type_id; 5302 info.btf_value_type_id = map->btf_value_type_id; 5303 } 5304 info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 5305 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) 5306 bpf_map_struct_ops_info_fill(&info, map); 5307 5308 if (bpf_map_is_offloaded(map)) { 5309 err = bpf_map_offload_info_fill(&info, map); 5310 if (err) 5311 return err; 5312 } 5313 5314 if (info.hash) { 5315 char __user *uhash = u64_to_user_ptr(info.hash); 5316 5317 if (!map->ops->map_get_hash) 5318 return -EINVAL; 5319 5320 if (info.hash_size != SHA256_DIGEST_SIZE) 5321 return -EINVAL; 5322 5323 err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha); 5324 if (err != 0) 5325 return err; 5326 5327 if (copy_to_user(uhash, map->sha, SHA256_DIGEST_SIZE) != 0) 5328 return -EFAULT; 5329 } else if (info.hash_size) { 5330 return -EINVAL; 5331 } 5332 5333 if (copy_to_user(uinfo, &info, info_len) || 5334 put_user(info_len, &uattr->info.info_len)) 5335 return -EFAULT; 5336 5337 return 0; 5338 } 5339 5340 static int bpf_btf_get_info_by_fd(struct file *file, 5341 struct btf *btf, 5342 const union bpf_attr *attr, 5343 union bpf_attr __user *uattr) 5344 { 5345 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5346 u32 info_len = attr->info.info_len; 5347 int err; 5348 5349 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5350 if (err) 5351 return err; 5352 5353 return btf_get_info_by_fd(btf, attr, uattr); 5354 } 5355 5356 static int bpf_link_get_info_by_fd(struct file *file, 5357 struct bpf_link *link, 5358 const union bpf_attr *attr, 5359 union bpf_attr __user *uattr) 5360 { 5361 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5362 struct bpf_link_info info; 5363 u32 info_len = attr->info.info_len; 5364 int err; 5365 5366 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), 
sizeof(info), info_len); 5367 if (err) 5368 return err; 5369 info_len = min_t(u32, sizeof(info), info_len); 5370 5371 memset(&info, 0, sizeof(info)); 5372 if (copy_from_user(&info, uinfo, info_len)) 5373 return -EFAULT; 5374 5375 info.type = link->type; 5376 info.id = link->id; 5377 if (link->prog) 5378 info.prog_id = link->prog->aux->id; 5379 5380 if (link->ops->fill_link_info) { 5381 err = link->ops->fill_link_info(link, &info); 5382 if (err) 5383 return err; 5384 } 5385 5386 if (copy_to_user(uinfo, &info, info_len) || 5387 put_user(info_len, &uattr->info.info_len)) 5388 return -EFAULT; 5389 5390 return 0; 5391 } 5392 5393 5394 static int token_get_info_by_fd(struct file *file, 5395 struct bpf_token *token, 5396 const union bpf_attr *attr, 5397 union bpf_attr __user *uattr) 5398 { 5399 struct bpf_token_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5400 u32 info_len = attr->info.info_len; 5401 int err; 5402 5403 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5404 if (err) 5405 return err; 5406 return bpf_token_get_info_by_fd(token, attr, uattr); 5407 } 5408 5409 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info 5410 5411 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, 5412 union bpf_attr __user *uattr) 5413 { 5414 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) 5415 return -EINVAL; 5416 5417 CLASS(fd, f)(attr->info.bpf_fd); 5418 if (fd_empty(f)) 5419 return -EBADFD; 5420 5421 if (fd_file(f)->f_op == &bpf_prog_fops) 5422 return bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5423 uattr); 5424 else if (fd_file(f)->f_op == &bpf_map_fops) 5425 return bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5426 uattr); 5427 else if (fd_file(f)->f_op == &btf_fops) 5428 return bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); 5429 else if (fd_file(f)->f_op == &bpf_link_fops || fd_file(f)->f_op == &bpf_link_fops_poll) 5430 return bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5431 attr, uattr); 5432 else if (fd_file(f)->f_op == &bpf_token_fops) 5433 return token_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5434 attr, uattr); 5435 return -EINVAL; 5436 } 5437 5438 #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd 5439 5440 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) 5441 { 5442 struct bpf_token *token = NULL; 5443 5444 if (CHECK_ATTR(BPF_BTF_LOAD)) 5445 return -EINVAL; 5446 5447 if (attr->btf_flags & ~BPF_F_TOKEN_FD) 5448 return -EINVAL; 5449 5450 if (attr->btf_flags & BPF_F_TOKEN_FD) { 5451 token = bpf_token_get_from_fd(attr->btf_token_fd); 5452 if (IS_ERR(token)) 5453 return PTR_ERR(token); 5454 if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { 5455 bpf_token_put(token); 5456 token = NULL; 5457 } 5458 } 5459 5460 if (!bpf_token_capable(token, CAP_BPF)) { 5461 bpf_token_put(token); 5462 return -EPERM; 5463 } 5464 5465 bpf_token_put(token); 5466 5467 return btf_new_fd(attr, uattr, uattr_size); 5468 } 5469 5470 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd 5471 5472 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) 5473 { 5474 struct bpf_token *token = NULL; 5475 5476 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) 5477 return -EINVAL; 5478 5479 if (attr->open_flags & ~BPF_F_TOKEN_FD) 5480 return -EINVAL; 5481 5482 if (attr->open_flags & BPF_F_TOKEN_FD) { 5483 token = bpf_token_get_from_fd(attr->fd_by_id_token_fd); 5484 if (IS_ERR(token)) 5485 return PTR_ERR(token); 5486 if (!bpf_token_allow_cmd(token, BPF_BTF_GET_FD_BY_ID)) { 
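			/* The token does not grant BPF_BTF_GET_FD_BY_ID;
			 * drop it and rely on the plain capability check
			 * below.
			 */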
5487 bpf_token_put(token); 5488 token = NULL; 5489 } 5490 } 5491 5492 if (!bpf_token_capable(token, CAP_SYS_ADMIN)) { 5493 bpf_token_put(token); 5494 return -EPERM; 5495 } 5496 5497 bpf_token_put(token); 5498 5499 return btf_get_fd_by_id(attr->btf_id); 5500 } 5501 5502 static int bpf_task_fd_query_copy(const union bpf_attr *attr, 5503 union bpf_attr __user *uattr, 5504 u32 prog_id, u32 fd_type, 5505 const char *buf, u64 probe_offset, 5506 u64 probe_addr) 5507 { 5508 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf); 5509 u32 len = buf ? strlen(buf) : 0, input_len; 5510 int err = 0; 5511 5512 if (put_user(len, &uattr->task_fd_query.buf_len)) 5513 return -EFAULT; 5514 input_len = attr->task_fd_query.buf_len; 5515 if (input_len && ubuf) { 5516 if (!len) { 5517 /* nothing to copy, just make ubuf NULL terminated */ 5518 char zero = '\0'; 5519 5520 if (put_user(zero, ubuf)) 5521 return -EFAULT; 5522 } else { 5523 err = bpf_copy_to_user(ubuf, buf, input_len, len); 5524 if (err == -EFAULT) 5525 return err; 5526 } 5527 } 5528 5529 if (put_user(prog_id, &uattr->task_fd_query.prog_id) || 5530 put_user(fd_type, &uattr->task_fd_query.fd_type) || 5531 put_user(probe_offset, &uattr->task_fd_query.probe_offset) || 5532 put_user(probe_addr, &uattr->task_fd_query.probe_addr)) 5533 return -EFAULT; 5534 5535 return err; 5536 } 5537 5538 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr 5539 5540 static int bpf_task_fd_query(const union bpf_attr *attr, 5541 union bpf_attr __user *uattr) 5542 { 5543 pid_t pid = attr->task_fd_query.pid; 5544 u32 fd = attr->task_fd_query.fd; 5545 const struct perf_event *event; 5546 struct task_struct *task; 5547 struct file *file; 5548 int err; 5549 5550 if (CHECK_ATTR(BPF_TASK_FD_QUERY)) 5551 return -EINVAL; 5552 5553 if (!capable(CAP_SYS_ADMIN)) 5554 return -EPERM; 5555 5556 if (attr->task_fd_query.flags != 0) 5557 return -EINVAL; 5558 5559 rcu_read_lock(); 5560 task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 5561 rcu_read_unlock(); 5562 if (!task) 5563 return -ENOENT; 5564 5565 err = 0; 5566 file = fget_task(task, fd); 5567 put_task_struct(task); 5568 if (!file) 5569 return -EBADF; 5570 5571 if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) { 5572 struct bpf_link *link = file->private_data; 5573 5574 if (link->ops == &bpf_raw_tp_link_lops) { 5575 struct bpf_raw_tp_link *raw_tp = 5576 container_of(link, struct bpf_raw_tp_link, link); 5577 struct bpf_raw_event_map *btp = raw_tp->btp; 5578 5579 err = bpf_task_fd_query_copy(attr, uattr, 5580 raw_tp->link.prog->aux->id, 5581 BPF_FD_TYPE_RAW_TRACEPOINT, 5582 btp->tp->name, 0, 0); 5583 goto put_file; 5584 } 5585 goto out_not_supp; 5586 } 5587 5588 event = perf_get_event(file); 5589 if (!IS_ERR(event)) { 5590 u64 probe_offset, probe_addr; 5591 u32 prog_id, fd_type; 5592 const char *buf; 5593 5594 err = bpf_get_perf_event_info(event, &prog_id, &fd_type, 5595 &buf, &probe_offset, 5596 &probe_addr, NULL); 5597 if (!err) 5598 err = bpf_task_fd_query_copy(attr, uattr, prog_id, 5599 fd_type, buf, 5600 probe_offset, 5601 probe_addr); 5602 goto put_file; 5603 } 5604 5605 out_not_supp: 5606 err = -ENOTSUPP; 5607 put_file: 5608 fput(file); 5609 return err; 5610 } 5611 5612 #define BPF_MAP_BATCH_LAST_FIELD batch.flags 5613 5614 #define BPF_DO_BATCH(fn, ...) 
\ 5615 do { \ 5616 if (!fn) { \ 5617 err = -ENOTSUPP; \ 5618 goto err_put; \ 5619 } \ 5620 err = fn(__VA_ARGS__); \ 5621 } while (0) 5622 5623 static int bpf_map_do_batch(const union bpf_attr *attr, 5624 union bpf_attr __user *uattr, 5625 int cmd) 5626 { 5627 bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || 5628 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; 5629 bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; 5630 struct bpf_map *map; 5631 int err; 5632 5633 if (CHECK_ATTR(BPF_MAP_BATCH)) 5634 return -EINVAL; 5635 5636 CLASS(fd, f)(attr->batch.map_fd); 5637 5638 map = __bpf_map_get(f); 5639 if (IS_ERR(map)) 5640 return PTR_ERR(map); 5641 if (has_write) 5642 bpf_map_write_active_inc(map); 5643 if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 5644 err = -EPERM; 5645 goto err_put; 5646 } 5647 if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 5648 err = -EPERM; 5649 goto err_put; 5650 } 5651 5652 if (cmd == BPF_MAP_LOOKUP_BATCH) 5653 BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr); 5654 else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) 5655 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr); 5656 else if (cmd == BPF_MAP_UPDATE_BATCH) 5657 BPF_DO_BATCH(map->ops->map_update_batch, map, fd_file(f), attr, uattr); 5658 else 5659 BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr); 5660 err_put: 5661 if (has_write) { 5662 maybe_wait_bpf_programs(map); 5663 bpf_map_write_active_dec(map); 5664 } 5665 return err; 5666 } 5667 5668 #define BPF_LINK_CREATE_LAST_FIELD link_create.uprobe_multi.pid 5669 static int link_create(union bpf_attr *attr, bpfptr_t uattr) 5670 { 5671 struct bpf_prog *prog; 5672 int ret; 5673 5674 if (CHECK_ATTR(BPF_LINK_CREATE)) 5675 return -EINVAL; 5676 5677 if (attr->link_create.attach_type == BPF_STRUCT_OPS) 5678 return bpf_struct_ops_link_create(attr); 5679 5680 prog = bpf_prog_get(attr->link_create.prog_fd); 5681 if (IS_ERR(prog)) 5682 return PTR_ERR(prog); 5683 5684 ret = bpf_prog_attach_check_attach_type(prog, 5685 attr->link_create.attach_type); 5686 if (ret) 5687 goto out; 5688 5689 switch (prog->type) { 5690 case BPF_PROG_TYPE_CGROUP_SKB: 5691 case BPF_PROG_TYPE_CGROUP_SOCK: 5692 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 5693 case BPF_PROG_TYPE_SOCK_OPS: 5694 case BPF_PROG_TYPE_CGROUP_DEVICE: 5695 case BPF_PROG_TYPE_CGROUP_SYSCTL: 5696 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 5697 ret = cgroup_bpf_link_attach(attr, prog); 5698 break; 5699 case BPF_PROG_TYPE_EXT: 5700 ret = bpf_tracing_prog_attach(prog, 5701 attr->link_create.target_fd, 5702 attr->link_create.target_btf_id, 5703 attr->link_create.tracing.cookie, 5704 attr->link_create.attach_type); 5705 break; 5706 case BPF_PROG_TYPE_LSM: 5707 case BPF_PROG_TYPE_TRACING: 5708 if (attr->link_create.attach_type != prog->expected_attach_type) { 5709 ret = -EINVAL; 5710 goto out; 5711 } 5712 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) 5713 ret = bpf_raw_tp_link_attach(prog, NULL, attr->link_create.tracing.cookie, 5714 attr->link_create.attach_type); 5715 else if (prog->expected_attach_type == BPF_TRACE_ITER) 5716 ret = bpf_iter_link_attach(attr, uattr, prog); 5717 else if (prog->expected_attach_type == BPF_LSM_CGROUP) 5718 ret = cgroup_bpf_link_attach(attr, prog); 5719 else 5720 ret = bpf_tracing_prog_attach(prog, 5721 attr->link_create.target_fd, 5722 attr->link_create.target_btf_id, 5723 attr->link_create.tracing.cookie, 5724 attr->link_create.attach_type); 5725 break; 5726 case BPF_PROG_TYPE_FLOW_DISSECTOR: 5727 case BPF_PROG_TYPE_SK_LOOKUP: 5728 ret = 
netns_bpf_link_create(attr, prog); 5729 break; 5730 case BPF_PROG_TYPE_SK_MSG: 5731 case BPF_PROG_TYPE_SK_SKB: 5732 ret = sock_map_link_create(attr, prog); 5733 break; 5734 #ifdef CONFIG_NET 5735 case BPF_PROG_TYPE_XDP: 5736 ret = bpf_xdp_link_attach(attr, prog); 5737 break; 5738 case BPF_PROG_TYPE_SCHED_CLS: 5739 if (attr->link_create.attach_type == BPF_TCX_INGRESS || 5740 attr->link_create.attach_type == BPF_TCX_EGRESS) 5741 ret = tcx_link_attach(attr, prog); 5742 else 5743 ret = netkit_link_attach(attr, prog); 5744 break; 5745 case BPF_PROG_TYPE_NETFILTER: 5746 ret = bpf_nf_link_attach(attr, prog); 5747 break; 5748 #endif 5749 case BPF_PROG_TYPE_PERF_EVENT: 5750 case BPF_PROG_TYPE_TRACEPOINT: 5751 ret = bpf_perf_link_attach(attr, prog); 5752 break; 5753 case BPF_PROG_TYPE_KPROBE: 5754 if (attr->link_create.attach_type == BPF_PERF_EVENT) 5755 ret = bpf_perf_link_attach(attr, prog); 5756 else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI || 5757 attr->link_create.attach_type == BPF_TRACE_KPROBE_SESSION) 5758 ret = bpf_kprobe_multi_link_attach(attr, prog); 5759 else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI || 5760 attr->link_create.attach_type == BPF_TRACE_UPROBE_SESSION) 5761 ret = bpf_uprobe_multi_link_attach(attr, prog); 5762 break; 5763 default: 5764 ret = -EINVAL; 5765 } 5766 5767 out: 5768 if (ret < 0) 5769 bpf_prog_put(prog); 5770 return ret; 5771 } 5772 5773 static int link_update_map(struct bpf_link *link, union bpf_attr *attr) 5774 { 5775 struct bpf_map *new_map, *old_map = NULL; 5776 int ret; 5777 5778 new_map = bpf_map_get(attr->link_update.new_map_fd); 5779 if (IS_ERR(new_map)) 5780 return PTR_ERR(new_map); 5781 5782 if (attr->link_update.flags & BPF_F_REPLACE) { 5783 old_map = bpf_map_get(attr->link_update.old_map_fd); 5784 if (IS_ERR(old_map)) { 5785 ret = PTR_ERR(old_map); 5786 goto out_put; 5787 } 5788 } else if (attr->link_update.old_map_fd) { 5789 ret = -EINVAL; 5790 goto out_put; 5791 } 5792 5793 ret = link->ops->update_map(link, new_map, old_map); 5794 5795 if (old_map) 5796 bpf_map_put(old_map); 5797 out_put: 5798 bpf_map_put(new_map); 5799 return ret; 5800 } 5801 5802 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd 5803 5804 static int link_update(union bpf_attr *attr) 5805 { 5806 struct bpf_prog *old_prog = NULL, *new_prog; 5807 struct bpf_link *link; 5808 u32 flags; 5809 int ret; 5810 5811 if (CHECK_ATTR(BPF_LINK_UPDATE)) 5812 return -EINVAL; 5813 5814 flags = attr->link_update.flags; 5815 if (flags & ~BPF_F_REPLACE) 5816 return -EINVAL; 5817 5818 link = bpf_link_get_from_fd(attr->link_update.link_fd); 5819 if (IS_ERR(link)) 5820 return PTR_ERR(link); 5821 5822 if (link->ops->update_map) { 5823 ret = link_update_map(link, attr); 5824 goto out_put_link; 5825 } 5826 5827 new_prog = bpf_prog_get(attr->link_update.new_prog_fd); 5828 if (IS_ERR(new_prog)) { 5829 ret = PTR_ERR(new_prog); 5830 goto out_put_link; 5831 } 5832 5833 if (flags & BPF_F_REPLACE) { 5834 old_prog = bpf_prog_get(attr->link_update.old_prog_fd); 5835 if (IS_ERR(old_prog)) { 5836 ret = PTR_ERR(old_prog); 5837 old_prog = NULL; 5838 goto out_put_progs; 5839 } 5840 } else if (attr->link_update.old_prog_fd) { 5841 ret = -EINVAL; 5842 goto out_put_progs; 5843 } 5844 5845 if (link->ops->update_prog) 5846 ret = link->ops->update_prog(link, new_prog, old_prog); 5847 else 5848 ret = -EINVAL; 5849 5850 out_put_progs: 5851 if (old_prog) 5852 bpf_prog_put(old_prog); 5853 if (ret) 5854 bpf_prog_put(new_prog); 5855 out_put_link: 5856 bpf_link_put_direct(link); 5857 return 
ret; 5858 } 5859 5860 #define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd 5861 5862 static int link_detach(union bpf_attr *attr) 5863 { 5864 struct bpf_link *link; 5865 int ret; 5866 5867 if (CHECK_ATTR(BPF_LINK_DETACH)) 5868 return -EINVAL; 5869 5870 link = bpf_link_get_from_fd(attr->link_detach.link_fd); 5871 if (IS_ERR(link)) 5872 return PTR_ERR(link); 5873 5874 if (link->ops->detach) 5875 ret = link->ops->detach(link); 5876 else 5877 ret = -EOPNOTSUPP; 5878 5879 bpf_link_put_direct(link); 5880 return ret; 5881 } 5882 5883 struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) 5884 { 5885 return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT); 5886 } 5887 EXPORT_SYMBOL(bpf_link_inc_not_zero); 5888 5889 struct bpf_link *bpf_link_by_id(u32 id) 5890 { 5891 struct bpf_link *link; 5892 5893 if (!id) 5894 return ERR_PTR(-ENOENT); 5895 5896 spin_lock_bh(&link_idr_lock); 5897 /* before link is "settled", ID is 0, pretend it doesn't exist yet */ 5898 link = idr_find(&link_idr, id); 5899 if (link) { 5900 if (link->id) 5901 link = bpf_link_inc_not_zero(link); 5902 else 5903 link = ERR_PTR(-EAGAIN); 5904 } else { 5905 link = ERR_PTR(-ENOENT); 5906 } 5907 spin_unlock_bh(&link_idr_lock); 5908 return link; 5909 } 5910 5911 struct bpf_link *bpf_link_get_curr_or_next(u32 *id) 5912 { 5913 struct bpf_link *link; 5914 5915 spin_lock_bh(&link_idr_lock); 5916 again: 5917 link = idr_get_next(&link_idr, id); 5918 if (link) { 5919 link = bpf_link_inc_not_zero(link); 5920 if (IS_ERR(link)) { 5921 (*id)++; 5922 goto again; 5923 } 5924 } 5925 spin_unlock_bh(&link_idr_lock); 5926 5927 return link; 5928 } 5929 5930 #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id 5931 5932 static int bpf_link_get_fd_by_id(const union bpf_attr *attr) 5933 { 5934 struct bpf_link *link; 5935 u32 id = attr->link_id; 5936 int fd; 5937 5938 if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) 5939 return -EINVAL; 5940 5941 if (!capable(CAP_SYS_ADMIN)) 5942 return -EPERM; 5943 5944 link = bpf_link_by_id(id); 5945 if (IS_ERR(link)) 5946 return PTR_ERR(link); 5947 5948 fd = bpf_link_new_fd(link); 5949 if (fd < 0) 5950 bpf_link_put_direct(link); 5951 5952 return fd; 5953 } 5954 5955 DEFINE_MUTEX(bpf_stats_enabled_mutex); 5956 5957 static int bpf_stats_release(struct inode *inode, struct file *file) 5958 { 5959 mutex_lock(&bpf_stats_enabled_mutex); 5960 static_key_slow_dec(&bpf_stats_enabled_key.key); 5961 mutex_unlock(&bpf_stats_enabled_mutex); 5962 return 0; 5963 } 5964 5965 static const struct file_operations bpf_stats_fops = { 5966 .release = bpf_stats_release, 5967 }; 5968 5969 static int bpf_enable_runtime_stats(void) 5970 { 5971 int fd; 5972 5973 mutex_lock(&bpf_stats_enabled_mutex); 5974 5975 /* Set a very high limit to avoid overflow */ 5976 if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) { 5977 mutex_unlock(&bpf_stats_enabled_mutex); 5978 return -EBUSY; 5979 } 5980 5981 fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); 5982 if (fd >= 0) 5983 static_key_slow_inc(&bpf_stats_enabled_key.key); 5984 5985 mutex_unlock(&bpf_stats_enabled_mutex); 5986 return fd; 5987 } 5988 5989 #define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type 5990 5991 static int bpf_enable_stats(union bpf_attr *attr) 5992 { 5993 5994 if (CHECK_ATTR(BPF_ENABLE_STATS)) 5995 return -EINVAL; 5996 5997 if (!capable(CAP_SYS_ADMIN)) 5998 return -EPERM; 5999 6000 switch (attr->enable_stats.type) { 6001 case BPF_STATS_RUN_TIME: 6002 return bpf_enable_runtime_stats(); 6003 default: 6004 break; 6005 } 6006 return -EINVAL; 
6007 } 6008 6009 #define BPF_ITER_CREATE_LAST_FIELD iter_create.flags 6010 6011 static int bpf_iter_create(union bpf_attr *attr) 6012 { 6013 struct bpf_link *link; 6014 int err; 6015 6016 if (CHECK_ATTR(BPF_ITER_CREATE)) 6017 return -EINVAL; 6018 6019 if (attr->iter_create.flags) 6020 return -EINVAL; 6021 6022 link = bpf_link_get_from_fd(attr->iter_create.link_fd); 6023 if (IS_ERR(link)) 6024 return PTR_ERR(link); 6025 6026 err = bpf_iter_new_fd(link); 6027 bpf_link_put_direct(link); 6028 6029 return err; 6030 } 6031 6032 #define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags 6033 6034 static int bpf_prog_bind_map(union bpf_attr *attr) 6035 { 6036 struct bpf_prog *prog; 6037 struct bpf_map *map; 6038 struct bpf_map **used_maps_old, **used_maps_new; 6039 int i, ret = 0; 6040 6041 if (CHECK_ATTR(BPF_PROG_BIND_MAP)) 6042 return -EINVAL; 6043 6044 if (attr->prog_bind_map.flags) 6045 return -EINVAL; 6046 6047 prog = bpf_prog_get(attr->prog_bind_map.prog_fd); 6048 if (IS_ERR(prog)) 6049 return PTR_ERR(prog); 6050 6051 map = bpf_map_get(attr->prog_bind_map.map_fd); 6052 if (IS_ERR(map)) { 6053 ret = PTR_ERR(map); 6054 goto out_prog_put; 6055 } 6056 6057 mutex_lock(&prog->aux->used_maps_mutex); 6058 6059 used_maps_old = prog->aux->used_maps; 6060 6061 for (i = 0; i < prog->aux->used_map_cnt; i++) 6062 if (used_maps_old[i] == map) { 6063 bpf_map_put(map); 6064 goto out_unlock; 6065 } 6066 6067 used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, 6068 sizeof(used_maps_new[0]), 6069 GFP_KERNEL); 6070 if (!used_maps_new) { 6071 ret = -ENOMEM; 6072 goto out_unlock; 6073 } 6074 6075 /* The bpf program will not access the bpf map, but for the sake of 6076 * simplicity, increase sleepable_refcnt for sleepable program as well. 6077 */ 6078 if (prog->sleepable) 6079 atomic64_inc(&map->sleepable_refcnt); 6080 memcpy(used_maps_new, used_maps_old, 6081 sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); 6082 used_maps_new[prog->aux->used_map_cnt] = map; 6083 6084 prog->aux->used_map_cnt++; 6085 prog->aux->used_maps = used_maps_new; 6086 6087 kfree(used_maps_old); 6088 6089 out_unlock: 6090 mutex_unlock(&prog->aux->used_maps_mutex); 6091 6092 if (ret) 6093 bpf_map_put(map); 6094 out_prog_put: 6095 bpf_prog_put(prog); 6096 return ret; 6097 } 6098 6099 #define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd 6100 6101 static int token_create(union bpf_attr *attr) 6102 { 6103 if (CHECK_ATTR(BPF_TOKEN_CREATE)) 6104 return -EINVAL; 6105 6106 /* no flags are supported yet */ 6107 if (attr->token_create.flags) 6108 return -EINVAL; 6109 6110 return bpf_token_create(attr); 6111 } 6112 6113 #define BPF_PROG_STREAM_READ_BY_FD_LAST_FIELD prog_stream_read.prog_fd 6114 6115 static int prog_stream_read(union bpf_attr *attr) 6116 { 6117 char __user *buf = u64_to_user_ptr(attr->prog_stream_read.stream_buf); 6118 u32 len = attr->prog_stream_read.stream_buf_len; 6119 struct bpf_prog *prog; 6120 int ret; 6121 6122 if (CHECK_ATTR(BPF_PROG_STREAM_READ_BY_FD)) 6123 return -EINVAL; 6124 6125 prog = bpf_prog_get(attr->prog_stream_read.prog_fd); 6126 if (IS_ERR(prog)) 6127 return PTR_ERR(prog); 6128 6129 ret = bpf_prog_stream_read(prog, attr->prog_stream_read.stream_id, buf, len); 6130 bpf_prog_put(prog); 6131 6132 return ret; 6133 } 6134 6135 static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) 6136 { 6137 union bpf_attr attr; 6138 int err; 6139 6140 err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); 6141 if (err) 6142 return err; 6143 size = min_t(u32, size, sizeof(attr)); 6144 6145 /* copy 
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
	union bpf_attr attr;
	int err;

	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	memset(&attr, 0, sizeof(attr));
	if (copy_from_bpfptr(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size, uattr.is_kernel);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr, uattr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr, uattr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr, uattr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_MAP_FREEZE:
		err = map_freeze(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr, uattr, size);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr.user);
		break;
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr.user);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_BTF_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &btf_idr, &btf_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr.user);
		break;
	case BPF_RAW_TRACEPOINT_OPEN:
		err = bpf_raw_tracepoint_open(&attr);
		break;
	case BPF_BTF_LOAD:
		err = bpf_btf_load(&attr, uattr, size);
		break;
	case BPF_BTF_GET_FD_BY_ID:
		err = bpf_btf_get_fd_by_id(&attr);
		break;
	case BPF_TASK_FD_QUERY:
		err = bpf_task_fd_query(&attr, uattr.user);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
		err = map_lookup_and_delete_elem(&attr);
		break;
	case BPF_MAP_LOOKUP_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user,
				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
		break;
	case BPF_MAP_UPDATE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
		break;
	case BPF_MAP_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
		break;
	case BPF_LINK_CREATE:
		err = link_create(&attr, uattr);
		break;
	case BPF_LINK_UPDATE:
		err = link_update(&attr);
		break;
	case BPF_LINK_GET_FD_BY_ID:
		err = bpf_link_get_fd_by_id(&attr);
		break;
	case BPF_LINK_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &link_idr, &link_idr_lock);
		break;
	case BPF_ENABLE_STATS:
		err = bpf_enable_stats(&attr);
		break;
	case BPF_ITER_CREATE:
		err = bpf_iter_create(&attr);
		break;
	case BPF_LINK_DETACH:
		err = link_detach(&attr);
		break;
	case BPF_PROG_BIND_MAP:
		err = bpf_prog_bind_map(&attr);
		break;
	case BPF_TOKEN_CREATE:
		err = token_create(&attr);
		break;
	case BPF_PROG_STREAM_READ_BY_FD:
		err = prog_stream_read(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}
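
/*
 * Illustrative user-space entry (a sketch, not compiled here): glibc
 * provides no wrapper for bpf(2), so callers typically go through a thin
 * helper such as
 *
 *	static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 *				  unsigned int size)
 *	{
 *		return syscall(__NR_bpf, cmd, attr, size);
 *	}
 *
 * which lands in the SYSCALL_DEFINE3() below. libbpf's sys_bpf() in
 * tools/lib/bpf/bpf.c follows the same pattern.
 */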
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
}

static bool syscall_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= U16_MAX)
		return false;
	if (off % size != 0)
		return false;
	return true;
}

BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
{
	switch (cmd) {
	case BPF_MAP_CREATE:
	case BPF_MAP_DELETE_ELEM:
	case BPF_MAP_UPDATE_ELEM:
	case BPF_MAP_FREEZE:
	case BPF_MAP_GET_FD_BY_ID:
	case BPF_PROG_LOAD:
	case BPF_BTF_LOAD:
	case BPF_LINK_CREATE:
	case BPF_RAW_TRACEPOINT_OPEN:
		break;
	default:
		return -EINVAL;
	}
	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
}
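
/*
 * Illustrative BPF-side usage (a sketch under assumed names, not compiled
 * here): a BPF_PROG_TYPE_SYSCALL program may drive the commands
 * whitelisted above through the bpf_sys_bpf() helper, e.g.:
 *
 *	SEC("syscall")
 *	int create_map(struct my_args *ctx)
 *	{
 *		union bpf_attr attr = {};
 *
 *		attr.map_type    = BPF_MAP_TYPE_ARRAY;
 *		attr.key_size    = 4;
 *		attr.value_size  = 4;
 *		attr.max_entries = 1;
 *		ctx->map_fd = bpf_sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 *		return 0;
 *	}
 *
 * "struct my_args" and its map_fd field are made-up names; real loader
 * programs of this kind are generated by bpftool's light skeleton rather
 * than written by hand.
 */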
/* To shut up -Wmissing-prototypes.
 * This function is used by the kernel light skeleton
 * to load bpf programs when modules are loaded or during kernel boot.
 * See tools/lib/bpf/skel_internal.h
 */
int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);

int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	struct bpf_prog * __maybe_unused prog;
	struct bpf_tramp_run_ctx __maybe_unused run_ctx;

	switch (cmd) {
#ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
	case BPF_PROG_TEST_RUN:
		if (attr->test.data_in || attr->test.data_out ||
		    attr->test.ctx_out || attr->test.duration ||
		    attr->test.repeat || attr->test.flags)
			return -EINVAL;

		prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
		if (IS_ERR(prog))
			return PTR_ERR(prog);

		if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
		    attr->test.ctx_size_in > U16_MAX) {
			bpf_prog_put(prog);
			return -EINVAL;
		}

		run_ctx.bpf_cookie = 0;
		if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
			/* recursion detected */
			__bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
			bpf_prog_put(prog);
			return -EBUSY;
		}
		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
		__bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
						&run_ctx);
		bpf_prog_put(prog);
		return 0;
#endif
	default:
		return ____bpf_sys_bpf(cmd, attr, size);
	}
}
EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL");

static const struct bpf_func_proto bpf_sys_bpf_proto = {
	.func = bpf_sys_bpf,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
	.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type = ARG_CONST_SIZE,
};

const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return bpf_base_func_proto(func_id, prog);
}

BPF_CALL_1(bpf_sys_close, u32, fd)
{
	/* When bpf program calls this helper there should not be
	 * an fdget() without matching completed fdput().
	 * This helper is allowed in the following callchain only:
	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
	 */
	return close_fd(fd);
}

static const struct bpf_func_proto bpf_sys_close_proto = {
	.func = bpf_sys_close,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
};

BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
	*res = 0;
	if (flags)
		return -EINVAL;

	if (name_sz <= 1 || name[name_sz - 1])
		return -EINVAL;

	if (!bpf_dump_raw_ok(current_cred()))
		return -EPERM;

	*res = kallsyms_lookup_name(name);
	return *res ? 0 : -ENOENT;
}

static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
	.func = bpf_kallsyms_lookup_name,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_MEM,
	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
	.arg3_type = ARG_ANYTHING,
	.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
	.arg4_size = sizeof(u64),
};

static const struct bpf_func_proto *
syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_sys_bpf:
		return !bpf_token_capable(prog->aux->token, CAP_PERFMON)
		       ? NULL : &bpf_sys_bpf_proto;
	case BPF_FUNC_btf_find_by_name_kind:
		return &bpf_btf_find_by_name_kind_proto;
	case BPF_FUNC_sys_close:
		return &bpf_sys_close_proto;
	case BPF_FUNC_kallsyms_lookup_name:
		return &bpf_kallsyms_lookup_name_proto;
	default:
		return tracing_prog_func_proto(func_id, prog);
	}
}

const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
	.get_func_proto = syscall_prog_func_proto,
	.is_valid_access = syscall_prog_is_valid_access,
};

const struct bpf_prog_ops bpf_syscall_prog_ops = {
	.test_run = bpf_prog_test_run_syscall,
};

#ifdef CONFIG_SYSCTL
static int bpf_stats_handler(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct static_key *key = (struct static_key *)table->data;
	static int saved_val;
	int val, ret;
	struct ctl_table tmp = {
		.data = &val,
		.maxlen = sizeof(val),
		.mode = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	val = saved_val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret && val != saved_val) {
		if (val)
			static_key_slow_inc(key);
		else
			static_key_slow_dec(key);
		saved_val = val;
	}
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}

void __weak unpriv_ebpf_notify(int new_state)
{
}
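
/*
 * Handler for the "unprivileged_bpf_disabled" sysctl below. Accepted
 * values (bounded by extra1/extra2 in bpf_syscall_table[]):
 *   0 - unprivileged calls to bpf() are allowed
 *   1 - unprivileged calls are disabled for the lifetime of the system;
 *       once set to 1 the value can no longer be changed
 *   2 - unprivileged calls are disabled, but an admin may still switch
 *       back to 0 or lock down with 1
 */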
static int bpf_unpriv_handler(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, unpriv_enable = *(int *)table->data;
	bool locked_state = unpriv_enable == 1;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &unpriv_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (locked_state && unpriv_enable != 1)
			return -EPERM;
		*(int *)table->data = unpriv_enable;
	}

	if (write)
		unpriv_ebpf_notify(unpriv_enable);

	return ret;
}

static const struct ctl_table bpf_syscall_table[] = {
	{
		.procname = "unprivileged_bpf_disabled",
		.data = &sysctl_unprivileged_bpf_disabled,
		.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
		.mode = 0644,
		.proc_handler = bpf_unpriv_handler,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
	},
	{
		.procname = "bpf_stats_enabled",
		.data = &bpf_stats_enabled_key.key,
		.mode = 0644,
		.proc_handler = bpf_stats_handler,
	},
};

static int __init bpf_syscall_sysctl_init(void)
{
	register_sysctl_init("kernel", bpf_syscall_table);
	return 0;
}
late_initcall(bpf_syscall_sysctl_init);
#endif /* CONFIG_SYSCTL */