// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <crypto/sha2.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
#include <linux/bsearch.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
#include <linux/pgtable.h>
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/sort.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
#include <linux/tracepoint.h>
#include <linux/overflow.h>
#include <linux/cookie.h>
#include <linux/verification.h>

#include <net/netfilter/nf_bpf_link.h>
#include <net/netkit.h>
#include <net/tcx.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
			IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
DEFINE_COOKIE(bpf_map_cookie);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);
static DEFINE_IDR(link_idr);
static DEFINE_SPINLOCK(link_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly =
	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
			     size_t expected_size,
			     size_t actual_size)
{
	int res;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (actual_size <= expected_size)
		return 0;

	if (uaddr.is_kernel)
		res = memchr_inv(uaddr.kernel + expected_size, 0,
				 actual_size - expected_size) == NULL;
	else
		res = check_zeroed_user(uaddr.user + expected_size,
					actual_size - expected_size);
	if (res < 0)
		return res;
	return res ? 0 : -E2BIG;
}
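/*
 * Illustrative sketch (an editor's addition, not part of the original file):
 * how a syscall entry point typically pairs bpf_check_uarg_tail_zero() with
 * copy_from_bpfptr() so that both older and newer user-space binaries can
 * pass a possibly larger or smaller 'union bpf_attr'. The helper name below
 * is hypothetical.
 */
#if 0
static int example_copy_attr(union bpf_attr *attr, bpfptr_t uattr,
			     unsigned int size)
{
	int err;

	/* Any bytes beyond what this kernel knows about must be zero. */
	err = bpf_check_uarg_tail_zero(uattr, sizeof(*attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(*attr));

	/* Zero-fill so fields newer than the caller's struct read as 0. */
	memset(attr, 0, sizeof(*attr));
	if (copy_from_bpfptr(attr, uattr, size))
		return -EFAULT;
	return 0;
}
#endif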

const struct bpf_map_ops bpf_map_offload_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = bpf_map_offload_map_alloc,
	.map_free = bpf_map_offload_map_free,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = bpf_map_offload_map_mem_usage,
};

static void bpf_map_write_active_inc(struct bpf_map *map)
{
	atomic64_inc(&map->writecnt);
}

static void bpf_map_write_active_dec(struct bpf_map *map)
{
	atomic64_dec(&map->writecnt);
}

bool bpf_map_write_active(const struct bpf_map *map)
{
	return atomic64_read(&map->writecnt) != 0;
}

static u32 bpf_map_value_size(const struct bpf_map *map)
{
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		return round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		return sizeof(u32);
	else
		return map->value_size;
}

static void maybe_wait_bpf_programs(struct bpf_map *map)
{
	/* Wait for any running non-sleepable BPF programs to complete so that
	 * userspace, when we return to it, knows that all non-sleepable
	 * programs that could be running use the new map value. For sleepable
	 * BPF programs, synchronize_rcu_tasks_trace() would be needed to wait
	 * for their completion, but that wait can be very long and userspace
	 * may think the syscall hangs, so sleepable BPF programs are not
	 * handled here for now.
	 */
	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
		synchronize_rcu();
}

static void unpin_uptr_kaddr(void *kaddr)
{
	if (kaddr)
		unpin_user_page(virt_to_page(kaddr));
}

static void __bpf_obj_unpin_uptrs(struct btf_record *rec, u32 cnt, void *obj)
{
	const struct btf_field *field;
	void **uptr_addr;
	int i;

	for (i = 0, field = rec->fields; i < cnt; i++, field++) {
		if (field->type != BPF_UPTR)
			continue;

		uptr_addr = obj + field->offset;
		unpin_uptr_kaddr(*uptr_addr);
	}
}

static void bpf_obj_unpin_uptrs(struct btf_record *rec, void *obj)
{
	if (!btf_record_has_field(rec, BPF_UPTR))
		return;

	__bpf_obj_unpin_uptrs(rec, rec->cnt, obj);
}

static int bpf_obj_pin_uptrs(struct btf_record *rec, void *obj)
{
	const struct btf_field *field;
	const struct btf_type *t;
	unsigned long start, end;
	struct page *page;
	void **uptr_addr;
	int i, err;

	if (!btf_record_has_field(rec, BPF_UPTR))
		return 0;

	for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) {
		if (field->type != BPF_UPTR)
			continue;

		uptr_addr = obj + field->offset;
		start = *(unsigned long *)uptr_addr;
		if (!start)
			continue;

		t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
		/* t->size was checked for zero before */
		if (check_add_overflow(start, t->size - 1, &end)) {
			err = -EFAULT;
			goto unpin_all;
		}

		/* The uptr's struct cannot span across two pages */
		if ((start & PAGE_MASK) != (end & PAGE_MASK)) {
			err = -EOPNOTSUPP;
			goto unpin_all;
		}

		err = pin_user_pages_fast(start, 1, FOLL_LONGTERM | FOLL_WRITE, &page);
		if (err != 1)
			goto unpin_all;

		if (PageHighMem(page)) {
			err = -EOPNOTSUPP;
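			/* page_address() below needs a direct kernel mapping,
			 * which highmem pages don't have; undo this page's pin
			 * before unwinding the earlier ones.
			 */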
unpin_user_page(page); 234 goto unpin_all; 235 } 236 237 *uptr_addr = page_address(page) + offset_in_page(start); 238 } 239 240 return 0; 241 242 unpin_all: 243 __bpf_obj_unpin_uptrs(rec, i, obj); 244 return err; 245 } 246 247 static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, 248 void *key, void *value, __u64 flags) 249 { 250 int err; 251 252 /* Need to create a kthread, thus must support schedule */ 253 if (bpf_map_is_offloaded(map)) { 254 return bpf_map_offload_update_elem(map, key, value, flags); 255 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || 256 map->map_type == BPF_MAP_TYPE_ARENA || 257 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 258 return map->ops->map_update_elem(map, key, value, flags); 259 } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || 260 map->map_type == BPF_MAP_TYPE_SOCKMAP) { 261 return sock_map_update_elem_sys(map, key, value, flags); 262 } else if (IS_FD_PROG_ARRAY(map)) { 263 return bpf_fd_array_map_update_elem(map, map_file, key, value, 264 flags); 265 } 266 267 bpf_disable_instrumentation(); 268 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 269 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 270 err = bpf_percpu_hash_update(map, key, value, flags); 271 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 272 err = bpf_percpu_array_update(map, key, value, flags); 273 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 274 err = bpf_percpu_cgroup_storage_update(map, key, value, 275 flags); 276 } else if (IS_FD_ARRAY(map)) { 277 err = bpf_fd_array_map_update_elem(map, map_file, key, value, 278 flags); 279 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 280 err = bpf_fd_htab_map_update_elem(map, map_file, key, value, 281 flags); 282 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 283 /* rcu_read_lock() is not needed */ 284 err = bpf_fd_reuseport_array_update_elem(map, key, value, 285 flags); 286 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 287 map->map_type == BPF_MAP_TYPE_STACK || 288 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 289 err = map->ops->map_push_elem(map, value, flags); 290 } else { 291 err = bpf_obj_pin_uptrs(map->record, value); 292 if (!err) { 293 rcu_read_lock(); 294 err = map->ops->map_update_elem(map, key, value, flags); 295 rcu_read_unlock(); 296 if (err) 297 bpf_obj_unpin_uptrs(map->record, value); 298 } 299 } 300 bpf_enable_instrumentation(); 301 302 return err; 303 } 304 305 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, 306 __u64 flags) 307 { 308 void *ptr; 309 int err; 310 311 if (bpf_map_is_offloaded(map)) 312 return bpf_map_offload_lookup_elem(map, key, value); 313 314 bpf_disable_instrumentation(); 315 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 316 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 317 err = bpf_percpu_hash_copy(map, key, value); 318 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 319 err = bpf_percpu_array_copy(map, key, value); 320 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 321 err = bpf_percpu_cgroup_storage_copy(map, key, value); 322 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 323 err = bpf_stackmap_extract(map, key, value, false); 324 } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { 325 err = bpf_fd_array_map_lookup_elem(map, key, value); 326 } else if (IS_FD_HASH(map)) { 327 err = bpf_fd_htab_map_lookup_elem(map, key, value); 328 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 329 err = bpf_fd_reuseport_array_lookup_elem(map, key, 
value); 330 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 331 map->map_type == BPF_MAP_TYPE_STACK || 332 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 333 err = map->ops->map_peek_elem(map, value); 334 } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 335 /* struct_ops map requires directly updating "value" */ 336 err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); 337 } else { 338 rcu_read_lock(); 339 if (map->ops->map_lookup_elem_sys_only) 340 ptr = map->ops->map_lookup_elem_sys_only(map, key); 341 else 342 ptr = map->ops->map_lookup_elem(map, key); 343 if (IS_ERR(ptr)) { 344 err = PTR_ERR(ptr); 345 } else if (!ptr) { 346 err = -ENOENT; 347 } else { 348 err = 0; 349 if (flags & BPF_F_LOCK) 350 /* lock 'ptr' and copy everything but lock */ 351 copy_map_value_locked(map, value, ptr, true); 352 else 353 copy_map_value(map, value, ptr); 354 /* mask lock and timer, since value wasn't zero inited */ 355 check_and_init_map_value(map, value); 356 } 357 rcu_read_unlock(); 358 } 359 360 bpf_enable_instrumentation(); 361 362 return err; 363 } 364 365 /* Please, do not use this function outside from the map creation path 366 * (e.g. in map update path) without taking care of setting the active 367 * memory cgroup (see at bpf_map_kmalloc_node() for example). 368 */ 369 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) 370 { 371 /* We really just want to fail instead of triggering OOM killer 372 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, 373 * which is used for lower order allocation requests. 374 * 375 * It has been observed that higher order allocation requests done by 376 * vmalloc with __GFP_NORETRY being set might fail due to not trying 377 * to reclaim memory from the page cache, thus we set 378 * __GFP_RETRY_MAYFAIL to avoid such situations. 379 */ 380 381 gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO); 382 unsigned int flags = 0; 383 unsigned long align = 1; 384 void *area; 385 386 if (size >= SIZE_MAX) 387 return NULL; 388 389 /* kmalloc()'ed memory can't be mmap()'ed */ 390 if (mmapable) { 391 BUG_ON(!PAGE_ALIGNED(size)); 392 align = SHMLBA; 393 flags = VM_USERMAP; 394 } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 395 area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, 396 numa_node); 397 if (area != NULL) 398 return area; 399 } 400 401 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, 402 gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL, 403 flags, numa_node, __builtin_return_address(0)); 404 } 405 406 void *bpf_map_area_alloc(u64 size, int numa_node) 407 { 408 return __bpf_map_area_alloc(size, numa_node, false); 409 } 410 411 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node) 412 { 413 return __bpf_map_area_alloc(size, numa_node, true); 414 } 415 416 void bpf_map_area_free(void *area) 417 { 418 kvfree(area); 419 } 420 421 static u32 bpf_map_flags_retain_permanent(u32 flags) 422 { 423 /* Some map creation flags are not tied to the map object but 424 * rather to the map fd instead, so they have no meaning upon 425 * map object inspection since multiple file descriptors with 426 * different (access) properties can exist here. Thus, given 427 * this has zero meaning for the map itself, lets clear these 428 * from here. 
429 */ 430 return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); 431 } 432 433 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) 434 { 435 map->map_type = attr->map_type; 436 map->key_size = attr->key_size; 437 map->value_size = attr->value_size; 438 map->max_entries = attr->max_entries; 439 map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); 440 map->numa_node = bpf_map_attr_numa_node(attr); 441 map->map_extra = attr->map_extra; 442 } 443 444 static int bpf_map_alloc_id(struct bpf_map *map) 445 { 446 int id; 447 448 idr_preload(GFP_KERNEL); 449 spin_lock_bh(&map_idr_lock); 450 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); 451 if (id > 0) 452 map->id = id; 453 spin_unlock_bh(&map_idr_lock); 454 idr_preload_end(); 455 456 if (WARN_ON_ONCE(!id)) 457 return -ENOSPC; 458 459 return id > 0 ? 0 : id; 460 } 461 462 void bpf_map_free_id(struct bpf_map *map) 463 { 464 unsigned long flags; 465 466 /* Offloaded maps are removed from the IDR store when their device 467 * disappears - even if someone holds an fd to them they are unusable, 468 * the memory is gone, all ops will fail; they are simply waiting for 469 * refcnt to drop to be freed. 470 */ 471 if (!map->id) 472 return; 473 474 spin_lock_irqsave(&map_idr_lock, flags); 475 476 idr_remove(&map_idr, map->id); 477 map->id = 0; 478 479 spin_unlock_irqrestore(&map_idr_lock, flags); 480 } 481 482 #ifdef CONFIG_MEMCG 483 static void bpf_map_save_memcg(struct bpf_map *map) 484 { 485 /* Currently if a map is created by a process belonging to the root 486 * memory cgroup, get_obj_cgroup_from_current() will return NULL. 487 * So we have to check map->objcg for being NULL each time it's 488 * being used. 489 */ 490 if (memcg_bpf_enabled()) 491 map->objcg = get_obj_cgroup_from_current(); 492 } 493 494 static void bpf_map_release_memcg(struct bpf_map *map) 495 { 496 if (map->objcg) 497 obj_cgroup_put(map->objcg); 498 } 499 500 static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map) 501 { 502 if (map->objcg) 503 return get_mem_cgroup_from_objcg(map->objcg); 504 505 return root_mem_cgroup; 506 } 507 508 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, 509 int node) 510 { 511 struct mem_cgroup *memcg, *old_memcg; 512 void *ptr; 513 514 memcg = bpf_map_get_memcg(map); 515 old_memcg = set_active_memcg(memcg); 516 ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); 517 set_active_memcg(old_memcg); 518 mem_cgroup_put(memcg); 519 520 return ptr; 521 } 522 523 void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, 524 int node) 525 { 526 struct mem_cgroup *memcg, *old_memcg; 527 void *ptr; 528 529 memcg = bpf_map_get_memcg(map); 530 old_memcg = set_active_memcg(memcg); 531 ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node); 532 set_active_memcg(old_memcg); 533 mem_cgroup_put(memcg); 534 535 return ptr; 536 } 537 538 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) 539 { 540 struct mem_cgroup *memcg, *old_memcg; 541 void *ptr; 542 543 memcg = bpf_map_get_memcg(map); 544 old_memcg = set_active_memcg(memcg); 545 ptr = kzalloc(size, flags | __GFP_ACCOUNT); 546 set_active_memcg(old_memcg); 547 mem_cgroup_put(memcg); 548 549 return ptr; 550 } 551 552 void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, 553 gfp_t flags) 554 { 555 struct mem_cgroup *memcg, *old_memcg; 556 void *ptr; 557 558 memcg = bpf_map_get_memcg(map); 559 old_memcg = set_active_memcg(memcg); 560 ptr = kvcalloc(n, size, flags | 
__GFP_ACCOUNT); 561 set_active_memcg(old_memcg); 562 mem_cgroup_put(memcg); 563 564 return ptr; 565 } 566 567 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, 568 size_t align, gfp_t flags) 569 { 570 struct mem_cgroup *memcg, *old_memcg; 571 void __percpu *ptr; 572 573 memcg = bpf_map_get_memcg(map); 574 old_memcg = set_active_memcg(memcg); 575 ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT); 576 set_active_memcg(old_memcg); 577 mem_cgroup_put(memcg); 578 579 return ptr; 580 } 581 582 #else 583 static void bpf_map_save_memcg(struct bpf_map *map) 584 { 585 } 586 587 static void bpf_map_release_memcg(struct bpf_map *map) 588 { 589 } 590 #endif 591 592 static bool can_alloc_pages(void) 593 { 594 return preempt_count() == 0 && !irqs_disabled() && 595 !IS_ENABLED(CONFIG_PREEMPT_RT); 596 } 597 598 static struct page *__bpf_alloc_page(int nid) 599 { 600 if (!can_alloc_pages()) 601 return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0); 602 603 return alloc_pages_node(nid, 604 GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT 605 | __GFP_NOWARN, 606 0); 607 } 608 609 int bpf_map_alloc_pages(const struct bpf_map *map, int nid, 610 unsigned long nr_pages, struct page **pages) 611 { 612 unsigned long i, j; 613 struct page *pg; 614 int ret = 0; 615 #ifdef CONFIG_MEMCG 616 struct mem_cgroup *memcg, *old_memcg; 617 618 memcg = bpf_map_get_memcg(map); 619 old_memcg = set_active_memcg(memcg); 620 #endif 621 for (i = 0; i < nr_pages; i++) { 622 pg = __bpf_alloc_page(nid); 623 624 if (pg) { 625 pages[i] = pg; 626 continue; 627 } 628 for (j = 0; j < i; j++) 629 free_pages_nolock(pages[j], 0); 630 ret = -ENOMEM; 631 break; 632 } 633 634 #ifdef CONFIG_MEMCG 635 set_active_memcg(old_memcg); 636 mem_cgroup_put(memcg); 637 #endif 638 return ret; 639 } 640 641 642 static int btf_field_cmp(const void *a, const void *b) 643 { 644 const struct btf_field *f1 = a, *f2 = b; 645 646 if (f1->offset < f2->offset) 647 return -1; 648 else if (f1->offset > f2->offset) 649 return 1; 650 return 0; 651 } 652 653 struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset, 654 u32 field_mask) 655 { 656 struct btf_field *field; 657 658 if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & field_mask)) 659 return NULL; 660 field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp); 661 if (!field || !(field->type & field_mask)) 662 return NULL; 663 return field; 664 } 665 666 void btf_record_free(struct btf_record *rec) 667 { 668 int i; 669 670 if (IS_ERR_OR_NULL(rec)) 671 return; 672 for (i = 0; i < rec->cnt; i++) { 673 switch (rec->fields[i].type) { 674 case BPF_KPTR_UNREF: 675 case BPF_KPTR_REF: 676 case BPF_KPTR_PERCPU: 677 case BPF_UPTR: 678 if (rec->fields[i].kptr.module) 679 module_put(rec->fields[i].kptr.module); 680 if (btf_is_kernel(rec->fields[i].kptr.btf)) 681 btf_put(rec->fields[i].kptr.btf); 682 break; 683 case BPF_LIST_HEAD: 684 case BPF_LIST_NODE: 685 case BPF_RB_ROOT: 686 case BPF_RB_NODE: 687 case BPF_SPIN_LOCK: 688 case BPF_RES_SPIN_LOCK: 689 case BPF_TIMER: 690 case BPF_REFCOUNT: 691 case BPF_WORKQUEUE: 692 case BPF_TASK_WORK: 693 /* Nothing to release */ 694 break; 695 default: 696 WARN_ON_ONCE(1); 697 continue; 698 } 699 } 700 kfree(rec); 701 } 702 703 void bpf_map_free_record(struct bpf_map *map) 704 { 705 btf_record_free(map->record); 706 map->record = NULL; 707 } 708 709 struct btf_record *btf_record_dup(const struct btf_record *rec) 710 { 711 const struct btf_field *fields; 712 struct btf_record *new_rec; 713 int ret, size, i; 714 715 if 
(IS_ERR_OR_NULL(rec)) 716 return NULL; 717 size = struct_size(rec, fields, rec->cnt); 718 new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN); 719 if (!new_rec) 720 return ERR_PTR(-ENOMEM); 721 /* Do a deep copy of the btf_record */ 722 fields = rec->fields; 723 new_rec->cnt = 0; 724 for (i = 0; i < rec->cnt; i++) { 725 switch (fields[i].type) { 726 case BPF_KPTR_UNREF: 727 case BPF_KPTR_REF: 728 case BPF_KPTR_PERCPU: 729 case BPF_UPTR: 730 if (btf_is_kernel(fields[i].kptr.btf)) 731 btf_get(fields[i].kptr.btf); 732 if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { 733 ret = -ENXIO; 734 goto free; 735 } 736 break; 737 case BPF_LIST_HEAD: 738 case BPF_LIST_NODE: 739 case BPF_RB_ROOT: 740 case BPF_RB_NODE: 741 case BPF_SPIN_LOCK: 742 case BPF_RES_SPIN_LOCK: 743 case BPF_TIMER: 744 case BPF_REFCOUNT: 745 case BPF_WORKQUEUE: 746 case BPF_TASK_WORK: 747 /* Nothing to acquire */ 748 break; 749 default: 750 ret = -EFAULT; 751 WARN_ON_ONCE(1); 752 goto free; 753 } 754 new_rec->cnt++; 755 } 756 return new_rec; 757 free: 758 btf_record_free(new_rec); 759 return ERR_PTR(ret); 760 } 761 762 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b) 763 { 764 bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b); 765 int size; 766 767 if (!a_has_fields && !b_has_fields) 768 return true; 769 if (a_has_fields != b_has_fields) 770 return false; 771 if (rec_a->cnt != rec_b->cnt) 772 return false; 773 size = struct_size(rec_a, fields, rec_a->cnt); 774 /* btf_parse_fields uses kzalloc to allocate a btf_record, so unused 775 * members are zeroed out. So memcmp is safe to do without worrying 776 * about padding/unused fields. 777 * 778 * While spin_lock, timer, and kptr have no relation to map BTF, 779 * list_head metadata is specific to map BTF, the btf and value_rec 780 * members in particular. btf is the map BTF, while value_rec points to 781 * btf_record in that map BTF. 782 * 783 * So while by default, we don't rely on the map BTF (which the records 784 * were parsed from) matching for both records, which is not backwards 785 * compatible, in case list_head is part of it, we implicitly rely on 786 * that by way of depending on memcmp succeeding for it. 
787 */ 788 return !memcmp(rec_a, rec_b, size); 789 } 790 791 void bpf_obj_free_timer(const struct btf_record *rec, void *obj) 792 { 793 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER))) 794 return; 795 bpf_timer_cancel_and_free(obj + rec->timer_off); 796 } 797 798 void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj) 799 { 800 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_WORKQUEUE))) 801 return; 802 bpf_wq_cancel_and_free(obj + rec->wq_off); 803 } 804 805 void bpf_obj_free_task_work(const struct btf_record *rec, void *obj) 806 { 807 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK))) 808 return; 809 bpf_task_work_cancel_and_free(obj + rec->task_work_off); 810 } 811 812 void bpf_obj_free_fields(const struct btf_record *rec, void *obj) 813 { 814 const struct btf_field *fields; 815 int i; 816 817 if (IS_ERR_OR_NULL(rec)) 818 return; 819 fields = rec->fields; 820 for (i = 0; i < rec->cnt; i++) { 821 struct btf_struct_meta *pointee_struct_meta; 822 const struct btf_field *field = &fields[i]; 823 void *field_ptr = obj + field->offset; 824 void *xchgd_field; 825 826 switch (fields[i].type) { 827 case BPF_SPIN_LOCK: 828 case BPF_RES_SPIN_LOCK: 829 break; 830 case BPF_TIMER: 831 bpf_timer_cancel_and_free(field_ptr); 832 break; 833 case BPF_WORKQUEUE: 834 bpf_wq_cancel_and_free(field_ptr); 835 break; 836 case BPF_TASK_WORK: 837 bpf_task_work_cancel_and_free(field_ptr); 838 break; 839 case BPF_KPTR_UNREF: 840 WRITE_ONCE(*(u64 *)field_ptr, 0); 841 break; 842 case BPF_KPTR_REF: 843 case BPF_KPTR_PERCPU: 844 xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0); 845 if (!xchgd_field) 846 break; 847 848 if (!btf_is_kernel(field->kptr.btf)) { 849 pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, 850 field->kptr.btf_id); 851 __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? 852 pointee_struct_meta->record : NULL, 853 fields[i].type == BPF_KPTR_PERCPU); 854 } else { 855 field->kptr.dtor(xchgd_field); 856 } 857 break; 858 case BPF_UPTR: 859 /* The caller ensured that no one is using the uptr */ 860 unpin_uptr_kaddr(*(void **)field_ptr); 861 break; 862 case BPF_LIST_HEAD: 863 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 864 continue; 865 bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off); 866 break; 867 case BPF_RB_ROOT: 868 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 869 continue; 870 bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off); 871 break; 872 case BPF_LIST_NODE: 873 case BPF_RB_NODE: 874 case BPF_REFCOUNT: 875 break; 876 default: 877 WARN_ON_ONCE(1); 878 continue; 879 } 880 } 881 } 882 883 static void bpf_map_free(struct bpf_map *map) 884 { 885 struct btf_record *rec = map->record; 886 struct btf *btf = map->btf; 887 888 /* implementation dependent freeing. Disabling migration to simplify 889 * the free of values or special fields allocated from bpf memory 890 * allocator. 891 */ 892 kfree(map->excl_prog_sha); 893 migrate_disable(); 894 map->ops->map_free(map); 895 migrate_enable(); 896 897 /* Delay freeing of btf_record for maps, as map_free 898 * callback usually needs access to them. It is better to do it here 899 * than require each callback to do the free itself manually. 900 * 901 * Note that the btf_record stashed in map->inner_map_meta->record was 902 * already freed using the map_free callback for map in map case which 903 * eventually calls bpf_map_free_meta, since inner_map_meta is only a 904 * template bpf_map struct used during verification. 
905 */ 906 btf_record_free(rec); 907 /* Delay freeing of btf for maps, as map_free callback may need 908 * struct_meta info which will be freed with btf_put(). 909 */ 910 btf_put(btf); 911 } 912 913 /* called from workqueue */ 914 static void bpf_map_free_deferred(struct work_struct *work) 915 { 916 struct bpf_map *map = container_of(work, struct bpf_map, work); 917 918 security_bpf_map_free(map); 919 bpf_map_release_memcg(map); 920 bpf_map_owner_free(map); 921 bpf_map_free(map); 922 } 923 924 static void bpf_map_put_uref(struct bpf_map *map) 925 { 926 if (atomic64_dec_and_test(&map->usercnt)) { 927 if (map->ops->map_release_uref) 928 map->ops->map_release_uref(map); 929 } 930 } 931 932 static void bpf_map_free_in_work(struct bpf_map *map) 933 { 934 INIT_WORK(&map->work, bpf_map_free_deferred); 935 /* Avoid spawning kworkers, since they all might contend 936 * for the same mutex like slab_mutex. 937 */ 938 queue_work(system_dfl_wq, &map->work); 939 } 940 941 static void bpf_map_free_rcu_gp(struct rcu_head *rcu) 942 { 943 bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu)); 944 } 945 946 static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu) 947 { 948 if (rcu_trace_implies_rcu_gp()) 949 bpf_map_free_rcu_gp(rcu); 950 else 951 call_rcu(rcu, bpf_map_free_rcu_gp); 952 } 953 954 /* decrement map refcnt and schedule it for freeing via workqueue 955 * (underlying map implementation ops->map_free() might sleep) 956 */ 957 void bpf_map_put(struct bpf_map *map) 958 { 959 if (atomic64_dec_and_test(&map->refcnt)) { 960 /* bpf_map_free_id() must be called first */ 961 bpf_map_free_id(map); 962 963 WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt)); 964 if (READ_ONCE(map->free_after_mult_rcu_gp)) 965 call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp); 966 else if (READ_ONCE(map->free_after_rcu_gp)) 967 call_rcu(&map->rcu, bpf_map_free_rcu_gp); 968 else 969 bpf_map_free_in_work(map); 970 } 971 } 972 EXPORT_SYMBOL_GPL(bpf_map_put); 973 974 void bpf_map_put_with_uref(struct bpf_map *map) 975 { 976 bpf_map_put_uref(map); 977 bpf_map_put(map); 978 } 979 980 static int bpf_map_release(struct inode *inode, struct file *filp) 981 { 982 struct bpf_map *map = filp->private_data; 983 984 if (map->ops->map_release) 985 map->ops->map_release(map, filp); 986 987 bpf_map_put_with_uref(map); 988 return 0; 989 } 990 991 static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) 992 { 993 fmode_t mode = fd_file(f)->f_mode; 994 995 /* Our file permissions may have been overridden by global 996 * map permissions facing syscall side. 
997 */ 998 if (READ_ONCE(map->frozen)) 999 mode &= ~FMODE_CAN_WRITE; 1000 return mode; 1001 } 1002 1003 #ifdef CONFIG_PROC_FS 1004 /* Show the memory usage of a bpf map */ 1005 static u64 bpf_map_memory_usage(const struct bpf_map *map) 1006 { 1007 return map->ops->map_mem_usage(map); 1008 } 1009 1010 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 1011 { 1012 struct bpf_map *map = filp->private_data; 1013 u32 type = 0, jited = 0; 1014 1015 spin_lock(&map->owner_lock); 1016 if (map->owner) { 1017 type = map->owner->type; 1018 jited = map->owner->jited; 1019 } 1020 spin_unlock(&map->owner_lock); 1021 1022 seq_printf(m, 1023 "map_type:\t%u\n" 1024 "key_size:\t%u\n" 1025 "value_size:\t%u\n" 1026 "max_entries:\t%u\n" 1027 "map_flags:\t%#x\n" 1028 "map_extra:\t%#llx\n" 1029 "memlock:\t%llu\n" 1030 "map_id:\t%u\n" 1031 "frozen:\t%u\n", 1032 map->map_type, 1033 map->key_size, 1034 map->value_size, 1035 map->max_entries, 1036 map->map_flags, 1037 (unsigned long long)map->map_extra, 1038 bpf_map_memory_usage(map), 1039 map->id, 1040 READ_ONCE(map->frozen)); 1041 if (type) { 1042 seq_printf(m, "owner_prog_type:\t%u\n", type); 1043 seq_printf(m, "owner_jited:\t%u\n", jited); 1044 } 1045 } 1046 #endif 1047 1048 static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, 1049 loff_t *ppos) 1050 { 1051 /* We need this handler such that alloc_file() enables 1052 * f_mode with FMODE_CAN_READ. 1053 */ 1054 return -EINVAL; 1055 } 1056 1057 static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, 1058 size_t siz, loff_t *ppos) 1059 { 1060 /* We need this handler such that alloc_file() enables 1061 * f_mode with FMODE_CAN_WRITE. 1062 */ 1063 return -EINVAL; 1064 } 1065 1066 /* called for any extra memory-mapped regions (except initial) */ 1067 static void bpf_map_mmap_open(struct vm_area_struct *vma) 1068 { 1069 struct bpf_map *map = vma->vm_file->private_data; 1070 1071 if (vma->vm_flags & VM_MAYWRITE) 1072 bpf_map_write_active_inc(map); 1073 } 1074 1075 /* called for all unmapped memory region (including initial) */ 1076 static void bpf_map_mmap_close(struct vm_area_struct *vma) 1077 { 1078 struct bpf_map *map = vma->vm_file->private_data; 1079 1080 if (vma->vm_flags & VM_MAYWRITE) 1081 bpf_map_write_active_dec(map); 1082 } 1083 1084 static const struct vm_operations_struct bpf_map_default_vmops = { 1085 .open = bpf_map_mmap_open, 1086 .close = bpf_map_mmap_close, 1087 }; 1088 1089 static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) 1090 { 1091 struct bpf_map *map = filp->private_data; 1092 int err = 0; 1093 1094 if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record)) 1095 return -ENOTSUPP; 1096 1097 if (!(vma->vm_flags & VM_SHARED)) 1098 return -EINVAL; 1099 1100 mutex_lock(&map->freeze_mutex); 1101 1102 if (vma->vm_flags & VM_WRITE) { 1103 if (map->frozen) { 1104 err = -EPERM; 1105 goto out; 1106 } 1107 /* map is meant to be read-only, so do not allow mapping as 1108 * writable, because it's possible to leak a writable page 1109 * reference and allows user-space to still modify it after 1110 * freezing, while verifier will assume contents do not change 1111 */ 1112 if (map->map_flags & BPF_F_RDONLY_PROG) { 1113 err = -EACCES; 1114 goto out; 1115 } 1116 bpf_map_write_active_inc(map); 1117 } 1118 out: 1119 mutex_unlock(&map->freeze_mutex); 1120 if (err) 1121 return err; 1122 1123 /* set default open/close callbacks */ 1124 vma->vm_ops = &bpf_map_default_vmops; 1125 vma->vm_private_data = map; 1126 vm_flags_clear(vma, VM_MAYEXEC); 
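	/* Clearing VM_MAYEXEC above also prevents a later mprotect(PROT_EXEC)
	 * on the mapping, so map memory can never become executable.
	 */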
	/* If mapping is read-only, then disallow potentially re-mapping with
	 * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing
	 * means that as far as BPF map's memory-mapped VMAs are concerned,
	 * VM_WRITE and VM_MAYWRITE are equivalent: if one of them is set,
	 * both should be set, so we can forget about VM_MAYWRITE and always
	 * check just VM_WRITE.
	 */
	if (!(vma->vm_flags & VM_WRITE))
		vm_flags_clear(vma, VM_MAYWRITE);

	err = map->ops->map_mmap(map, vma);
	if (err) {
		if (vma->vm_flags & VM_WRITE)
			bpf_map_write_active_dec(map);
	}

	return err;
}

static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_poll)
		return map->ops->map_poll(map, filp, pts);

	return EPOLLERR;
}

static unsigned long bpf_get_unmapped_area(struct file *filp, unsigned long addr,
					   unsigned long len, unsigned long pgoff,
					   unsigned long flags)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_get_unmapped_area)
		return map->ops->map_get_unmapped_area(filp, addr, len, pgoff, flags);
#ifdef CONFIG_MMU
	return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
#else
	return addr;
#endif
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
	.read = bpf_dummy_read,
	.write = bpf_dummy_write,
	.mmap = bpf_map_mmap,
	.poll = bpf_map_poll,
	.get_unmapped_area = bpf_get_unmapped_area,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

/* dst and src must have at least "size" number of bytes.
 * Return strlen on success and < 0 on error.
 */
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
{
	const char *end = src + size;
	const char *orig_src = src;

	memset(dst, 0, size);
	/* Copy all isalnum(), '_' and '.' chars.
*/ 1224 while (src < end && *src) { 1225 if (!isalnum(*src) && 1226 *src != '_' && *src != '.') 1227 return -EINVAL; 1228 *dst++ = *src++; 1229 } 1230 1231 /* No '\0' found in "size" number of bytes */ 1232 if (src == end) 1233 return -EINVAL; 1234 1235 return src - orig_src; 1236 } 1237 1238 int map_check_no_btf(const struct bpf_map *map, 1239 const struct btf *btf, 1240 const struct btf_type *key_type, 1241 const struct btf_type *value_type) 1242 { 1243 return -ENOTSUPP; 1244 } 1245 1246 static int map_check_btf(struct bpf_map *map, struct bpf_token *token, 1247 const struct btf *btf, u32 btf_key_id, u32 btf_value_id) 1248 { 1249 const struct btf_type *key_type, *value_type; 1250 u32 key_size, value_size; 1251 int ret = 0; 1252 1253 /* Some maps allow key to be unspecified. */ 1254 if (btf_key_id) { 1255 key_type = btf_type_id_size(btf, &btf_key_id, &key_size); 1256 if (!key_type || key_size != map->key_size) 1257 return -EINVAL; 1258 } else { 1259 key_type = btf_type_by_id(btf, 0); 1260 if (!map->ops->map_check_btf) 1261 return -EINVAL; 1262 } 1263 1264 value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 1265 if (!value_type || value_size != map->value_size) 1266 return -EINVAL; 1267 1268 map->record = btf_parse_fields(btf, value_type, 1269 BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | 1270 BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR | 1271 BPF_TASK_WORK, 1272 map->value_size); 1273 if (!IS_ERR_OR_NULL(map->record)) { 1274 int i; 1275 1276 if (!bpf_token_capable(token, CAP_BPF)) { 1277 ret = -EPERM; 1278 goto free_map_tab; 1279 } 1280 if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) { 1281 ret = -EACCES; 1282 goto free_map_tab; 1283 } 1284 for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) { 1285 switch (map->record->field_mask & (1 << i)) { 1286 case 0: 1287 continue; 1288 case BPF_SPIN_LOCK: 1289 case BPF_RES_SPIN_LOCK: 1290 if (map->map_type != BPF_MAP_TYPE_HASH && 1291 map->map_type != BPF_MAP_TYPE_ARRAY && 1292 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 1293 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1294 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1295 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1296 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1297 ret = -EOPNOTSUPP; 1298 goto free_map_tab; 1299 } 1300 break; 1301 case BPF_TIMER: 1302 case BPF_WORKQUEUE: 1303 case BPF_TASK_WORK: 1304 if (map->map_type != BPF_MAP_TYPE_HASH && 1305 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1306 map->map_type != BPF_MAP_TYPE_ARRAY) { 1307 ret = -EOPNOTSUPP; 1308 goto free_map_tab; 1309 } 1310 break; 1311 case BPF_KPTR_UNREF: 1312 case BPF_KPTR_REF: 1313 case BPF_KPTR_PERCPU: 1314 case BPF_REFCOUNT: 1315 if (map->map_type != BPF_MAP_TYPE_HASH && 1316 map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 1317 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1318 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH && 1319 map->map_type != BPF_MAP_TYPE_ARRAY && 1320 map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && 1321 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1322 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1323 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1324 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1325 ret = -EOPNOTSUPP; 1326 goto free_map_tab; 1327 } 1328 break; 1329 case BPF_UPTR: 1330 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) { 1331 ret = -EOPNOTSUPP; 1332 goto free_map_tab; 1333 } 1334 break; 1335 case BPF_LIST_HEAD: 1336 case BPF_RB_ROOT: 1337 if (map->map_type != BPF_MAP_TYPE_HASH && 1338 map->map_type != 
BPF_MAP_TYPE_LRU_HASH && 1339 map->map_type != BPF_MAP_TYPE_ARRAY) { 1340 ret = -EOPNOTSUPP; 1341 goto free_map_tab; 1342 } 1343 break; 1344 default: 1345 /* Fail if map_type checks are missing for a field type */ 1346 ret = -EOPNOTSUPP; 1347 goto free_map_tab; 1348 } 1349 } 1350 } 1351 1352 ret = btf_check_and_fixup_fields(btf, map->record); 1353 if (ret < 0) 1354 goto free_map_tab; 1355 1356 if (map->ops->map_check_btf) { 1357 ret = map->ops->map_check_btf(map, btf, key_type, value_type); 1358 if (ret < 0) 1359 goto free_map_tab; 1360 } 1361 1362 return ret; 1363 free_map_tab: 1364 bpf_map_free_record(map); 1365 return ret; 1366 } 1367 1368 static bool bpf_net_capable(void) 1369 { 1370 return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); 1371 } 1372 1373 #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size 1374 /* called via syscall */ 1375 static int map_create(union bpf_attr *attr, bpfptr_t uattr) 1376 { 1377 const struct bpf_map_ops *ops; 1378 struct bpf_token *token = NULL; 1379 int numa_node = bpf_map_attr_numa_node(attr); 1380 u32 map_type = attr->map_type; 1381 struct bpf_map *map; 1382 bool token_flag; 1383 int f_flags; 1384 int err; 1385 1386 err = CHECK_ATTR(BPF_MAP_CREATE); 1387 if (err) 1388 return -EINVAL; 1389 1390 /* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it 1391 * to avoid per-map type checks tripping on unknown flag 1392 */ 1393 token_flag = attr->map_flags & BPF_F_TOKEN_FD; 1394 attr->map_flags &= ~BPF_F_TOKEN_FD; 1395 1396 if (attr->btf_vmlinux_value_type_id) { 1397 if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || 1398 attr->btf_key_type_id || attr->btf_value_type_id) 1399 return -EINVAL; 1400 } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { 1401 return -EINVAL; 1402 } 1403 1404 if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && 1405 attr->map_type != BPF_MAP_TYPE_ARENA && 1406 attr->map_extra != 0) 1407 return -EINVAL; 1408 1409 f_flags = bpf_get_file_flag(attr->map_flags); 1410 if (f_flags < 0) 1411 return f_flags; 1412 1413 if (numa_node != NUMA_NO_NODE && 1414 ((unsigned int)numa_node >= nr_node_ids || 1415 !node_online(numa_node))) 1416 return -EINVAL; 1417 1418 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 1419 map_type = attr->map_type; 1420 if (map_type >= ARRAY_SIZE(bpf_map_types)) 1421 return -EINVAL; 1422 map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); 1423 ops = bpf_map_types[map_type]; 1424 if (!ops) 1425 return -EINVAL; 1426 1427 if (ops->map_alloc_check) { 1428 err = ops->map_alloc_check(attr); 1429 if (err) 1430 return err; 1431 } 1432 if (attr->map_ifindex) 1433 ops = &bpf_map_offload_ops; 1434 if (!ops->map_mem_usage) 1435 return -EINVAL; 1436 1437 if (token_flag) { 1438 token = bpf_token_get_from_fd(attr->map_token_fd); 1439 if (IS_ERR(token)) 1440 return PTR_ERR(token); 1441 1442 /* if current token doesn't grant map creation permissions, 1443 * then we can't use this token, so ignore it and rely on 1444 * system-wide capabilities checks 1445 */ 1446 if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) || 1447 !bpf_token_allow_map_type(token, attr->map_type)) { 1448 bpf_token_put(token); 1449 token = NULL; 1450 } 1451 } 1452 1453 err = -EPERM; 1454 1455 /* Intent here is for unprivileged_bpf_disabled to block BPF map 1456 * creation for unprivileged users; other actions depend 1457 * on fd availability and access to bpffs, so are dependent on 1458 * object creation success. Even with unprivileged BPF disabled, 1459 * capability checks are still carried out. 
1460 */ 1461 if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF)) 1462 goto put_token; 1463 1464 /* check privileged map type permissions */ 1465 switch (map_type) { 1466 case BPF_MAP_TYPE_ARRAY: 1467 case BPF_MAP_TYPE_PERCPU_ARRAY: 1468 case BPF_MAP_TYPE_PROG_ARRAY: 1469 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 1470 case BPF_MAP_TYPE_CGROUP_ARRAY: 1471 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 1472 case BPF_MAP_TYPE_HASH: 1473 case BPF_MAP_TYPE_PERCPU_HASH: 1474 case BPF_MAP_TYPE_HASH_OF_MAPS: 1475 case BPF_MAP_TYPE_RINGBUF: 1476 case BPF_MAP_TYPE_USER_RINGBUF: 1477 case BPF_MAP_TYPE_CGROUP_STORAGE: 1478 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: 1479 /* unprivileged */ 1480 break; 1481 case BPF_MAP_TYPE_SK_STORAGE: 1482 case BPF_MAP_TYPE_INODE_STORAGE: 1483 case BPF_MAP_TYPE_TASK_STORAGE: 1484 case BPF_MAP_TYPE_CGRP_STORAGE: 1485 case BPF_MAP_TYPE_BLOOM_FILTER: 1486 case BPF_MAP_TYPE_LPM_TRIE: 1487 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: 1488 case BPF_MAP_TYPE_STACK_TRACE: 1489 case BPF_MAP_TYPE_QUEUE: 1490 case BPF_MAP_TYPE_STACK: 1491 case BPF_MAP_TYPE_LRU_HASH: 1492 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 1493 case BPF_MAP_TYPE_STRUCT_OPS: 1494 case BPF_MAP_TYPE_CPUMAP: 1495 case BPF_MAP_TYPE_ARENA: 1496 if (!bpf_token_capable(token, CAP_BPF)) 1497 goto put_token; 1498 break; 1499 case BPF_MAP_TYPE_SOCKMAP: 1500 case BPF_MAP_TYPE_SOCKHASH: 1501 case BPF_MAP_TYPE_DEVMAP: 1502 case BPF_MAP_TYPE_DEVMAP_HASH: 1503 case BPF_MAP_TYPE_XSKMAP: 1504 if (!bpf_token_capable(token, CAP_NET_ADMIN)) 1505 goto put_token; 1506 break; 1507 default: 1508 WARN(1, "unsupported map type %d", map_type); 1509 goto put_token; 1510 } 1511 1512 map = ops->map_alloc(attr); 1513 if (IS_ERR(map)) { 1514 err = PTR_ERR(map); 1515 goto put_token; 1516 } 1517 map->ops = ops; 1518 map->map_type = map_type; 1519 1520 err = bpf_obj_name_cpy(map->name, attr->map_name, 1521 sizeof(attr->map_name)); 1522 if (err < 0) 1523 goto free_map; 1524 1525 preempt_disable(); 1526 map->cookie = gen_cookie_next(&bpf_map_cookie); 1527 preempt_enable(); 1528 1529 atomic64_set(&map->refcnt, 1); 1530 atomic64_set(&map->usercnt, 1); 1531 mutex_init(&map->freeze_mutex); 1532 spin_lock_init(&map->owner_lock); 1533 1534 if (attr->btf_key_type_id || attr->btf_value_type_id || 1535 /* Even the map's value is a kernel's struct, 1536 * the bpf_prog.o must have BTF to begin with 1537 * to figure out the corresponding kernel's 1538 * counter part. Thus, attr->btf_fd has 1539 * to be valid also. 
1540 */ 1541 attr->btf_vmlinux_value_type_id) { 1542 struct btf *btf; 1543 1544 btf = btf_get_by_fd(attr->btf_fd); 1545 if (IS_ERR(btf)) { 1546 err = PTR_ERR(btf); 1547 goto free_map; 1548 } 1549 if (btf_is_kernel(btf)) { 1550 btf_put(btf); 1551 err = -EACCES; 1552 goto free_map; 1553 } 1554 map->btf = btf; 1555 1556 if (attr->btf_value_type_id) { 1557 err = map_check_btf(map, token, btf, attr->btf_key_type_id, 1558 attr->btf_value_type_id); 1559 if (err) 1560 goto free_map; 1561 } 1562 1563 map->btf_key_type_id = attr->btf_key_type_id; 1564 map->btf_value_type_id = attr->btf_value_type_id; 1565 map->btf_vmlinux_value_type_id = 1566 attr->btf_vmlinux_value_type_id; 1567 } 1568 1569 if (attr->excl_prog_hash) { 1570 bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel); 1571 1572 if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) { 1573 err = -EINVAL; 1574 goto free_map; 1575 } 1576 1577 map->excl_prog_sha = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); 1578 if (!map->excl_prog_sha) { 1579 err = -ENOMEM; 1580 goto free_map; 1581 } 1582 1583 if (copy_from_bpfptr(map->excl_prog_sha, uprog_hash, SHA256_DIGEST_SIZE)) { 1584 err = -EFAULT; 1585 goto free_map; 1586 } 1587 } else if (attr->excl_prog_hash_size) { 1588 return -EINVAL; 1589 } 1590 1591 err = security_bpf_map_create(map, attr, token, uattr.is_kernel); 1592 if (err) 1593 goto free_map_sec; 1594 1595 err = bpf_map_alloc_id(map); 1596 if (err) 1597 goto free_map_sec; 1598 1599 bpf_map_save_memcg(map); 1600 bpf_token_put(token); 1601 1602 err = bpf_map_new_fd(map, f_flags); 1603 if (err < 0) { 1604 /* failed to allocate fd. 1605 * bpf_map_put_with_uref() is needed because the above 1606 * bpf_map_alloc_id() has published the map 1607 * to the userspace and the userspace may 1608 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 1609 */ 1610 bpf_map_put_with_uref(map); 1611 return err; 1612 } 1613 1614 return err; 1615 1616 free_map_sec: 1617 security_bpf_map_free(map); 1618 free_map: 1619 bpf_map_free(map); 1620 put_token: 1621 bpf_token_put(token); 1622 return err; 1623 } 1624 1625 void bpf_map_inc(struct bpf_map *map) 1626 { 1627 atomic64_inc(&map->refcnt); 1628 } 1629 EXPORT_SYMBOL_GPL(bpf_map_inc); 1630 1631 void bpf_map_inc_with_uref(struct bpf_map *map) 1632 { 1633 atomic64_inc(&map->refcnt); 1634 atomic64_inc(&map->usercnt); 1635 } 1636 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); 1637 1638 struct bpf_map *bpf_map_get(u32 ufd) 1639 { 1640 CLASS(fd, f)(ufd); 1641 struct bpf_map *map = __bpf_map_get(f); 1642 1643 if (!IS_ERR(map)) 1644 bpf_map_inc(map); 1645 1646 return map; 1647 } 1648 EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL"); 1649 1650 struct bpf_map *bpf_map_get_with_uref(u32 ufd) 1651 { 1652 CLASS(fd, f)(ufd); 1653 struct bpf_map *map = __bpf_map_get(f); 1654 1655 if (!IS_ERR(map)) 1656 bpf_map_inc_with_uref(map); 1657 1658 return map; 1659 } 1660 1661 /* map_idr_lock should have been held or the map should have been 1662 * protected by rcu read lock. 
1663 */ 1664 struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) 1665 { 1666 int refold; 1667 1668 refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); 1669 if (!refold) 1670 return ERR_PTR(-ENOENT); 1671 if (uref) 1672 atomic64_inc(&map->usercnt); 1673 1674 return map; 1675 } 1676 1677 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) 1678 { 1679 lockdep_assert(rcu_read_lock_held()); 1680 return __bpf_map_inc_not_zero(map, false); 1681 } 1682 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); 1683 1684 int __weak bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, 1685 bool delete) 1686 { 1687 return -ENOTSUPP; 1688 } 1689 1690 static void *__bpf_copy_key(void __user *ukey, u64 key_size) 1691 { 1692 if (key_size) 1693 return vmemdup_user(ukey, key_size); 1694 1695 if (ukey) 1696 return ERR_PTR(-EINVAL); 1697 1698 return NULL; 1699 } 1700 1701 static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size) 1702 { 1703 if (key_size) 1704 return kvmemdup_bpfptr(ukey, key_size); 1705 1706 if (!bpfptr_is_null(ukey)) 1707 return ERR_PTR(-EINVAL); 1708 1709 return NULL; 1710 } 1711 1712 /* last field in 'union bpf_attr' used by this command */ 1713 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags 1714 1715 static int map_lookup_elem(union bpf_attr *attr) 1716 { 1717 void __user *ukey = u64_to_user_ptr(attr->key); 1718 void __user *uvalue = u64_to_user_ptr(attr->value); 1719 struct bpf_map *map; 1720 void *key, *value; 1721 u32 value_size; 1722 int err; 1723 1724 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 1725 return -EINVAL; 1726 1727 if (attr->flags & ~BPF_F_LOCK) 1728 return -EINVAL; 1729 1730 CLASS(fd, f)(attr->map_fd); 1731 map = __bpf_map_get(f); 1732 if (IS_ERR(map)) 1733 return PTR_ERR(map); 1734 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) 1735 return -EPERM; 1736 1737 if ((attr->flags & BPF_F_LOCK) && 1738 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) 1739 return -EINVAL; 1740 1741 key = __bpf_copy_key(ukey, map->key_size); 1742 if (IS_ERR(key)) 1743 return PTR_ERR(key); 1744 1745 value_size = bpf_map_value_size(map); 1746 1747 err = -ENOMEM; 1748 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1749 if (!value) 1750 goto free_key; 1751 1752 if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 1753 if (copy_from_user(value, uvalue, value_size)) 1754 err = -EFAULT; 1755 else 1756 err = bpf_map_copy_value(map, key, value, attr->flags); 1757 goto free_value; 1758 } 1759 1760 err = bpf_map_copy_value(map, key, value, attr->flags); 1761 if (err) 1762 goto free_value; 1763 1764 err = -EFAULT; 1765 if (copy_to_user(uvalue, value, value_size) != 0) 1766 goto free_value; 1767 1768 err = 0; 1769 1770 free_value: 1771 kvfree(value); 1772 free_key: 1773 kvfree(key); 1774 return err; 1775 } 1776 1777 1778 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 1779 1780 static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) 1781 { 1782 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1783 bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel); 1784 struct bpf_map *map; 1785 void *key, *value; 1786 u32 value_size; 1787 int err; 1788 1789 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 1790 return -EINVAL; 1791 1792 CLASS(fd, f)(attr->map_fd); 1793 map = __bpf_map_get(f); 1794 if (IS_ERR(map)) 1795 return PTR_ERR(map); 1796 bpf_map_write_active_inc(map); 1797 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1798 err = -EPERM; 1799 goto err_put; 1800 } 1801 1802 if ((attr->flags & BPF_F_LOCK) && 1803 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1804 err = 
-EINVAL; 1805 goto err_put; 1806 } 1807 1808 key = ___bpf_copy_key(ukey, map->key_size); 1809 if (IS_ERR(key)) { 1810 err = PTR_ERR(key); 1811 goto err_put; 1812 } 1813 1814 value_size = bpf_map_value_size(map); 1815 value = kvmemdup_bpfptr(uvalue, value_size); 1816 if (IS_ERR(value)) { 1817 err = PTR_ERR(value); 1818 goto free_key; 1819 } 1820 1821 err = bpf_map_update_value(map, fd_file(f), key, value, attr->flags); 1822 if (!err) 1823 maybe_wait_bpf_programs(map); 1824 1825 kvfree(value); 1826 free_key: 1827 kvfree(key); 1828 err_put: 1829 bpf_map_write_active_dec(map); 1830 return err; 1831 } 1832 1833 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 1834 1835 static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr) 1836 { 1837 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1838 struct bpf_map *map; 1839 void *key; 1840 int err; 1841 1842 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 1843 return -EINVAL; 1844 1845 CLASS(fd, f)(attr->map_fd); 1846 map = __bpf_map_get(f); 1847 if (IS_ERR(map)) 1848 return PTR_ERR(map); 1849 bpf_map_write_active_inc(map); 1850 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1851 err = -EPERM; 1852 goto err_put; 1853 } 1854 1855 key = ___bpf_copy_key(ukey, map->key_size); 1856 if (IS_ERR(key)) { 1857 err = PTR_ERR(key); 1858 goto err_put; 1859 } 1860 1861 if (bpf_map_is_offloaded(map)) { 1862 err = bpf_map_offload_delete_elem(map, key); 1863 goto out; 1864 } else if (IS_FD_PROG_ARRAY(map) || 1865 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 1866 /* These maps require sleepable context */ 1867 err = map->ops->map_delete_elem(map, key); 1868 goto out; 1869 } 1870 1871 bpf_disable_instrumentation(); 1872 rcu_read_lock(); 1873 err = map->ops->map_delete_elem(map, key); 1874 rcu_read_unlock(); 1875 bpf_enable_instrumentation(); 1876 if (!err) 1877 maybe_wait_bpf_programs(map); 1878 out: 1879 kvfree(key); 1880 err_put: 1881 bpf_map_write_active_dec(map); 1882 return err; 1883 } 1884 1885 /* last field in 'union bpf_attr' used by this command */ 1886 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 1887 1888 static int map_get_next_key(union bpf_attr *attr) 1889 { 1890 void __user *ukey = u64_to_user_ptr(attr->key); 1891 void __user *unext_key = u64_to_user_ptr(attr->next_key); 1892 struct bpf_map *map; 1893 void *key, *next_key; 1894 int err; 1895 1896 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 1897 return -EINVAL; 1898 1899 CLASS(fd, f)(attr->map_fd); 1900 map = __bpf_map_get(f); 1901 if (IS_ERR(map)) 1902 return PTR_ERR(map); 1903 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) 1904 return -EPERM; 1905 1906 if (ukey) { 1907 key = __bpf_copy_key(ukey, map->key_size); 1908 if (IS_ERR(key)) 1909 return PTR_ERR(key); 1910 } else { 1911 key = NULL; 1912 } 1913 1914 err = -ENOMEM; 1915 next_key = kvmalloc(map->key_size, GFP_USER); 1916 if (!next_key) 1917 goto free_key; 1918 1919 if (bpf_map_is_offloaded(map)) { 1920 err = bpf_map_offload_get_next_key(map, key, next_key); 1921 goto out; 1922 } 1923 1924 rcu_read_lock(); 1925 err = map->ops->map_get_next_key(map, key, next_key); 1926 rcu_read_unlock(); 1927 out: 1928 if (err) 1929 goto free_next_key; 1930 1931 err = -EFAULT; 1932 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 1933 goto free_next_key; 1934 1935 err = 0; 1936 1937 free_next_key: 1938 kvfree(next_key); 1939 free_key: 1940 kvfree(key); 1941 return err; 1942 } 1943 1944 int generic_map_delete_batch(struct bpf_map *map, 1945 const union bpf_attr *attr, 1946 union bpf_attr __user *uattr) 1947 { 1948 void __user *keys = 
u64_to_user_ptr(attr->batch.keys); 1949 u32 cp, max_count; 1950 int err = 0; 1951 void *key; 1952 1953 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1954 return -EINVAL; 1955 1956 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1957 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1958 return -EINVAL; 1959 } 1960 1961 max_count = attr->batch.count; 1962 if (!max_count) 1963 return 0; 1964 1965 if (put_user(0, &uattr->batch.count)) 1966 return -EFAULT; 1967 1968 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1969 if (!key) 1970 return -ENOMEM; 1971 1972 for (cp = 0; cp < max_count; cp++) { 1973 err = -EFAULT; 1974 if (copy_from_user(key, keys + cp * map->key_size, 1975 map->key_size)) 1976 break; 1977 1978 if (bpf_map_is_offloaded(map)) { 1979 err = bpf_map_offload_delete_elem(map, key); 1980 break; 1981 } 1982 1983 bpf_disable_instrumentation(); 1984 rcu_read_lock(); 1985 err = map->ops->map_delete_elem(map, key); 1986 rcu_read_unlock(); 1987 bpf_enable_instrumentation(); 1988 if (err) 1989 break; 1990 cond_resched(); 1991 } 1992 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1993 err = -EFAULT; 1994 1995 kvfree(key); 1996 1997 return err; 1998 } 1999 2000 int generic_map_update_batch(struct bpf_map *map, struct file *map_file, 2001 const union bpf_attr *attr, 2002 union bpf_attr __user *uattr) 2003 { 2004 void __user *values = u64_to_user_ptr(attr->batch.values); 2005 void __user *keys = u64_to_user_ptr(attr->batch.keys); 2006 u32 value_size, cp, max_count; 2007 void *key, *value; 2008 int err = 0; 2009 2010 if (attr->batch.elem_flags & ~BPF_F_LOCK) 2011 return -EINVAL; 2012 2013 if ((attr->batch.elem_flags & BPF_F_LOCK) && 2014 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 2015 return -EINVAL; 2016 } 2017 2018 value_size = bpf_map_value_size(map); 2019 2020 max_count = attr->batch.count; 2021 if (!max_count) 2022 return 0; 2023 2024 if (put_user(0, &uattr->batch.count)) 2025 return -EFAULT; 2026 2027 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 2028 if (!key) 2029 return -ENOMEM; 2030 2031 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 2032 if (!value) { 2033 kvfree(key); 2034 return -ENOMEM; 2035 } 2036 2037 for (cp = 0; cp < max_count; cp++) { 2038 err = -EFAULT; 2039 if (copy_from_user(key, keys + cp * map->key_size, 2040 map->key_size) || 2041 copy_from_user(value, values + cp * value_size, value_size)) 2042 break; 2043 2044 err = bpf_map_update_value(map, map_file, key, value, 2045 attr->batch.elem_flags); 2046 2047 if (err) 2048 break; 2049 cond_resched(); 2050 } 2051 2052 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 2053 err = -EFAULT; 2054 2055 kvfree(value); 2056 kvfree(key); 2057 2058 return err; 2059 } 2060 2061 int generic_map_lookup_batch(struct bpf_map *map, 2062 const union bpf_attr *attr, 2063 union bpf_attr __user *uattr) 2064 { 2065 void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); 2066 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 2067 void __user *values = u64_to_user_ptr(attr->batch.values); 2068 void __user *keys = u64_to_user_ptr(attr->batch.keys); 2069 void *buf, *buf_prevkey, *prev_key, *key, *value; 2070 u32 value_size, cp, max_count; 2071 int err; 2072 2073 if (attr->batch.elem_flags & ~BPF_F_LOCK) 2074 return -EINVAL; 2075 2076 if ((attr->batch.elem_flags & BPF_F_LOCK) && 2077 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) 2078 return -EINVAL; 2079 2080 value_size = bpf_map_value_size(map); 2081 2082 max_count = attr->batch.count; 2083 if (!max_count) 2084 return 0; 2085 2086 
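	/* Report a batch count of zero to userspace up front, so the count
	 * stays accurate even if a later copy to userspace fails.
	 */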
if (put_user(0, &uattr->batch.count)) 2087 return -EFAULT; 2088 2089 buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 2090 if (!buf_prevkey) 2091 return -ENOMEM; 2092 2093 buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN); 2094 if (!buf) { 2095 kvfree(buf_prevkey); 2096 return -ENOMEM; 2097 } 2098 2099 err = -EFAULT; 2100 prev_key = NULL; 2101 if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) 2102 goto free_buf; 2103 key = buf; 2104 value = key + map->key_size; 2105 if (ubatch) 2106 prev_key = buf_prevkey; 2107 2108 for (cp = 0; cp < max_count;) { 2109 rcu_read_lock(); 2110 err = map->ops->map_get_next_key(map, prev_key, key); 2111 rcu_read_unlock(); 2112 if (err) 2113 break; 2114 err = bpf_map_copy_value(map, key, value, 2115 attr->batch.elem_flags); 2116 2117 if (err == -ENOENT) 2118 goto next_key; 2119 2120 if (err) 2121 goto free_buf; 2122 2123 if (copy_to_user(keys + cp * map->key_size, key, 2124 map->key_size)) { 2125 err = -EFAULT; 2126 goto free_buf; 2127 } 2128 if (copy_to_user(values + cp * value_size, value, value_size)) { 2129 err = -EFAULT; 2130 goto free_buf; 2131 } 2132 2133 cp++; 2134 next_key: 2135 if (!prev_key) 2136 prev_key = buf_prevkey; 2137 2138 swap(prev_key, key); 2139 cond_resched(); 2140 } 2141 2142 if (err == -EFAULT) 2143 goto free_buf; 2144 2145 if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || 2146 (cp && copy_to_user(uobatch, prev_key, map->key_size)))) 2147 err = -EFAULT; 2148 2149 free_buf: 2150 kvfree(buf_prevkey); 2151 kvfree(buf); 2152 return err; 2153 } 2154 2155 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags 2156 2157 static int map_lookup_and_delete_elem(union bpf_attr *attr) 2158 { 2159 void __user *ukey = u64_to_user_ptr(attr->key); 2160 void __user *uvalue = u64_to_user_ptr(attr->value); 2161 struct bpf_map *map; 2162 void *key, *value; 2163 u32 value_size; 2164 int err; 2165 2166 if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) 2167 return -EINVAL; 2168 2169 if (attr->flags & ~BPF_F_LOCK) 2170 return -EINVAL; 2171 2172 CLASS(fd, f)(attr->map_fd); 2173 map = __bpf_map_get(f); 2174 if (IS_ERR(map)) 2175 return PTR_ERR(map); 2176 bpf_map_write_active_inc(map); 2177 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || 2178 !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 2179 err = -EPERM; 2180 goto err_put; 2181 } 2182 2183 if (attr->flags && 2184 (map->map_type == BPF_MAP_TYPE_QUEUE || 2185 map->map_type == BPF_MAP_TYPE_STACK)) { 2186 err = -EINVAL; 2187 goto err_put; 2188 } 2189 2190 if ((attr->flags & BPF_F_LOCK) && 2191 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 2192 err = -EINVAL; 2193 goto err_put; 2194 } 2195 2196 key = __bpf_copy_key(ukey, map->key_size); 2197 if (IS_ERR(key)) { 2198 err = PTR_ERR(key); 2199 goto err_put; 2200 } 2201 2202 value_size = bpf_map_value_size(map); 2203 2204 err = -ENOMEM; 2205 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 2206 if (!value) 2207 goto free_key; 2208 2209 err = -ENOTSUPP; 2210 if (map->map_type == BPF_MAP_TYPE_QUEUE || 2211 map->map_type == BPF_MAP_TYPE_STACK) { 2212 err = map->ops->map_pop_elem(map, value); 2213 } else if (map->map_type == BPF_MAP_TYPE_HASH || 2214 map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 2215 map->map_type == BPF_MAP_TYPE_LRU_HASH || 2216 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 2217 map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 2218 if (!bpf_map_is_offloaded(map)) { 2219 bpf_disable_instrumentation(); 2220 rcu_read_lock(); 2221 err = map->ops->map_lookup_and_delete_elem(map, key, 
value, attr->flags); 2222 rcu_read_unlock(); 2223 bpf_enable_instrumentation(); 2224 } 2225 } 2226 2227 if (err) 2228 goto free_value; 2229 2230 if (copy_to_user(uvalue, value, value_size) != 0) { 2231 err = -EFAULT; 2232 goto free_value; 2233 } 2234 2235 err = 0; 2236 2237 free_value: 2238 kvfree(value); 2239 free_key: 2240 kvfree(key); 2241 err_put: 2242 bpf_map_write_active_dec(map); 2243 return err; 2244 } 2245 2246 #define BPF_MAP_FREEZE_LAST_FIELD map_fd 2247 2248 static int map_freeze(const union bpf_attr *attr) 2249 { 2250 int err = 0; 2251 struct bpf_map *map; 2252 2253 if (CHECK_ATTR(BPF_MAP_FREEZE)) 2254 return -EINVAL; 2255 2256 CLASS(fd, f)(attr->map_fd); 2257 map = __bpf_map_get(f); 2258 if (IS_ERR(map)) 2259 return PTR_ERR(map); 2260 2261 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) 2262 return -ENOTSUPP; 2263 2264 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) 2265 return -EPERM; 2266 2267 mutex_lock(&map->freeze_mutex); 2268 if (bpf_map_write_active(map)) { 2269 err = -EBUSY; 2270 goto err_put; 2271 } 2272 if (READ_ONCE(map->frozen)) { 2273 err = -EBUSY; 2274 goto err_put; 2275 } 2276 2277 WRITE_ONCE(map->frozen, true); 2278 err_put: 2279 mutex_unlock(&map->freeze_mutex); 2280 return err; 2281 } 2282 2283 static const struct bpf_prog_ops * const bpf_prog_types[] = { 2284 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 2285 [_id] = & _name ## _prog_ops, 2286 #define BPF_MAP_TYPE(_id, _ops) 2287 #define BPF_LINK_TYPE(_id, _name) 2288 #include <linux/bpf_types.h> 2289 #undef BPF_PROG_TYPE 2290 #undef BPF_MAP_TYPE 2291 #undef BPF_LINK_TYPE 2292 }; 2293 2294 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 2295 { 2296 const struct bpf_prog_ops *ops; 2297 2298 if (type >= ARRAY_SIZE(bpf_prog_types)) 2299 return -EINVAL; 2300 type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types)); 2301 ops = bpf_prog_types[type]; 2302 if (!ops) 2303 return -EINVAL; 2304 2305 if (!bpf_prog_is_offloaded(prog->aux)) 2306 prog->aux->ops = ops; 2307 else 2308 prog->aux->ops = &bpf_offload_prog_ops; 2309 prog->type = type; 2310 return 0; 2311 } 2312 2313 enum bpf_audit { 2314 BPF_AUDIT_LOAD, 2315 BPF_AUDIT_UNLOAD, 2316 BPF_AUDIT_MAX, 2317 }; 2318 2319 static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { 2320 [BPF_AUDIT_LOAD] = "LOAD", 2321 [BPF_AUDIT_UNLOAD] = "UNLOAD", 2322 }; 2323 2324 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) 2325 { 2326 struct audit_context *ctx = NULL; 2327 struct audit_buffer *ab; 2328 2329 if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) 2330 return; 2331 if (audit_enabled == AUDIT_OFF) 2332 return; 2333 if (!in_irq() && !irqs_disabled()) 2334 ctx = audit_context(); 2335 ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); 2336 if (unlikely(!ab)) 2337 return; 2338 audit_log_format(ab, "prog-id=%u op=%s", 2339 prog->aux->id, bpf_audit_str[op]); 2340 audit_log_end(ab); 2341 } 2342 2343 static int bpf_prog_alloc_id(struct bpf_prog *prog) 2344 { 2345 int id; 2346 2347 idr_preload(GFP_KERNEL); 2348 spin_lock_bh(&prog_idr_lock); 2349 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 2350 if (id > 0) 2351 prog->aux->id = id; 2352 spin_unlock_bh(&prog_idr_lock); 2353 idr_preload_end(); 2354 2355 /* id is in [1, INT_MAX) */ 2356 if (WARN_ON_ONCE(!id)) 2357 return -ENOSPC; 2358 2359 return id > 0 ? 
0 : id; 2360 } 2361 2362 void bpf_prog_free_id(struct bpf_prog *prog) 2363 { 2364 unsigned long flags; 2365 2366 /* cBPF to eBPF migrations are currently not in the idr store. 2367 * Offloaded programs are removed from the store when their device 2368 * disappears - even if someone grabs an fd to them they are unusable, 2369 * simply waiting for refcnt to drop to be freed. 2370 */ 2371 if (!prog->aux->id) 2372 return; 2373 2374 spin_lock_irqsave(&prog_idr_lock, flags); 2375 idr_remove(&prog_idr, prog->aux->id); 2376 prog->aux->id = 0; 2377 spin_unlock_irqrestore(&prog_idr_lock, flags); 2378 } 2379 2380 static void __bpf_prog_put_rcu(struct rcu_head *rcu) 2381 { 2382 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 2383 2384 kvfree(aux->func_info); 2385 kfree(aux->func_info_aux); 2386 free_uid(aux->user); 2387 security_bpf_prog_free(aux->prog); 2388 bpf_prog_free(aux->prog); 2389 } 2390 2391 static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) 2392 { 2393 bpf_prog_kallsyms_del_all(prog); 2394 btf_put(prog->aux->btf); 2395 module_put(prog->aux->mod); 2396 kvfree(prog->aux->jited_linfo); 2397 kvfree(prog->aux->linfo); 2398 kfree(prog->aux->kfunc_tab); 2399 kfree(prog->aux->ctx_arg_info); 2400 if (prog->aux->attach_btf) 2401 btf_put(prog->aux->attach_btf); 2402 2403 if (deferred) { 2404 if (prog->sleepable) 2405 call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); 2406 else 2407 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 2408 } else { 2409 __bpf_prog_put_rcu(&prog->aux->rcu); 2410 } 2411 } 2412 2413 static void bpf_prog_put_deferred(struct work_struct *work) 2414 { 2415 struct bpf_prog_aux *aux; 2416 struct bpf_prog *prog; 2417 2418 aux = container_of(work, struct bpf_prog_aux, work); 2419 prog = aux->prog; 2420 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 2421 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); 2422 bpf_prog_free_id(prog); 2423 __bpf_prog_put_noref(prog, true); 2424 } 2425 2426 static void __bpf_prog_put(struct bpf_prog *prog) 2427 { 2428 struct bpf_prog_aux *aux = prog->aux; 2429 2430 if (atomic64_dec_and_test(&aux->refcnt)) { 2431 if (in_irq() || irqs_disabled()) { 2432 INIT_WORK(&aux->work, bpf_prog_put_deferred); 2433 schedule_work(&aux->work); 2434 } else { 2435 bpf_prog_put_deferred(&aux->work); 2436 } 2437 } 2438 } 2439 2440 void bpf_prog_put(struct bpf_prog *prog) 2441 { 2442 __bpf_prog_put(prog); 2443 } 2444 EXPORT_SYMBOL_GPL(bpf_prog_put); 2445 2446 static int bpf_prog_release(struct inode *inode, struct file *filp) 2447 { 2448 struct bpf_prog *prog = filp->private_data; 2449 2450 bpf_prog_put(prog); 2451 return 0; 2452 } 2453 2454 struct bpf_prog_kstats { 2455 u64 nsecs; 2456 u64 cnt; 2457 u64 misses; 2458 }; 2459 2460 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog) 2461 { 2462 struct bpf_prog_stats *stats; 2463 unsigned int flags; 2464 2465 stats = this_cpu_ptr(prog->stats); 2466 flags = u64_stats_update_begin_irqsave(&stats->syncp); 2467 u64_stats_inc(&stats->misses); 2468 u64_stats_update_end_irqrestore(&stats->syncp, flags); 2469 } 2470 2471 static void bpf_prog_get_stats(const struct bpf_prog *prog, 2472 struct bpf_prog_kstats *stats) 2473 { 2474 u64 nsecs = 0, cnt = 0, misses = 0; 2475 int cpu; 2476 2477 for_each_possible_cpu(cpu) { 2478 const struct bpf_prog_stats *st; 2479 unsigned int start; 2480 u64 tnsecs, tcnt, tmisses; 2481 2482 st = per_cpu_ptr(prog->stats, cpu); 2483 do { 2484 start = u64_stats_fetch_begin(&st->syncp); 2485 tnsecs = u64_stats_read(&st->nsecs); 2486 tcnt = 
u64_stats_read(&st->cnt); 2487 tmisses = u64_stats_read(&st->misses); 2488 } while (u64_stats_fetch_retry(&st->syncp, start)); 2489 nsecs += tnsecs; 2490 cnt += tcnt; 2491 misses += tmisses; 2492 } 2493 stats->nsecs = nsecs; 2494 stats->cnt = cnt; 2495 stats->misses = misses; 2496 } 2497 2498 #ifdef CONFIG_PROC_FS 2499 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 2500 { 2501 const struct bpf_prog *prog = filp->private_data; 2502 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2503 struct bpf_prog_kstats stats; 2504 2505 bpf_prog_get_stats(prog, &stats); 2506 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2507 seq_printf(m, 2508 "prog_type:\t%u\n" 2509 "prog_jited:\t%u\n" 2510 "prog_tag:\t%s\n" 2511 "memlock:\t%llu\n" 2512 "prog_id:\t%u\n" 2513 "run_time_ns:\t%llu\n" 2514 "run_cnt:\t%llu\n" 2515 "recursion_misses:\t%llu\n" 2516 "verified_insns:\t%u\n", 2517 prog->type, 2518 prog->jited, 2519 prog_tag, 2520 prog->pages * 1ULL << PAGE_SHIFT, 2521 prog->aux->id, 2522 stats.nsecs, 2523 stats.cnt, 2524 stats.misses, 2525 prog->aux->verified_insns); 2526 } 2527 #endif 2528 2529 const struct file_operations bpf_prog_fops = { 2530 #ifdef CONFIG_PROC_FS 2531 .show_fdinfo = bpf_prog_show_fdinfo, 2532 #endif 2533 .release = bpf_prog_release, 2534 .read = bpf_dummy_read, 2535 .write = bpf_dummy_write, 2536 }; 2537 2538 int bpf_prog_new_fd(struct bpf_prog *prog) 2539 { 2540 int ret; 2541 2542 ret = security_bpf_prog(prog); 2543 if (ret < 0) 2544 return ret; 2545 2546 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 2547 O_RDWR | O_CLOEXEC); 2548 } 2549 2550 void bpf_prog_add(struct bpf_prog *prog, int i) 2551 { 2552 atomic64_add(i, &prog->aux->refcnt); 2553 } 2554 EXPORT_SYMBOL_GPL(bpf_prog_add); 2555 2556 void bpf_prog_sub(struct bpf_prog *prog, int i) 2557 { 2558 /* Only to be used for undoing previous bpf_prog_add() in some 2559 * error path. We still know that another entity in our call 2560 * path holds a reference to the program, thus atomic_sub() can 2561 * be safely used in such cases! 
2562 */ 2563 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 2564 } 2565 EXPORT_SYMBOL_GPL(bpf_prog_sub); 2566 2567 void bpf_prog_inc(struct bpf_prog *prog) 2568 { 2569 atomic64_inc(&prog->aux->refcnt); 2570 } 2571 EXPORT_SYMBOL_GPL(bpf_prog_inc); 2572 2573 /* prog_idr_lock should have been held */ 2574 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 2575 { 2576 int refold; 2577 2578 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 2579 2580 if (!refold) 2581 return ERR_PTR(-ENOENT); 2582 2583 return prog; 2584 } 2585 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 2586 2587 bool bpf_prog_get_ok(struct bpf_prog *prog, 2588 enum bpf_prog_type *attach_type, bool attach_drv) 2589 { 2590 /* not an attachment, just a refcount inc, always allow */ 2591 if (!attach_type) 2592 return true; 2593 2594 if (prog->type != *attach_type) 2595 return false; 2596 if (bpf_prog_is_offloaded(prog->aux) && !attach_drv) 2597 return false; 2598 2599 return true; 2600 } 2601 2602 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, 2603 bool attach_drv) 2604 { 2605 CLASS(fd, f)(ufd); 2606 struct bpf_prog *prog; 2607 2608 if (fd_empty(f)) 2609 return ERR_PTR(-EBADF); 2610 if (fd_file(f)->f_op != &bpf_prog_fops) 2611 return ERR_PTR(-EINVAL); 2612 2613 prog = fd_file(f)->private_data; 2614 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) 2615 return ERR_PTR(-EINVAL); 2616 2617 bpf_prog_inc(prog); 2618 return prog; 2619 } 2620 2621 struct bpf_prog *bpf_prog_get(u32 ufd) 2622 { 2623 return __bpf_prog_get(ufd, NULL, false); 2624 } 2625 2626 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, 2627 bool attach_drv) 2628 { 2629 return __bpf_prog_get(ufd, &type, attach_drv); 2630 } 2631 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 2632 2633 /* Initially all BPF programs could be loaded w/o specifying 2634 * expected_attach_type. Later for some of them specifying expected_attach_type 2635 * at load time became required so that program could be validated properly. 2636 * Programs of types that are allowed to be loaded both w/ and w/o (for 2637 * backward compatibility) expected_attach_type, should have the default attach 2638 * type assigned to expected_attach_type for the latter case, so that it can be 2639 * validated later at attach time. 2640 * 2641 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if 2642 * prog type requires it but has some attach types that have to be backward 2643 * compatible. 2644 */ 2645 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) 2646 { 2647 switch (attr->prog_type) { 2648 case BPF_PROG_TYPE_CGROUP_SOCK: 2649 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't 2650 * exist so checking for non-zero is the way to go here. 
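		 * For example, a BPF_PROG_TYPE_CGROUP_SOCK program loaded with
		 * expected_attach_type left at zero is treated as
		 * BPF_CGROUP_INET_SOCK_CREATE below and validated against that
		 * default at attach time.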
2651 */ 2652 if (!attr->expected_attach_type) 2653 attr->expected_attach_type = 2654 BPF_CGROUP_INET_SOCK_CREATE; 2655 break; 2656 case BPF_PROG_TYPE_SK_REUSEPORT: 2657 if (!attr->expected_attach_type) 2658 attr->expected_attach_type = 2659 BPF_SK_REUSEPORT_SELECT; 2660 break; 2661 } 2662 } 2663 2664 static int 2665 bpf_prog_load_check_attach(enum bpf_prog_type prog_type, 2666 enum bpf_attach_type expected_attach_type, 2667 struct btf *attach_btf, u32 btf_id, 2668 struct bpf_prog *dst_prog) 2669 { 2670 if (btf_id) { 2671 if (btf_id > BTF_MAX_TYPE) 2672 return -EINVAL; 2673 2674 if (!attach_btf && !dst_prog) 2675 return -EINVAL; 2676 2677 switch (prog_type) { 2678 case BPF_PROG_TYPE_TRACING: 2679 case BPF_PROG_TYPE_LSM: 2680 case BPF_PROG_TYPE_STRUCT_OPS: 2681 case BPF_PROG_TYPE_EXT: 2682 break; 2683 default: 2684 return -EINVAL; 2685 } 2686 } 2687 2688 if (attach_btf && (!btf_id || dst_prog)) 2689 return -EINVAL; 2690 2691 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && 2692 prog_type != BPF_PROG_TYPE_EXT) 2693 return -EINVAL; 2694 2695 switch (prog_type) { 2696 case BPF_PROG_TYPE_CGROUP_SOCK: 2697 switch (expected_attach_type) { 2698 case BPF_CGROUP_INET_SOCK_CREATE: 2699 case BPF_CGROUP_INET_SOCK_RELEASE: 2700 case BPF_CGROUP_INET4_POST_BIND: 2701 case BPF_CGROUP_INET6_POST_BIND: 2702 return 0; 2703 default: 2704 return -EINVAL; 2705 } 2706 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2707 switch (expected_attach_type) { 2708 case BPF_CGROUP_INET4_BIND: 2709 case BPF_CGROUP_INET6_BIND: 2710 case BPF_CGROUP_INET4_CONNECT: 2711 case BPF_CGROUP_INET6_CONNECT: 2712 case BPF_CGROUP_UNIX_CONNECT: 2713 case BPF_CGROUP_INET4_GETPEERNAME: 2714 case BPF_CGROUP_INET6_GETPEERNAME: 2715 case BPF_CGROUP_UNIX_GETPEERNAME: 2716 case BPF_CGROUP_INET4_GETSOCKNAME: 2717 case BPF_CGROUP_INET6_GETSOCKNAME: 2718 case BPF_CGROUP_UNIX_GETSOCKNAME: 2719 case BPF_CGROUP_UDP4_SENDMSG: 2720 case BPF_CGROUP_UDP6_SENDMSG: 2721 case BPF_CGROUP_UNIX_SENDMSG: 2722 case BPF_CGROUP_UDP4_RECVMSG: 2723 case BPF_CGROUP_UDP6_RECVMSG: 2724 case BPF_CGROUP_UNIX_RECVMSG: 2725 return 0; 2726 default: 2727 return -EINVAL; 2728 } 2729 case BPF_PROG_TYPE_CGROUP_SKB: 2730 switch (expected_attach_type) { 2731 case BPF_CGROUP_INET_INGRESS: 2732 case BPF_CGROUP_INET_EGRESS: 2733 return 0; 2734 default: 2735 return -EINVAL; 2736 } 2737 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2738 switch (expected_attach_type) { 2739 case BPF_CGROUP_SETSOCKOPT: 2740 case BPF_CGROUP_GETSOCKOPT: 2741 return 0; 2742 default: 2743 return -EINVAL; 2744 } 2745 case BPF_PROG_TYPE_SK_LOOKUP: 2746 if (expected_attach_type == BPF_SK_LOOKUP) 2747 return 0; 2748 return -EINVAL; 2749 case BPF_PROG_TYPE_SK_REUSEPORT: 2750 switch (expected_attach_type) { 2751 case BPF_SK_REUSEPORT_SELECT: 2752 case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: 2753 return 0; 2754 default: 2755 return -EINVAL; 2756 } 2757 case BPF_PROG_TYPE_NETFILTER: 2758 if (expected_attach_type == BPF_NETFILTER) 2759 return 0; 2760 return -EINVAL; 2761 case BPF_PROG_TYPE_SYSCALL: 2762 case BPF_PROG_TYPE_EXT: 2763 if (expected_attach_type) 2764 return -EINVAL; 2765 fallthrough; 2766 default: 2767 return 0; 2768 } 2769 } 2770 2771 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) 2772 { 2773 switch (prog_type) { 2774 case BPF_PROG_TYPE_SCHED_CLS: 2775 case BPF_PROG_TYPE_SCHED_ACT: 2776 case BPF_PROG_TYPE_XDP: 2777 case BPF_PROG_TYPE_LWT_IN: 2778 case BPF_PROG_TYPE_LWT_OUT: 2779 case BPF_PROG_TYPE_LWT_XMIT: 2780 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2781 case BPF_PROG_TYPE_SK_SKB: 2782 case 
BPF_PROG_TYPE_SK_MSG: 2783 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2784 case BPF_PROG_TYPE_CGROUP_DEVICE: 2785 case BPF_PROG_TYPE_CGROUP_SOCK: 2786 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2787 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2788 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2789 case BPF_PROG_TYPE_SOCK_OPS: 2790 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2791 case BPF_PROG_TYPE_NETFILTER: 2792 return true; 2793 case BPF_PROG_TYPE_CGROUP_SKB: 2794 /* always unpriv */ 2795 case BPF_PROG_TYPE_SK_REUSEPORT: 2796 /* equivalent to SOCKET_FILTER. need CAP_BPF only */ 2797 default: 2798 return false; 2799 } 2800 } 2801 2802 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) 2803 { 2804 switch (prog_type) { 2805 case BPF_PROG_TYPE_KPROBE: 2806 case BPF_PROG_TYPE_TRACEPOINT: 2807 case BPF_PROG_TYPE_PERF_EVENT: 2808 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2809 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2810 case BPF_PROG_TYPE_TRACING: 2811 case BPF_PROG_TYPE_LSM: 2812 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ 2813 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2814 return true; 2815 default: 2816 return false; 2817 } 2818 } 2819 2820 static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr, 2821 bool is_kernel) 2822 { 2823 bpfptr_t usig = make_bpfptr(attr->signature, is_kernel); 2824 struct bpf_dynptr_kern sig_ptr, insns_ptr; 2825 struct bpf_key *key = NULL; 2826 void *sig; 2827 int err = 0; 2828 2829 if (system_keyring_id_check(attr->keyring_id) == 0) 2830 key = bpf_lookup_system_key(attr->keyring_id); 2831 else 2832 key = bpf_lookup_user_key(attr->keyring_id, 0); 2833 2834 if (!key) 2835 return -EINVAL; 2836 2837 sig = kvmemdup_bpfptr(usig, attr->signature_size); 2838 if (IS_ERR(sig)) { 2839 bpf_key_put(key); 2840 return -ENOMEM; 2841 } 2842 2843 bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0, 2844 attr->signature_size); 2845 bpf_dynptr_init(&insns_ptr, prog->insnsi, BPF_DYNPTR_TYPE_LOCAL, 0, 2846 prog->len * sizeof(struct bpf_insn)); 2847 2848 err = bpf_verify_pkcs7_signature((struct bpf_dynptr *)&insns_ptr, 2849 (struct bpf_dynptr *)&sig_ptr, key); 2850 2851 bpf_key_put(key); 2852 kvfree(sig); 2853 return err; 2854 } 2855 2856 /* last field in 'union bpf_attr' used by this command */ 2857 #define BPF_PROG_LOAD_LAST_FIELD keyring_id 2858 2859 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) 2860 { 2861 enum bpf_prog_type type = attr->prog_type; 2862 struct bpf_prog *prog, *dst_prog = NULL; 2863 struct btf *attach_btf = NULL; 2864 struct bpf_token *token = NULL; 2865 bool bpf_cap; 2866 int err; 2867 char license[128]; 2868 2869 if (CHECK_ATTR(BPF_PROG_LOAD)) 2870 return -EINVAL; 2871 2872 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | 2873 BPF_F_ANY_ALIGNMENT | 2874 BPF_F_TEST_STATE_FREQ | 2875 BPF_F_SLEEPABLE | 2876 BPF_F_TEST_RND_HI32 | 2877 BPF_F_XDP_HAS_FRAGS | 2878 BPF_F_XDP_DEV_BOUND_ONLY | 2879 BPF_F_TEST_REG_INVARIANTS | 2880 BPF_F_TOKEN_FD)) 2881 return -EINVAL; 2882 2883 bpf_prog_load_fixup_attach_type(attr); 2884 2885 if (attr->prog_flags & BPF_F_TOKEN_FD) { 2886 token = bpf_token_get_from_fd(attr->prog_token_fd); 2887 if (IS_ERR(token)) 2888 return PTR_ERR(token); 2889 /* if current token doesn't grant prog loading permissions, 2890 * then we can't use this token, so ignore it and rely on 2891 * system-wide capabilities checks 2892 */ 2893 if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || 2894 !bpf_token_allow_prog_type(token, attr->prog_type, 2895 attr->expected_attach_type)) { 2896 bpf_token_put(token); 
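			/* With the token reference dropped here and the pointer
			 * cleared just below, the remaining permission checks in
			 * this function see token == NULL and fall back to
			 * system-wide capability checks.
			 */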
2897 token = NULL; 2898 } 2899 } 2900 2901 bpf_cap = bpf_token_capable(token, CAP_BPF); 2902 err = -EPERM; 2903 2904 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 2905 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && 2906 !bpf_cap) 2907 goto put_token; 2908 2909 /* Intent here is for unprivileged_bpf_disabled to block BPF program 2910 * creation for unprivileged users; other actions depend 2911 * on fd availability and access to bpffs, so are dependent on 2912 * object creation success. Even with unprivileged BPF disabled, 2913 * capability checks are still carried out for these 2914 * and other operations. 2915 */ 2916 if (sysctl_unprivileged_bpf_disabled && !bpf_cap) 2917 goto put_token; 2918 2919 if (attr->insn_cnt == 0 || 2920 attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { 2921 err = -E2BIG; 2922 goto put_token; 2923 } 2924 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 2925 type != BPF_PROG_TYPE_CGROUP_SKB && 2926 !bpf_cap) 2927 goto put_token; 2928 2929 if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN)) 2930 goto put_token; 2931 if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) 2932 goto put_token; 2933 2934 /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog 2935 * or btf, we need to check which one it is 2936 */ 2937 if (attr->attach_prog_fd) { 2938 dst_prog = bpf_prog_get(attr->attach_prog_fd); 2939 if (IS_ERR(dst_prog)) { 2940 dst_prog = NULL; 2941 attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); 2942 if (IS_ERR(attach_btf)) { 2943 err = -EINVAL; 2944 goto put_token; 2945 } 2946 if (!btf_is_kernel(attach_btf)) { 2947 /* attaching through specifying bpf_prog's BTF 2948 * objects directly might be supported eventually 2949 */ 2950 btf_put(attach_btf); 2951 err = -ENOTSUPP; 2952 goto put_token; 2953 } 2954 } 2955 } else if (attr->attach_btf_id) { 2956 /* fall back to vmlinux BTF, if BTF type ID is specified */ 2957 attach_btf = bpf_get_btf_vmlinux(); 2958 if (IS_ERR(attach_btf)) { 2959 err = PTR_ERR(attach_btf); 2960 goto put_token; 2961 } 2962 if (!attach_btf) { 2963 err = -EINVAL; 2964 goto put_token; 2965 } 2966 btf_get(attach_btf); 2967 } 2968 2969 if (bpf_prog_load_check_attach(type, attr->expected_attach_type, 2970 attach_btf, attr->attach_btf_id, 2971 dst_prog)) { 2972 if (dst_prog) 2973 bpf_prog_put(dst_prog); 2974 if (attach_btf) 2975 btf_put(attach_btf); 2976 err = -EINVAL; 2977 goto put_token; 2978 } 2979 2980 /* plain bpf_prog allocation */ 2981 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 2982 if (!prog) { 2983 if (dst_prog) 2984 bpf_prog_put(dst_prog); 2985 if (attach_btf) 2986 btf_put(attach_btf); 2987 err = -EINVAL; 2988 goto put_token; 2989 } 2990 2991 prog->expected_attach_type = attr->expected_attach_type; 2992 prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE); 2993 prog->aux->attach_btf = attach_btf; 2994 prog->aux->attach_btf_id = attr->attach_btf_id; 2995 prog->aux->dst_prog = dst_prog; 2996 prog->aux->dev_bound = !!attr->prog_ifindex; 2997 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; 2998 2999 /* move token into prog->aux, reuse taken refcnt */ 3000 prog->aux->token = token; 3001 token = NULL; 3002 3003 prog->aux->user = get_current_user(); 3004 prog->len = attr->insn_cnt; 3005 3006 err = -EFAULT; 3007 if (copy_from_bpfptr(prog->insns, 3008 make_bpfptr(attr->insns, uattr.is_kernel), 3009 bpf_prog_insn_size(prog)) != 0) 3010 goto free_prog; 3011 /* copy eBPF program license from user space */ 3012 if 
(strncpy_from_bpfptr(license, 3013 make_bpfptr(attr->license, uattr.is_kernel), 3014 sizeof(license) - 1) < 0) 3015 goto free_prog; 3016 license[sizeof(license) - 1] = 0; 3017 3018 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 3019 prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0; 3020 3021 if (attr->signature) { 3022 err = bpf_prog_verify_signature(prog, attr, uattr.is_kernel); 3023 if (err) 3024 goto free_prog; 3025 } 3026 3027 prog->orig_prog = NULL; 3028 prog->jited = 0; 3029 3030 atomic64_set(&prog->aux->refcnt, 1); 3031 3032 if (bpf_prog_is_dev_bound(prog->aux)) { 3033 err = bpf_prog_dev_bound_init(prog, attr); 3034 if (err) 3035 goto free_prog; 3036 } 3037 3038 if (type == BPF_PROG_TYPE_EXT && dst_prog && 3039 bpf_prog_is_dev_bound(dst_prog->aux)) { 3040 err = bpf_prog_dev_bound_inherit(prog, dst_prog); 3041 if (err) 3042 goto free_prog; 3043 } 3044 3045 /* 3046 * Bookkeeping for managing the program attachment chain. 3047 * 3048 * It might be tempting to set attach_tracing_prog flag at the attachment 3049 * time, but this will not prevent from loading bunch of tracing prog 3050 * first, then attach them one to another. 3051 * 3052 * The flag attach_tracing_prog is set for the whole program lifecycle, and 3053 * doesn't have to be cleared in bpf_tracing_link_release, since tracing 3054 * programs cannot change attachment target. 3055 */ 3056 if (type == BPF_PROG_TYPE_TRACING && dst_prog && 3057 dst_prog->type == BPF_PROG_TYPE_TRACING) { 3058 prog->aux->attach_tracing_prog = true; 3059 } 3060 3061 /* find program type: socket_filter vs tracing_filter */ 3062 err = find_prog_type(type, prog); 3063 if (err < 0) 3064 goto free_prog; 3065 3066 prog->aux->load_time = ktime_get_boottime_ns(); 3067 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, 3068 sizeof(attr->prog_name)); 3069 if (err < 0) 3070 goto free_prog; 3071 3072 err = security_bpf_prog_load(prog, attr, token, uattr.is_kernel); 3073 if (err) 3074 goto free_prog_sec; 3075 3076 /* run eBPF verifier */ 3077 err = bpf_check(&prog, attr, uattr, uattr_size); 3078 if (err < 0) 3079 goto free_used_maps; 3080 3081 prog = bpf_prog_select_runtime(prog, &err); 3082 if (err < 0) 3083 goto free_used_maps; 3084 3085 err = bpf_prog_alloc_id(prog); 3086 if (err) 3087 goto free_used_maps; 3088 3089 /* Upon success of bpf_prog_alloc_id(), the BPF prog is 3090 * effectively publicly exposed. However, retrieving via 3091 * bpf_prog_get_fd_by_id() will take another reference, 3092 * therefore it cannot be gone underneath us. 3093 * 3094 * Only for the time /after/ successful bpf_prog_new_fd() 3095 * and before returning to userspace, we might just hold 3096 * one reference and any parallel close on that fd could 3097 * rip everything out. Hence, below notifications must 3098 * happen before bpf_prog_new_fd(). 3099 * 3100 * Also, any failure handling from this point onwards must 3101 * be using bpf_prog_put() given the program is exposed. 3102 */ 3103 bpf_prog_kallsyms_add(prog); 3104 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); 3105 bpf_audit_prog(prog, BPF_AUDIT_LOAD); 3106 3107 err = bpf_prog_new_fd(prog); 3108 if (err < 0) 3109 bpf_prog_put(prog); 3110 return err; 3111 3112 free_used_maps: 3113 /* In case we have subprogs, we need to wait for a grace 3114 * period before we can tear down JIT memory since symbols 3115 * are already exposed under kallsyms. 
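	 * That is also why __bpf_prog_put_noref() below is passed
	 * prog->aux->real_func_cnt as its 'deferred' argument: any subprogs
	 * force the RCU-deferred teardown path.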
3116 */ 3117 __bpf_prog_put_noref(prog, prog->aux->real_func_cnt); 3118 return err; 3119 3120 free_prog_sec: 3121 security_bpf_prog_free(prog); 3122 free_prog: 3123 free_uid(prog->aux->user); 3124 if (prog->aux->attach_btf) 3125 btf_put(prog->aux->attach_btf); 3126 bpf_prog_free(prog); 3127 put_token: 3128 bpf_token_put(token); 3129 return err; 3130 } 3131 3132 #define BPF_OBJ_LAST_FIELD path_fd 3133 3134 static int bpf_obj_pin(const union bpf_attr *attr) 3135 { 3136 int path_fd; 3137 3138 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD) 3139 return -EINVAL; 3140 3141 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3142 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3143 return -EINVAL; 3144 3145 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 3146 return bpf_obj_pin_user(attr->bpf_fd, path_fd, 3147 u64_to_user_ptr(attr->pathname)); 3148 } 3149 3150 static int bpf_obj_get(const union bpf_attr *attr) 3151 { 3152 int path_fd; 3153 3154 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 3155 attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD)) 3156 return -EINVAL; 3157 3158 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3159 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3160 return -EINVAL; 3161 3162 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 3163 return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname), 3164 attr->file_flags); 3165 } 3166 3167 /* bpf_link_init_sleepable() allows to specify whether BPF link itself has 3168 * "sleepable" semantics, which normally would mean that BPF link's attach 3169 * hook can dereference link or link's underlying program for some time after 3170 * detachment due to RCU Tasks Trace-based lifetime protection scheme. 3171 * BPF program itself can be non-sleepable, yet, because it's transitively 3172 * reachable through BPF link, its freeing has to be delayed until after RCU 3173 * Tasks Trace GP. 3174 */ 3175 void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, 3176 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3177 enum bpf_attach_type attach_type, bool sleepable) 3178 { 3179 WARN_ON(ops->dealloc && ops->dealloc_deferred); 3180 atomic64_set(&link->refcnt, 1); 3181 link->type = type; 3182 link->sleepable = sleepable; 3183 link->id = 0; 3184 link->ops = ops; 3185 link->prog = prog; 3186 link->attach_type = attach_type; 3187 } 3188 3189 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, 3190 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3191 enum bpf_attach_type attach_type) 3192 { 3193 bpf_link_init_sleepable(link, type, ops, prog, attach_type, false); 3194 } 3195 3196 static void bpf_link_free_id(int id) 3197 { 3198 if (!id) 3199 return; 3200 3201 spin_lock_bh(&link_idr_lock); 3202 idr_remove(&link_idr, id); 3203 spin_unlock_bh(&link_idr_lock); 3204 } 3205 3206 /* Clean up bpf_link and corresponding anon_inode file and FD. After 3207 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 3208 * anon_inode's release() call. This helper marks bpf_link as 3209 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 3210 * is not decremented, it's the responsibility of a calling code that failed 3211 * to complete bpf_link initialization. 3212 * This helper eventually calls link's dealloc callback, but does not call 3213 * link's release callback. 
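 * The typical sequence in this file is bpf_link_prime(), then the actual
 * attach operation, then either bpf_link_cleanup() on failure or
 * bpf_link_settle() on success.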
3214 */ 3215 void bpf_link_cleanup(struct bpf_link_primer *primer) 3216 { 3217 primer->link->prog = NULL; 3218 bpf_link_free_id(primer->id); 3219 fput(primer->file); 3220 put_unused_fd(primer->fd); 3221 } 3222 3223 void bpf_link_inc(struct bpf_link *link) 3224 { 3225 atomic64_inc(&link->refcnt); 3226 } 3227 3228 static void bpf_link_dealloc(struct bpf_link *link) 3229 { 3230 /* now that we know that bpf_link itself can't be reached, put underlying BPF program */ 3231 if (link->prog) 3232 bpf_prog_put(link->prog); 3233 3234 /* free bpf_link and its containing memory */ 3235 if (link->ops->dealloc_deferred) 3236 link->ops->dealloc_deferred(link); 3237 else 3238 link->ops->dealloc(link); 3239 } 3240 3241 static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) 3242 { 3243 struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); 3244 3245 bpf_link_dealloc(link); 3246 } 3247 3248 static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) 3249 { 3250 if (rcu_trace_implies_rcu_gp()) 3251 bpf_link_defer_dealloc_rcu_gp(rcu); 3252 else 3253 call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); 3254 } 3255 3256 /* bpf_link_free is guaranteed to be called from process context */ 3257 static void bpf_link_free(struct bpf_link *link) 3258 { 3259 const struct bpf_link_ops *ops = link->ops; 3260 3261 bpf_link_free_id(link->id); 3262 /* detach BPF program, clean up used resources */ 3263 if (link->prog) 3264 ops->release(link); 3265 if (ops->dealloc_deferred) { 3266 /* Schedule BPF link deallocation, which will only then 3267 * trigger putting BPF program refcount. 3268 * If underlying BPF program is sleepable or BPF link's target 3269 * attach hookpoint is sleepable or otherwise requires RCU GPs 3270 * to ensure link and its underlying BPF program is not 3271 * reachable anymore, we need to first wait for RCU tasks 3272 * trace sync, and then go through "classic" RCU grace period 3273 */ 3274 if (link->sleepable || (link->prog && link->prog->sleepable)) 3275 call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); 3276 else 3277 call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); 3278 } else if (ops->dealloc) { 3279 bpf_link_dealloc(link); 3280 } 3281 } 3282 3283 static void bpf_link_put_deferred(struct work_struct *work) 3284 { 3285 struct bpf_link *link = container_of(work, struct bpf_link, work); 3286 3287 bpf_link_free(link); 3288 } 3289 3290 /* bpf_link_put might be called from atomic context. It needs to be called 3291 * from sleepable context in order to acquire sleeping locks during the process. 
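 * That is why the final reference drop is punted to a workqueue via
 * bpf_link_put_deferred() below, while bpf_link_put_direct() is reserved for
 * callers known to run in sleepable context.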
3292 */ 3293 void bpf_link_put(struct bpf_link *link) 3294 { 3295 if (!atomic64_dec_and_test(&link->refcnt)) 3296 return; 3297 3298 INIT_WORK(&link->work, bpf_link_put_deferred); 3299 schedule_work(&link->work); 3300 } 3301 EXPORT_SYMBOL(bpf_link_put); 3302 3303 static void bpf_link_put_direct(struct bpf_link *link) 3304 { 3305 if (!atomic64_dec_and_test(&link->refcnt)) 3306 return; 3307 bpf_link_free(link); 3308 } 3309 3310 static int bpf_link_release(struct inode *inode, struct file *filp) 3311 { 3312 struct bpf_link *link = filp->private_data; 3313 3314 bpf_link_put_direct(link); 3315 return 0; 3316 } 3317 3318 #ifdef CONFIG_PROC_FS 3319 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 3320 #define BPF_MAP_TYPE(_id, _ops) 3321 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name, 3322 static const char *bpf_link_type_strs[] = { 3323 [BPF_LINK_TYPE_UNSPEC] = "<invalid>", 3324 #include <linux/bpf_types.h> 3325 }; 3326 #undef BPF_PROG_TYPE 3327 #undef BPF_MAP_TYPE 3328 #undef BPF_LINK_TYPE 3329 3330 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) 3331 { 3332 const struct bpf_link *link = filp->private_data; 3333 const struct bpf_prog *prog = link->prog; 3334 enum bpf_link_type type = link->type; 3335 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 3336 3337 if (type < ARRAY_SIZE(bpf_link_type_strs) && bpf_link_type_strs[type]) { 3338 if (link->type == BPF_LINK_TYPE_KPROBE_MULTI) 3339 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_KPROBE_MULTI_RETURN ? 3340 "kretprobe_multi" : "kprobe_multi"); 3341 else if (link->type == BPF_LINK_TYPE_UPROBE_MULTI) 3342 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_UPROBE_MULTI_RETURN ? 3343 "uretprobe_multi" : "uprobe_multi"); 3344 else 3345 seq_printf(m, "link_type:\t%s\n", bpf_link_type_strs[type]); 3346 } else { 3347 WARN_ONCE(1, "missing BPF_LINK_TYPE(...) for link type %u\n", type); 3348 seq_printf(m, "link_type:\t<%u>\n", type); 3349 } 3350 seq_printf(m, "link_id:\t%u\n", link->id); 3351 3352 if (prog) { 3353 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 3354 seq_printf(m, 3355 "prog_tag:\t%s\n" 3356 "prog_id:\t%u\n", 3357 prog_tag, 3358 prog->aux->id); 3359 } 3360 if (link->ops->show_fdinfo) 3361 link->ops->show_fdinfo(link, m); 3362 } 3363 #endif 3364 3365 static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts) 3366 { 3367 struct bpf_link *link = file->private_data; 3368 3369 return link->ops->poll(file, pts); 3370 } 3371 3372 static const struct file_operations bpf_link_fops = { 3373 #ifdef CONFIG_PROC_FS 3374 .show_fdinfo = bpf_link_show_fdinfo, 3375 #endif 3376 .release = bpf_link_release, 3377 .read = bpf_dummy_read, 3378 .write = bpf_dummy_write, 3379 }; 3380 3381 static const struct file_operations bpf_link_fops_poll = { 3382 #ifdef CONFIG_PROC_FS 3383 .show_fdinfo = bpf_link_show_fdinfo, 3384 #endif 3385 .release = bpf_link_release, 3386 .read = bpf_dummy_read, 3387 .write = bpf_dummy_write, 3388 .poll = bpf_link_poll, 3389 }; 3390 3391 static int bpf_link_alloc_id(struct bpf_link *link) 3392 { 3393 int id; 3394 3395 idr_preload(GFP_KERNEL); 3396 spin_lock_bh(&link_idr_lock); 3397 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); 3398 spin_unlock_bh(&link_idr_lock); 3399 idr_preload_end(); 3400 3401 return id; 3402 } 3403 3404 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, 3405 * reserving unused FD and allocating ID from link_idr. 
 * This is to be paired with bpf_link_settle() to install FD and ID and expose
 * bpf_link to user-space, if bpf_link is successfully attached. If not,
 * bpf_link and pre-allocated resources are to be freed with bpf_link_cleanup()
 * call. All the transient state is passed around in struct bpf_link_primer.
 * This is the preferred way to create and initialize bpf_link, especially when
 * there are complicated and expensive operations in between creating bpf_link
 * itself and attaching it to BPF hook. By using bpf_link_prime() and
 * bpf_link_settle() kernel code using bpf_link doesn't have to perform
 * expensive (and potentially failing) roll back operations in a rare case
 * that file, FD, or ID can't be allocated.
 */
int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
{
	struct file *file;
	int fd, id;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		return fd;

	id = bpf_link_alloc_id(link);
	if (id < 0) {
		put_unused_fd(fd);
		return id;
	}

	file = anon_inode_getfile("bpf_link",
				  link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops,
				  link, O_CLOEXEC);
	if (IS_ERR(file)) {
		bpf_link_free_id(id);
		put_unused_fd(fd);
		return PTR_ERR(file);
	}

	primer->link = link;
	primer->file = file;
	primer->fd = fd;
	primer->id = id;
	return 0;
}

int bpf_link_settle(struct bpf_link_primer *primer)
{
	/* make bpf_link fetchable by ID */
	spin_lock_bh(&link_idr_lock);
	primer->link->id = primer->id;
	spin_unlock_bh(&link_idr_lock);
	/* make bpf_link fetchable by FD */
	fd_install(primer->fd, primer->file);
	/* pass through installed FD */
	return primer->fd;
}

int bpf_link_new_fd(struct bpf_link *link)
{
	return anon_inode_getfd("bpf-link",
				link->ops->poll ?
&bpf_link_fops_poll : &bpf_link_fops, 3465 link, O_CLOEXEC); 3466 } 3467 3468 struct bpf_link *bpf_link_get_from_fd(u32 ufd) 3469 { 3470 CLASS(fd, f)(ufd); 3471 struct bpf_link *link; 3472 3473 if (fd_empty(f)) 3474 return ERR_PTR(-EBADF); 3475 if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) 3476 return ERR_PTR(-EINVAL); 3477 3478 link = fd_file(f)->private_data; 3479 bpf_link_inc(link); 3480 return link; 3481 } 3482 EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL"); 3483 3484 static void bpf_tracing_link_release(struct bpf_link *link) 3485 { 3486 struct bpf_tracing_link *tr_link = 3487 container_of(link, struct bpf_tracing_link, link.link); 3488 3489 WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, 3490 tr_link->trampoline, 3491 tr_link->tgt_prog)); 3492 3493 bpf_trampoline_put(tr_link->trampoline); 3494 3495 /* tgt_prog is NULL if target is a kernel function */ 3496 if (tr_link->tgt_prog) 3497 bpf_prog_put(tr_link->tgt_prog); 3498 } 3499 3500 static void bpf_tracing_link_dealloc(struct bpf_link *link) 3501 { 3502 struct bpf_tracing_link *tr_link = 3503 container_of(link, struct bpf_tracing_link, link.link); 3504 3505 kfree(tr_link); 3506 } 3507 3508 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, 3509 struct seq_file *seq) 3510 { 3511 struct bpf_tracing_link *tr_link = 3512 container_of(link, struct bpf_tracing_link, link.link); 3513 u32 target_btf_id, target_obj_id; 3514 3515 bpf_trampoline_unpack_key(tr_link->trampoline->key, 3516 &target_obj_id, &target_btf_id); 3517 seq_printf(seq, 3518 "attach_type:\t%d\n" 3519 "target_obj_id:\t%u\n" 3520 "target_btf_id:\t%u\n" 3521 "cookie:\t%llu\n", 3522 link->attach_type, 3523 target_obj_id, 3524 target_btf_id, 3525 tr_link->link.cookie); 3526 } 3527 3528 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, 3529 struct bpf_link_info *info) 3530 { 3531 struct bpf_tracing_link *tr_link = 3532 container_of(link, struct bpf_tracing_link, link.link); 3533 3534 info->tracing.attach_type = link->attach_type; 3535 info->tracing.cookie = tr_link->link.cookie; 3536 bpf_trampoline_unpack_key(tr_link->trampoline->key, 3537 &info->tracing.target_obj_id, 3538 &info->tracing.target_btf_id); 3539 3540 return 0; 3541 } 3542 3543 static const struct bpf_link_ops bpf_tracing_link_lops = { 3544 .release = bpf_tracing_link_release, 3545 .dealloc = bpf_tracing_link_dealloc, 3546 .show_fdinfo = bpf_tracing_link_show_fdinfo, 3547 .fill_link_info = bpf_tracing_link_fill_link_info, 3548 }; 3549 3550 static int bpf_tracing_prog_attach(struct bpf_prog *prog, 3551 int tgt_prog_fd, 3552 u32 btf_id, 3553 u64 bpf_cookie, 3554 enum bpf_attach_type attach_type) 3555 { 3556 struct bpf_link_primer link_primer; 3557 struct bpf_prog *tgt_prog = NULL; 3558 struct bpf_trampoline *tr = NULL; 3559 struct bpf_tracing_link *link; 3560 u64 key = 0; 3561 int err; 3562 3563 switch (prog->type) { 3564 case BPF_PROG_TYPE_TRACING: 3565 if (prog->expected_attach_type != BPF_TRACE_FENTRY && 3566 prog->expected_attach_type != BPF_TRACE_FEXIT && 3567 prog->expected_attach_type != BPF_MODIFY_RETURN) { 3568 err = -EINVAL; 3569 goto out_put_prog; 3570 } 3571 break; 3572 case BPF_PROG_TYPE_EXT: 3573 if (prog->expected_attach_type != 0) { 3574 err = -EINVAL; 3575 goto out_put_prog; 3576 } 3577 break; 3578 case BPF_PROG_TYPE_LSM: 3579 if (prog->expected_attach_type != BPF_LSM_MAC) { 3580 err = -EINVAL; 3581 goto out_put_prog; 3582 } 3583 break; 3584 default: 3585 err = -EINVAL; 3586 goto out_put_prog; 3587 } 3588 3589 
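	/* Two invocation shapes reach this check from user space; a sketch
	 * assuming libbpf's bpf_link_create() wrapper, with prog_fd, tgt_prog_fd,
	 * btf_id and link_fd as illustrative placeholders:
	 *
	 *	// tracing program (e.g. fentry): the target was resolved at load
	 *	// time from attach_btf_id, so no explicit target is passed here.
	 *	link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FENTRY, NULL);
	 *
	 *	// freplace (BPF_PROG_TYPE_EXT) program: both the target prog FD
	 *	// and the BTF id of the function being replaced are supplied,
	 *	// matching the tgt_prog_fd/btf_id pair validated below.
	 *	LIBBPF_OPTS(bpf_link_create_opts, opts, .target_btf_id = btf_id);
	 *	link_fd = bpf_link_create(prog_fd, tgt_prog_fd, 0, &opts);
	 */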
if (!!tgt_prog_fd != !!btf_id) { 3590 err = -EINVAL; 3591 goto out_put_prog; 3592 } 3593 3594 if (tgt_prog_fd) { 3595 /* 3596 * For now we only allow new targets for BPF_PROG_TYPE_EXT. If this 3597 * part would be changed to implement the same for 3598 * BPF_PROG_TYPE_TRACING, do not forget to update the way how 3599 * attach_tracing_prog flag is set. 3600 */ 3601 if (prog->type != BPF_PROG_TYPE_EXT) { 3602 err = -EINVAL; 3603 goto out_put_prog; 3604 } 3605 3606 tgt_prog = bpf_prog_get(tgt_prog_fd); 3607 if (IS_ERR(tgt_prog)) { 3608 err = PTR_ERR(tgt_prog); 3609 tgt_prog = NULL; 3610 goto out_put_prog; 3611 } 3612 3613 key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id); 3614 } 3615 3616 link = kzalloc(sizeof(*link), GFP_USER); 3617 if (!link) { 3618 err = -ENOMEM; 3619 goto out_put_prog; 3620 } 3621 bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, 3622 &bpf_tracing_link_lops, prog, attach_type); 3623 3624 link->link.cookie = bpf_cookie; 3625 3626 mutex_lock(&prog->aux->dst_mutex); 3627 3628 /* There are a few possible cases here: 3629 * 3630 * - if prog->aux->dst_trampoline is set, the program was just loaded 3631 * and not yet attached to anything, so we can use the values stored 3632 * in prog->aux 3633 * 3634 * - if prog->aux->dst_trampoline is NULL, the program has already been 3635 * attached to a target and its initial target was cleared (below) 3636 * 3637 * - if tgt_prog != NULL, the caller specified tgt_prog_fd + 3638 * target_btf_id using the link_create API. 3639 * 3640 * - if tgt_prog == NULL when this function was called using the old 3641 * raw_tracepoint_open API, and we need a target from prog->aux 3642 * 3643 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program 3644 * was detached and is going for re-attachment. 3645 * 3646 * - if prog->aux->dst_trampoline is NULL and tgt_prog and prog->aux->attach_btf 3647 * are NULL, then program was already attached and user did not provide 3648 * tgt_prog_fd so we have no way to find out or create trampoline 3649 */ 3650 if (!prog->aux->dst_trampoline && !tgt_prog) { 3651 /* 3652 * Allow re-attach for TRACING and LSM programs. If it's 3653 * currently linked, bpf_trampoline_link_prog will fail. 3654 * EXT programs need to specify tgt_prog_fd, so they 3655 * re-attach in separate code path. 3656 */ 3657 if (prog->type != BPF_PROG_TYPE_TRACING && 3658 prog->type != BPF_PROG_TYPE_LSM) { 3659 err = -EINVAL; 3660 goto out_unlock; 3661 } 3662 /* We can allow re-attach only if we have valid attach_btf. 
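		 * In that case the attach_btf_id recorded at load time is reused
		 * just below to recompute the trampoline key.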
*/ 3663 if (!prog->aux->attach_btf) { 3664 err = -EINVAL; 3665 goto out_unlock; 3666 } 3667 btf_id = prog->aux->attach_btf_id; 3668 key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id); 3669 } 3670 3671 if (!prog->aux->dst_trampoline || 3672 (key && key != prog->aux->dst_trampoline->key)) { 3673 /* If there is no saved target, or the specified target is 3674 * different from the destination specified at load time, we 3675 * need a new trampoline and a check for compatibility 3676 */ 3677 struct bpf_attach_target_info tgt_info = {}; 3678 3679 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, 3680 &tgt_info); 3681 if (err) 3682 goto out_unlock; 3683 3684 if (tgt_info.tgt_mod) { 3685 module_put(prog->aux->mod); 3686 prog->aux->mod = tgt_info.tgt_mod; 3687 } 3688 3689 tr = bpf_trampoline_get(key, &tgt_info); 3690 if (!tr) { 3691 err = -ENOMEM; 3692 goto out_unlock; 3693 } 3694 } else { 3695 /* The caller didn't specify a target, or the target was the 3696 * same as the destination supplied during program load. This 3697 * means we can reuse the trampoline and reference from program 3698 * load time, and there is no need to allocate a new one. This 3699 * can only happen once for any program, as the saved values in 3700 * prog->aux are cleared below. 3701 */ 3702 tr = prog->aux->dst_trampoline; 3703 tgt_prog = prog->aux->dst_prog; 3704 } 3705 3706 err = bpf_link_prime(&link->link.link, &link_primer); 3707 if (err) 3708 goto out_unlock; 3709 3710 err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog); 3711 if (err) { 3712 bpf_link_cleanup(&link_primer); 3713 link = NULL; 3714 goto out_unlock; 3715 } 3716 3717 link->tgt_prog = tgt_prog; 3718 link->trampoline = tr; 3719 3720 /* Always clear the trampoline and target prog from prog->aux to make 3721 * sure the original attach destination is not kept alive after a 3722 * program is (re-)attached to another target. 
3723 */ 3724 if (prog->aux->dst_prog && 3725 (tgt_prog_fd || tr != prog->aux->dst_trampoline)) 3726 /* got extra prog ref from syscall, or attaching to different prog */ 3727 bpf_prog_put(prog->aux->dst_prog); 3728 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) 3729 /* we allocated a new trampoline, so free the old one */ 3730 bpf_trampoline_put(prog->aux->dst_trampoline); 3731 3732 prog->aux->dst_prog = NULL; 3733 prog->aux->dst_trampoline = NULL; 3734 mutex_unlock(&prog->aux->dst_mutex); 3735 3736 return bpf_link_settle(&link_primer); 3737 out_unlock: 3738 if (tr && tr != prog->aux->dst_trampoline) 3739 bpf_trampoline_put(tr); 3740 mutex_unlock(&prog->aux->dst_mutex); 3741 kfree(link); 3742 out_put_prog: 3743 if (tgt_prog_fd && tgt_prog) 3744 bpf_prog_put(tgt_prog); 3745 return err; 3746 } 3747 3748 static void bpf_raw_tp_link_release(struct bpf_link *link) 3749 { 3750 struct bpf_raw_tp_link *raw_tp = 3751 container_of(link, struct bpf_raw_tp_link, link); 3752 3753 bpf_probe_unregister(raw_tp->btp, raw_tp); 3754 bpf_put_raw_tracepoint(raw_tp->btp); 3755 } 3756 3757 static void bpf_raw_tp_link_dealloc(struct bpf_link *link) 3758 { 3759 struct bpf_raw_tp_link *raw_tp = 3760 container_of(link, struct bpf_raw_tp_link, link); 3761 3762 kfree(raw_tp); 3763 } 3764 3765 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, 3766 struct seq_file *seq) 3767 { 3768 struct bpf_raw_tp_link *raw_tp_link = 3769 container_of(link, struct bpf_raw_tp_link, link); 3770 3771 seq_printf(seq, 3772 "tp_name:\t%s\n" 3773 "cookie:\t%llu\n", 3774 raw_tp_link->btp->tp->name, 3775 raw_tp_link->cookie); 3776 } 3777 3778 static int bpf_copy_to_user(char __user *ubuf, const char *buf, u32 ulen, 3779 u32 len) 3780 { 3781 if (ulen >= len + 1) { 3782 if (copy_to_user(ubuf, buf, len + 1)) 3783 return -EFAULT; 3784 } else { 3785 char zero = '\0'; 3786 3787 if (copy_to_user(ubuf, buf, ulen - 1)) 3788 return -EFAULT; 3789 if (put_user(zero, ubuf + ulen - 1)) 3790 return -EFAULT; 3791 return -ENOSPC; 3792 } 3793 3794 return 0; 3795 } 3796 3797 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, 3798 struct bpf_link_info *info) 3799 { 3800 struct bpf_raw_tp_link *raw_tp_link = 3801 container_of(link, struct bpf_raw_tp_link, link); 3802 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); 3803 const char *tp_name = raw_tp_link->btp->tp->name; 3804 u32 ulen = info->raw_tracepoint.tp_name_len; 3805 size_t tp_len = strlen(tp_name); 3806 3807 if (!ulen ^ !ubuf) 3808 return -EINVAL; 3809 3810 info->raw_tracepoint.tp_name_len = tp_len + 1; 3811 info->raw_tracepoint.cookie = raw_tp_link->cookie; 3812 3813 if (!ubuf) 3814 return 0; 3815 3816 return bpf_copy_to_user(ubuf, tp_name, ulen, tp_len); 3817 } 3818 3819 static const struct bpf_link_ops bpf_raw_tp_link_lops = { 3820 .release = bpf_raw_tp_link_release, 3821 .dealloc_deferred = bpf_raw_tp_link_dealloc, 3822 .show_fdinfo = bpf_raw_tp_link_show_fdinfo, 3823 .fill_link_info = bpf_raw_tp_link_fill_link_info, 3824 }; 3825 3826 #ifdef CONFIG_PERF_EVENTS 3827 struct bpf_perf_link { 3828 struct bpf_link link; 3829 struct file *perf_file; 3830 }; 3831 3832 static void bpf_perf_link_release(struct bpf_link *link) 3833 { 3834 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3835 struct perf_event *event = perf_link->perf_file->private_data; 3836 3837 perf_event_free_bpf_prog(event); 3838 fput(perf_link->perf_file); 3839 } 3840 3841 static void bpf_perf_link_dealloc(struct bpf_link *link) 3842 { 
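	/* Called via bpf_link_dealloc() once the link can no longer be reached;
	 * bpf_perf_link_release() has already detached the program from the
	 * perf event and dropped the perf_file reference, so only the
	 * container itself is left to free.
	 */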
3843 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3844 3845 kfree(perf_link); 3846 } 3847 3848 static int bpf_perf_link_fill_common(const struct perf_event *event, 3849 char __user *uname, u32 *ulenp, 3850 u64 *probe_offset, u64 *probe_addr, 3851 u32 *fd_type, unsigned long *missed) 3852 { 3853 const char *buf; 3854 u32 prog_id, ulen; 3855 size_t len; 3856 int err; 3857 3858 ulen = *ulenp; 3859 if (!ulen ^ !uname) 3860 return -EINVAL; 3861 3862 err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, 3863 probe_offset, probe_addr, missed); 3864 if (err) 3865 return err; 3866 3867 if (buf) { 3868 len = strlen(buf); 3869 *ulenp = len + 1; 3870 } else { 3871 *ulenp = 1; 3872 } 3873 if (!uname) 3874 return 0; 3875 3876 if (buf) { 3877 err = bpf_copy_to_user(uname, buf, ulen, len); 3878 if (err) 3879 return err; 3880 } else { 3881 char zero = '\0'; 3882 3883 if (put_user(zero, uname)) 3884 return -EFAULT; 3885 } 3886 return 0; 3887 } 3888 3889 #ifdef CONFIG_KPROBE_EVENTS 3890 static int bpf_perf_link_fill_kprobe(const struct perf_event *event, 3891 struct bpf_link_info *info) 3892 { 3893 unsigned long missed; 3894 char __user *uname; 3895 u64 addr, offset; 3896 u32 ulen, type; 3897 int err; 3898 3899 uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); 3900 ulen = info->perf_event.kprobe.name_len; 3901 err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr, 3902 &type, &missed); 3903 if (err) 3904 return err; 3905 if (type == BPF_FD_TYPE_KRETPROBE) 3906 info->perf_event.type = BPF_PERF_EVENT_KRETPROBE; 3907 else 3908 info->perf_event.type = BPF_PERF_EVENT_KPROBE; 3909 info->perf_event.kprobe.name_len = ulen; 3910 info->perf_event.kprobe.offset = offset; 3911 info->perf_event.kprobe.missed = missed; 3912 if (!kallsyms_show_value(current_cred())) 3913 addr = 0; 3914 info->perf_event.kprobe.addr = addr; 3915 info->perf_event.kprobe.cookie = event->bpf_cookie; 3916 return 0; 3917 } 3918 3919 static void bpf_perf_link_fdinfo_kprobe(const struct perf_event *event, 3920 struct seq_file *seq) 3921 { 3922 const char *name; 3923 int err; 3924 u32 prog_id, type; 3925 u64 offset, addr; 3926 unsigned long missed; 3927 3928 err = bpf_get_perf_event_info(event, &prog_id, &type, &name, 3929 &offset, &addr, &missed); 3930 if (err) 3931 return; 3932 3933 seq_printf(seq, 3934 "name:\t%s\n" 3935 "offset:\t%#llx\n" 3936 "missed:\t%lu\n" 3937 "addr:\t%#llx\n" 3938 "event_type:\t%s\n" 3939 "cookie:\t%llu\n", 3940 name, offset, missed, addr, 3941 type == BPF_FD_TYPE_KRETPROBE ? 
"kretprobe" : "kprobe", 3942 event->bpf_cookie); 3943 } 3944 #endif 3945 3946 #ifdef CONFIG_UPROBE_EVENTS 3947 static int bpf_perf_link_fill_uprobe(const struct perf_event *event, 3948 struct bpf_link_info *info) 3949 { 3950 u64 ref_ctr_offset, offset; 3951 char __user *uname; 3952 u32 ulen, type; 3953 int err; 3954 3955 uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); 3956 ulen = info->perf_event.uprobe.name_len; 3957 err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &ref_ctr_offset, 3958 &type, NULL); 3959 if (err) 3960 return err; 3961 3962 if (type == BPF_FD_TYPE_URETPROBE) 3963 info->perf_event.type = BPF_PERF_EVENT_URETPROBE; 3964 else 3965 info->perf_event.type = BPF_PERF_EVENT_UPROBE; 3966 info->perf_event.uprobe.name_len = ulen; 3967 info->perf_event.uprobe.offset = offset; 3968 info->perf_event.uprobe.cookie = event->bpf_cookie; 3969 info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset; 3970 return 0; 3971 } 3972 3973 static void bpf_perf_link_fdinfo_uprobe(const struct perf_event *event, 3974 struct seq_file *seq) 3975 { 3976 const char *name; 3977 int err; 3978 u32 prog_id, type; 3979 u64 offset, ref_ctr_offset; 3980 unsigned long missed; 3981 3982 err = bpf_get_perf_event_info(event, &prog_id, &type, &name, 3983 &offset, &ref_ctr_offset, &missed); 3984 if (err) 3985 return; 3986 3987 seq_printf(seq, 3988 "name:\t%s\n" 3989 "offset:\t%#llx\n" 3990 "ref_ctr_offset:\t%#llx\n" 3991 "event_type:\t%s\n" 3992 "cookie:\t%llu\n", 3993 name, offset, ref_ctr_offset, 3994 type == BPF_FD_TYPE_URETPROBE ? "uretprobe" : "uprobe", 3995 event->bpf_cookie); 3996 } 3997 #endif 3998 3999 static int bpf_perf_link_fill_probe(const struct perf_event *event, 4000 struct bpf_link_info *info) 4001 { 4002 #ifdef CONFIG_KPROBE_EVENTS 4003 if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) 4004 return bpf_perf_link_fill_kprobe(event, info); 4005 #endif 4006 #ifdef CONFIG_UPROBE_EVENTS 4007 if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE) 4008 return bpf_perf_link_fill_uprobe(event, info); 4009 #endif 4010 return -EOPNOTSUPP; 4011 } 4012 4013 static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, 4014 struct bpf_link_info *info) 4015 { 4016 char __user *uname; 4017 u32 ulen; 4018 int err; 4019 4020 uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); 4021 ulen = info->perf_event.tracepoint.name_len; 4022 err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, NULL); 4023 if (err) 4024 return err; 4025 4026 info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; 4027 info->perf_event.tracepoint.name_len = ulen; 4028 info->perf_event.tracepoint.cookie = event->bpf_cookie; 4029 return 0; 4030 } 4031 4032 static int bpf_perf_link_fill_perf_event(const struct perf_event *event, 4033 struct bpf_link_info *info) 4034 { 4035 info->perf_event.event.type = event->attr.type; 4036 info->perf_event.event.config = event->attr.config; 4037 info->perf_event.event.cookie = event->bpf_cookie; 4038 info->perf_event.type = BPF_PERF_EVENT_EVENT; 4039 return 0; 4040 } 4041 4042 static int bpf_perf_link_fill_link_info(const struct bpf_link *link, 4043 struct bpf_link_info *info) 4044 { 4045 struct bpf_perf_link *perf_link; 4046 const struct perf_event *event; 4047 4048 perf_link = container_of(link, struct bpf_perf_link, link); 4049 event = perf_get_event(perf_link->perf_file); 4050 if (IS_ERR(event)) 4051 return PTR_ERR(event); 4052 4053 switch (event->prog->type) { 4054 case BPF_PROG_TYPE_PERF_EVENT: 4055 return bpf_perf_link_fill_perf_event(event, info); 4056 
case BPF_PROG_TYPE_TRACEPOINT: 4057 return bpf_perf_link_fill_tracepoint(event, info); 4058 case BPF_PROG_TYPE_KPROBE: 4059 return bpf_perf_link_fill_probe(event, info); 4060 default: 4061 return -EOPNOTSUPP; 4062 } 4063 } 4064 4065 static void bpf_perf_event_link_show_fdinfo(const struct perf_event *event, 4066 struct seq_file *seq) 4067 { 4068 seq_printf(seq, 4069 "type:\t%u\n" 4070 "config:\t%llu\n" 4071 "event_type:\t%s\n" 4072 "cookie:\t%llu\n", 4073 event->attr.type, event->attr.config, 4074 "event", event->bpf_cookie); 4075 } 4076 4077 static void bpf_tracepoint_link_show_fdinfo(const struct perf_event *event, 4078 struct seq_file *seq) 4079 { 4080 int err; 4081 const char *name; 4082 u32 prog_id; 4083 4084 err = bpf_get_perf_event_info(event, &prog_id, NULL, &name, NULL, 4085 NULL, NULL); 4086 if (err) 4087 return; 4088 4089 seq_printf(seq, 4090 "tp_name:\t%s\n" 4091 "event_type:\t%s\n" 4092 "cookie:\t%llu\n", 4093 name, "tracepoint", event->bpf_cookie); 4094 } 4095 4096 static void bpf_probe_link_show_fdinfo(const struct perf_event *event, 4097 struct seq_file *seq) 4098 { 4099 #ifdef CONFIG_KPROBE_EVENTS 4100 if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) 4101 return bpf_perf_link_fdinfo_kprobe(event, seq); 4102 #endif 4103 4104 #ifdef CONFIG_UPROBE_EVENTS 4105 if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE) 4106 return bpf_perf_link_fdinfo_uprobe(event, seq); 4107 #endif 4108 } 4109 4110 static void bpf_perf_link_show_fdinfo(const struct bpf_link *link, 4111 struct seq_file *seq) 4112 { 4113 struct bpf_perf_link *perf_link; 4114 const struct perf_event *event; 4115 4116 perf_link = container_of(link, struct bpf_perf_link, link); 4117 event = perf_get_event(perf_link->perf_file); 4118 if (IS_ERR(event)) 4119 return; 4120 4121 switch (event->prog->type) { 4122 case BPF_PROG_TYPE_PERF_EVENT: 4123 return bpf_perf_event_link_show_fdinfo(event, seq); 4124 case BPF_PROG_TYPE_TRACEPOINT: 4125 return bpf_tracepoint_link_show_fdinfo(event, seq); 4126 case BPF_PROG_TYPE_KPROBE: 4127 return bpf_probe_link_show_fdinfo(event, seq); 4128 default: 4129 return; 4130 } 4131 } 4132 4133 static const struct bpf_link_ops bpf_perf_link_lops = { 4134 .release = bpf_perf_link_release, 4135 .dealloc = bpf_perf_link_dealloc, 4136 .fill_link_info = bpf_perf_link_fill_link_info, 4137 .show_fdinfo = bpf_perf_link_show_fdinfo, 4138 }; 4139 4140 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 4141 { 4142 struct bpf_link_primer link_primer; 4143 struct bpf_perf_link *link; 4144 struct perf_event *event; 4145 struct file *perf_file; 4146 int err; 4147 4148 if (attr->link_create.flags) 4149 return -EINVAL; 4150 4151 perf_file = perf_event_get(attr->link_create.target_fd); 4152 if (IS_ERR(perf_file)) 4153 return PTR_ERR(perf_file); 4154 4155 link = kzalloc(sizeof(*link), GFP_USER); 4156 if (!link) { 4157 err = -ENOMEM; 4158 goto out_put_file; 4159 } 4160 bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog, 4161 attr->link_create.attach_type); 4162 link->perf_file = perf_file; 4163 4164 err = bpf_link_prime(&link->link, &link_primer); 4165 if (err) { 4166 kfree(link); 4167 goto out_put_file; 4168 } 4169 4170 event = perf_file->private_data; 4171 err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie); 4172 if (err) { 4173 bpf_link_cleanup(&link_primer); 4174 goto out_put_file; 4175 } 4176 /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */ 4177 bpf_prog_inc(prog); 4178 4179 return 
bpf_link_settle(&link_primer); 4180 4181 out_put_file: 4182 fput(perf_file); 4183 return err; 4184 } 4185 #else 4186 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 4187 { 4188 return -EOPNOTSUPP; 4189 } 4190 #endif /* CONFIG_PERF_EVENTS */ 4191 4192 static int bpf_raw_tp_link_attach(struct bpf_prog *prog, 4193 const char __user *user_tp_name, u64 cookie, 4194 enum bpf_attach_type attach_type) 4195 { 4196 struct bpf_link_primer link_primer; 4197 struct bpf_raw_tp_link *link; 4198 struct bpf_raw_event_map *btp; 4199 const char *tp_name; 4200 char buf[128]; 4201 int err; 4202 4203 switch (prog->type) { 4204 case BPF_PROG_TYPE_TRACING: 4205 case BPF_PROG_TYPE_EXT: 4206 case BPF_PROG_TYPE_LSM: 4207 if (user_tp_name) 4208 /* The attach point for this category of programs 4209 * should be specified via btf_id during program load. 4210 */ 4211 return -EINVAL; 4212 if (prog->type == BPF_PROG_TYPE_TRACING && 4213 prog->expected_attach_type == BPF_TRACE_RAW_TP) { 4214 tp_name = prog->aux->attach_func_name; 4215 break; 4216 } 4217 return bpf_tracing_prog_attach(prog, 0, 0, 0, attach_type); 4218 case BPF_PROG_TYPE_RAW_TRACEPOINT: 4219 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 4220 if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) 4221 return -EFAULT; 4222 buf[sizeof(buf) - 1] = 0; 4223 tp_name = buf; 4224 break; 4225 default: 4226 return -EINVAL; 4227 } 4228 4229 btp = bpf_get_raw_tracepoint(tp_name); 4230 if (!btp) 4231 return -ENOENT; 4232 4233 link = kzalloc(sizeof(*link), GFP_USER); 4234 if (!link) { 4235 err = -ENOMEM; 4236 goto out_put_btp; 4237 } 4238 bpf_link_init_sleepable(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, 4239 &bpf_raw_tp_link_lops, prog, attach_type, 4240 tracepoint_is_faultable(btp->tp)); 4241 link->btp = btp; 4242 link->cookie = cookie; 4243 4244 err = bpf_link_prime(&link->link, &link_primer); 4245 if (err) { 4246 kfree(link); 4247 goto out_put_btp; 4248 } 4249 4250 err = bpf_probe_register(link->btp, link); 4251 if (err) { 4252 bpf_link_cleanup(&link_primer); 4253 goto out_put_btp; 4254 } 4255 4256 return bpf_link_settle(&link_primer); 4257 4258 out_put_btp: 4259 bpf_put_raw_tracepoint(btp); 4260 return err; 4261 } 4262 4263 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.cookie 4264 4265 static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 4266 { 4267 struct bpf_prog *prog; 4268 void __user *tp_name; 4269 __u64 cookie; 4270 int fd; 4271 4272 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) 4273 return -EINVAL; 4274 4275 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 4276 if (IS_ERR(prog)) 4277 return PTR_ERR(prog); 4278 4279 tp_name = u64_to_user_ptr(attr->raw_tracepoint.name); 4280 cookie = attr->raw_tracepoint.cookie; 4281 fd = bpf_raw_tp_link_attach(prog, tp_name, cookie, prog->expected_attach_type); 4282 if (fd < 0) 4283 bpf_prog_put(prog); 4284 return fd; 4285 } 4286 4287 static enum bpf_prog_type 4288 attach_type_to_prog_type(enum bpf_attach_type attach_type) 4289 { 4290 switch (attach_type) { 4291 case BPF_CGROUP_INET_INGRESS: 4292 case BPF_CGROUP_INET_EGRESS: 4293 return BPF_PROG_TYPE_CGROUP_SKB; 4294 case BPF_CGROUP_INET_SOCK_CREATE: 4295 case BPF_CGROUP_INET_SOCK_RELEASE: 4296 case BPF_CGROUP_INET4_POST_BIND: 4297 case BPF_CGROUP_INET6_POST_BIND: 4298 return BPF_PROG_TYPE_CGROUP_SOCK; 4299 case BPF_CGROUP_INET4_BIND: 4300 case BPF_CGROUP_INET6_BIND: 4301 case BPF_CGROUP_INET4_CONNECT: 4302 case BPF_CGROUP_INET6_CONNECT: 4303 case BPF_CGROUP_UNIX_CONNECT: 4304 case BPF_CGROUP_INET4_GETPEERNAME: 4305 
case BPF_CGROUP_INET6_GETPEERNAME: 4306 case BPF_CGROUP_UNIX_GETPEERNAME: 4307 case BPF_CGROUP_INET4_GETSOCKNAME: 4308 case BPF_CGROUP_INET6_GETSOCKNAME: 4309 case BPF_CGROUP_UNIX_GETSOCKNAME: 4310 case BPF_CGROUP_UDP4_SENDMSG: 4311 case BPF_CGROUP_UDP6_SENDMSG: 4312 case BPF_CGROUP_UNIX_SENDMSG: 4313 case BPF_CGROUP_UDP4_RECVMSG: 4314 case BPF_CGROUP_UDP6_RECVMSG: 4315 case BPF_CGROUP_UNIX_RECVMSG: 4316 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 4317 case BPF_CGROUP_SOCK_OPS: 4318 return BPF_PROG_TYPE_SOCK_OPS; 4319 case BPF_CGROUP_DEVICE: 4320 return BPF_PROG_TYPE_CGROUP_DEVICE; 4321 case BPF_SK_MSG_VERDICT: 4322 return BPF_PROG_TYPE_SK_MSG; 4323 case BPF_SK_SKB_STREAM_PARSER: 4324 case BPF_SK_SKB_STREAM_VERDICT: 4325 case BPF_SK_SKB_VERDICT: 4326 return BPF_PROG_TYPE_SK_SKB; 4327 case BPF_LIRC_MODE2: 4328 return BPF_PROG_TYPE_LIRC_MODE2; 4329 case BPF_FLOW_DISSECTOR: 4330 return BPF_PROG_TYPE_FLOW_DISSECTOR; 4331 case BPF_CGROUP_SYSCTL: 4332 return BPF_PROG_TYPE_CGROUP_SYSCTL; 4333 case BPF_CGROUP_GETSOCKOPT: 4334 case BPF_CGROUP_SETSOCKOPT: 4335 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 4336 case BPF_TRACE_ITER: 4337 case BPF_TRACE_RAW_TP: 4338 case BPF_TRACE_FENTRY: 4339 case BPF_TRACE_FEXIT: 4340 case BPF_MODIFY_RETURN: 4341 return BPF_PROG_TYPE_TRACING; 4342 case BPF_LSM_MAC: 4343 return BPF_PROG_TYPE_LSM; 4344 case BPF_SK_LOOKUP: 4345 return BPF_PROG_TYPE_SK_LOOKUP; 4346 case BPF_XDP: 4347 return BPF_PROG_TYPE_XDP; 4348 case BPF_LSM_CGROUP: 4349 return BPF_PROG_TYPE_LSM; 4350 case BPF_TCX_INGRESS: 4351 case BPF_TCX_EGRESS: 4352 case BPF_NETKIT_PRIMARY: 4353 case BPF_NETKIT_PEER: 4354 return BPF_PROG_TYPE_SCHED_CLS; 4355 default: 4356 return BPF_PROG_TYPE_UNSPEC; 4357 } 4358 } 4359 4360 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 4361 enum bpf_attach_type attach_type) 4362 { 4363 enum bpf_prog_type ptype; 4364 4365 switch (prog->type) { 4366 case BPF_PROG_TYPE_CGROUP_SOCK: 4367 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4368 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4369 case BPF_PROG_TYPE_SK_LOOKUP: 4370 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 4371 case BPF_PROG_TYPE_CGROUP_SKB: 4372 if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN)) 4373 /* cg-skb progs can be loaded by unpriv user. 4374 * check permissions at attach time. 4375 */ 4376 return -EPERM; 4377 4378 ptype = attach_type_to_prog_type(attach_type); 4379 if (prog->type != ptype) 4380 return -EINVAL; 4381 4382 return prog->enforce_expected_attach_type && 4383 prog->expected_attach_type != attach_type ? 
4384 -EINVAL : 0; 4385 case BPF_PROG_TYPE_EXT: 4386 return 0; 4387 case BPF_PROG_TYPE_NETFILTER: 4388 if (attach_type != BPF_NETFILTER) 4389 return -EINVAL; 4390 return 0; 4391 case BPF_PROG_TYPE_PERF_EVENT: 4392 case BPF_PROG_TYPE_TRACEPOINT: 4393 if (attach_type != BPF_PERF_EVENT) 4394 return -EINVAL; 4395 return 0; 4396 case BPF_PROG_TYPE_KPROBE: 4397 if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && 4398 attach_type != BPF_TRACE_KPROBE_MULTI) 4399 return -EINVAL; 4400 if (prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION && 4401 attach_type != BPF_TRACE_KPROBE_SESSION) 4402 return -EINVAL; 4403 if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI && 4404 attach_type != BPF_TRACE_UPROBE_MULTI) 4405 return -EINVAL; 4406 if (prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION && 4407 attach_type != BPF_TRACE_UPROBE_SESSION) 4408 return -EINVAL; 4409 if (attach_type != BPF_PERF_EVENT && 4410 attach_type != BPF_TRACE_KPROBE_MULTI && 4411 attach_type != BPF_TRACE_KPROBE_SESSION && 4412 attach_type != BPF_TRACE_UPROBE_MULTI && 4413 attach_type != BPF_TRACE_UPROBE_SESSION) 4414 return -EINVAL; 4415 return 0; 4416 case BPF_PROG_TYPE_SCHED_CLS: 4417 if (attach_type != BPF_TCX_INGRESS && 4418 attach_type != BPF_TCX_EGRESS && 4419 attach_type != BPF_NETKIT_PRIMARY && 4420 attach_type != BPF_NETKIT_PEER) 4421 return -EINVAL; 4422 return 0; 4423 default: 4424 ptype = attach_type_to_prog_type(attach_type); 4425 if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) 4426 return -EINVAL; 4427 return 0; 4428 } 4429 } 4430 4431 static bool is_cgroup_prog_type(enum bpf_prog_type ptype, enum bpf_attach_type atype, 4432 bool check_atype) 4433 { 4434 switch (ptype) { 4435 case BPF_PROG_TYPE_CGROUP_DEVICE: 4436 case BPF_PROG_TYPE_CGROUP_SKB: 4437 case BPF_PROG_TYPE_CGROUP_SOCK: 4438 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4439 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4440 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4441 case BPF_PROG_TYPE_SOCK_OPS: 4442 return true; 4443 case BPF_PROG_TYPE_LSM: 4444 return check_atype ? 
atype == BPF_LSM_CGROUP : true; 4445 default: 4446 return false; 4447 } 4448 } 4449 4450 #define BPF_PROG_ATTACH_LAST_FIELD expected_revision 4451 4452 #define BPF_F_ATTACH_MASK_BASE \ 4453 (BPF_F_ALLOW_OVERRIDE | \ 4454 BPF_F_ALLOW_MULTI | \ 4455 BPF_F_REPLACE | \ 4456 BPF_F_PREORDER) 4457 4458 #define BPF_F_ATTACH_MASK_MPROG \ 4459 (BPF_F_REPLACE | \ 4460 BPF_F_BEFORE | \ 4461 BPF_F_AFTER | \ 4462 BPF_F_ID | \ 4463 BPF_F_LINK) 4464 4465 static int bpf_prog_attach(const union bpf_attr *attr) 4466 { 4467 enum bpf_prog_type ptype; 4468 struct bpf_prog *prog; 4469 int ret; 4470 4471 if (CHECK_ATTR(BPF_PROG_ATTACH)) 4472 return -EINVAL; 4473 4474 ptype = attach_type_to_prog_type(attr->attach_type); 4475 if (ptype == BPF_PROG_TYPE_UNSPEC) 4476 return -EINVAL; 4477 if (bpf_mprog_supported(ptype)) { 4478 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) 4479 return -EINVAL; 4480 } else if (is_cgroup_prog_type(ptype, 0, false)) { 4481 if (attr->attach_flags & ~(BPF_F_ATTACH_MASK_BASE | BPF_F_ATTACH_MASK_MPROG)) 4482 return -EINVAL; 4483 } else { 4484 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE) 4485 return -EINVAL; 4486 if (attr->relative_fd || 4487 attr->expected_revision) 4488 return -EINVAL; 4489 } 4490 4491 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 4492 if (IS_ERR(prog)) 4493 return PTR_ERR(prog); 4494 4495 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) { 4496 bpf_prog_put(prog); 4497 return -EINVAL; 4498 } 4499 4500 if (is_cgroup_prog_type(ptype, prog->expected_attach_type, true)) { 4501 ret = cgroup_bpf_prog_attach(attr, ptype, prog); 4502 goto out; 4503 } 4504 4505 switch (ptype) { 4506 case BPF_PROG_TYPE_SK_SKB: 4507 case BPF_PROG_TYPE_SK_MSG: 4508 ret = sock_map_get_from_fd(attr, prog); 4509 break; 4510 case BPF_PROG_TYPE_LIRC_MODE2: 4511 ret = lirc_prog_attach(attr, prog); 4512 break; 4513 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4514 ret = netns_bpf_prog_attach(attr, prog); 4515 break; 4516 case BPF_PROG_TYPE_SCHED_CLS: 4517 if (attr->attach_type == BPF_TCX_INGRESS || 4518 attr->attach_type == BPF_TCX_EGRESS) 4519 ret = tcx_prog_attach(attr, prog); 4520 else 4521 ret = netkit_prog_attach(attr, prog); 4522 break; 4523 default: 4524 ret = -EINVAL; 4525 } 4526 out: 4527 if (ret) 4528 bpf_prog_put(prog); 4529 return ret; 4530 } 4531 4532 #define BPF_PROG_DETACH_LAST_FIELD expected_revision 4533 4534 static int bpf_prog_detach(const union bpf_attr *attr) 4535 { 4536 struct bpf_prog *prog = NULL; 4537 enum bpf_prog_type ptype; 4538 int ret; 4539 4540 if (CHECK_ATTR(BPF_PROG_DETACH)) 4541 return -EINVAL; 4542 4543 ptype = attach_type_to_prog_type(attr->attach_type); 4544 if (bpf_mprog_supported(ptype)) { 4545 if (ptype == BPF_PROG_TYPE_UNSPEC) 4546 return -EINVAL; 4547 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) 4548 return -EINVAL; 4549 if (attr->attach_bpf_fd) { 4550 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 4551 if (IS_ERR(prog)) 4552 return PTR_ERR(prog); 4553 } 4554 } else if (is_cgroup_prog_type(ptype, 0, false)) { 4555 if (attr->attach_flags || attr->relative_fd) 4556 return -EINVAL; 4557 } else if (attr->attach_flags || 4558 attr->relative_fd || 4559 attr->expected_revision) { 4560 return -EINVAL; 4561 } 4562 4563 switch (ptype) { 4564 case BPF_PROG_TYPE_SK_MSG: 4565 case BPF_PROG_TYPE_SK_SKB: 4566 ret = sock_map_prog_detach(attr, ptype); 4567 break; 4568 case BPF_PROG_TYPE_LIRC_MODE2: 4569 ret = lirc_prog_detach(attr); 4570 break; 4571 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4572 ret = netns_bpf_prog_detach(attr, ptype); 4573 break; 4574 
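/* The remaining cgroup-managed program types, including LSM programs
 * attached as BPF_LSM_CGROUP, are detached through the cgroup attach
 * point below.
 */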
case BPF_PROG_TYPE_CGROUP_DEVICE: 4575 case BPF_PROG_TYPE_CGROUP_SKB: 4576 case BPF_PROG_TYPE_CGROUP_SOCK: 4577 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4578 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4579 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4580 case BPF_PROG_TYPE_SOCK_OPS: 4581 case BPF_PROG_TYPE_LSM: 4582 ret = cgroup_bpf_prog_detach(attr, ptype); 4583 break; 4584 case BPF_PROG_TYPE_SCHED_CLS: 4585 if (attr->attach_type == BPF_TCX_INGRESS || 4586 attr->attach_type == BPF_TCX_EGRESS) 4587 ret = tcx_prog_detach(attr, prog); 4588 else 4589 ret = netkit_prog_detach(attr, prog); 4590 break; 4591 default: 4592 ret = -EINVAL; 4593 } 4594 4595 if (prog) 4596 bpf_prog_put(prog); 4597 return ret; 4598 } 4599 4600 #define BPF_PROG_QUERY_LAST_FIELD query.revision 4601 4602 static int bpf_prog_query(const union bpf_attr *attr, 4603 union bpf_attr __user *uattr) 4604 { 4605 if (!bpf_net_capable()) 4606 return -EPERM; 4607 if (CHECK_ATTR(BPF_PROG_QUERY)) 4608 return -EINVAL; 4609 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) 4610 return -EINVAL; 4611 4612 switch (attr->query.attach_type) { 4613 case BPF_CGROUP_INET_INGRESS: 4614 case BPF_CGROUP_INET_EGRESS: 4615 case BPF_CGROUP_INET_SOCK_CREATE: 4616 case BPF_CGROUP_INET_SOCK_RELEASE: 4617 case BPF_CGROUP_INET4_BIND: 4618 case BPF_CGROUP_INET6_BIND: 4619 case BPF_CGROUP_INET4_POST_BIND: 4620 case BPF_CGROUP_INET6_POST_BIND: 4621 case BPF_CGROUP_INET4_CONNECT: 4622 case BPF_CGROUP_INET6_CONNECT: 4623 case BPF_CGROUP_UNIX_CONNECT: 4624 case BPF_CGROUP_INET4_GETPEERNAME: 4625 case BPF_CGROUP_INET6_GETPEERNAME: 4626 case BPF_CGROUP_UNIX_GETPEERNAME: 4627 case BPF_CGROUP_INET4_GETSOCKNAME: 4628 case BPF_CGROUP_INET6_GETSOCKNAME: 4629 case BPF_CGROUP_UNIX_GETSOCKNAME: 4630 case BPF_CGROUP_UDP4_SENDMSG: 4631 case BPF_CGROUP_UDP6_SENDMSG: 4632 case BPF_CGROUP_UNIX_SENDMSG: 4633 case BPF_CGROUP_UDP4_RECVMSG: 4634 case BPF_CGROUP_UDP6_RECVMSG: 4635 case BPF_CGROUP_UNIX_RECVMSG: 4636 case BPF_CGROUP_SOCK_OPS: 4637 case BPF_CGROUP_DEVICE: 4638 case BPF_CGROUP_SYSCTL: 4639 case BPF_CGROUP_GETSOCKOPT: 4640 case BPF_CGROUP_SETSOCKOPT: 4641 case BPF_LSM_CGROUP: 4642 return cgroup_bpf_prog_query(attr, uattr); 4643 case BPF_LIRC_MODE2: 4644 return lirc_prog_query(attr, uattr); 4645 case BPF_FLOW_DISSECTOR: 4646 case BPF_SK_LOOKUP: 4647 return netns_bpf_prog_query(attr, uattr); 4648 case BPF_SK_SKB_STREAM_PARSER: 4649 case BPF_SK_SKB_STREAM_VERDICT: 4650 case BPF_SK_MSG_VERDICT: 4651 case BPF_SK_SKB_VERDICT: 4652 return sock_map_bpf_prog_query(attr, uattr); 4653 case BPF_TCX_INGRESS: 4654 case BPF_TCX_EGRESS: 4655 return tcx_prog_query(attr, uattr); 4656 case BPF_NETKIT_PRIMARY: 4657 case BPF_NETKIT_PEER: 4658 return netkit_prog_query(attr, uattr); 4659 default: 4660 return -EINVAL; 4661 } 4662 } 4663 4664 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size 4665 4666 static int bpf_prog_test_run(const union bpf_attr *attr, 4667 union bpf_attr __user *uattr) 4668 { 4669 struct bpf_prog *prog; 4670 int ret = -ENOTSUPP; 4671 4672 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 4673 return -EINVAL; 4674 4675 if ((attr->test.ctx_size_in && !attr->test.ctx_in) || 4676 (!attr->test.ctx_size_in && attr->test.ctx_in)) 4677 return -EINVAL; 4678 4679 if ((attr->test.ctx_size_out && !attr->test.ctx_out) || 4680 (!attr->test.ctx_size_out && attr->test.ctx_out)) 4681 return -EINVAL; 4682 4683 prog = bpf_prog_get(attr->test.prog_fd); 4684 if (IS_ERR(prog)) 4685 return PTR_ERR(prog); 4686 4687 if (prog->aux->ops->test_run) 4688 ret = prog->aux->ops->test_run(prog, attr, uattr); 4689 4690 
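/* Drop the reference taken by bpf_prog_get() above; any test run has
 * completed (or was never supported, leaving ret at -ENOTSUPP) by this
 * point.
 */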
bpf_prog_put(prog); 4691 return ret; 4692 } 4693 4694 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 4695 4696 static int bpf_obj_get_next_id(const union bpf_attr *attr, 4697 union bpf_attr __user *uattr, 4698 struct idr *idr, 4699 spinlock_t *lock) 4700 { 4701 u32 next_id = attr->start_id; 4702 int err = 0; 4703 4704 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 4705 return -EINVAL; 4706 4707 if (!capable(CAP_SYS_ADMIN)) 4708 return -EPERM; 4709 4710 next_id++; 4711 spin_lock_bh(lock); 4712 if (!idr_get_next(idr, &next_id)) 4713 err = -ENOENT; 4714 spin_unlock_bh(lock); 4715 4716 if (!err) 4717 err = put_user(next_id, &uattr->next_id); 4718 4719 return err; 4720 } 4721 4722 struct bpf_map *bpf_map_get_curr_or_next(u32 *id) 4723 { 4724 struct bpf_map *map; 4725 4726 spin_lock_bh(&map_idr_lock); 4727 again: 4728 map = idr_get_next(&map_idr, id); 4729 if (map) { 4730 map = __bpf_map_inc_not_zero(map, false); 4731 if (IS_ERR(map)) { 4732 (*id)++; 4733 goto again; 4734 } 4735 } 4736 spin_unlock_bh(&map_idr_lock); 4737 4738 return map; 4739 } 4740 4741 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) 4742 { 4743 struct bpf_prog *prog; 4744 4745 spin_lock_bh(&prog_idr_lock); 4746 again: 4747 prog = idr_get_next(&prog_idr, id); 4748 if (prog) { 4749 prog = bpf_prog_inc_not_zero(prog); 4750 if (IS_ERR(prog)) { 4751 (*id)++; 4752 goto again; 4753 } 4754 } 4755 spin_unlock_bh(&prog_idr_lock); 4756 4757 return prog; 4758 } 4759 4760 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 4761 4762 struct bpf_prog *bpf_prog_by_id(u32 id) 4763 { 4764 struct bpf_prog *prog; 4765 4766 if (!id) 4767 return ERR_PTR(-ENOENT); 4768 4769 spin_lock_bh(&prog_idr_lock); 4770 prog = idr_find(&prog_idr, id); 4771 if (prog) 4772 prog = bpf_prog_inc_not_zero(prog); 4773 else 4774 prog = ERR_PTR(-ENOENT); 4775 spin_unlock_bh(&prog_idr_lock); 4776 return prog; 4777 } 4778 4779 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 4780 { 4781 struct bpf_prog *prog; 4782 u32 id = attr->prog_id; 4783 int fd; 4784 4785 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 4786 return -EINVAL; 4787 4788 if (!capable(CAP_SYS_ADMIN)) 4789 return -EPERM; 4790 4791 prog = bpf_prog_by_id(id); 4792 if (IS_ERR(prog)) 4793 return PTR_ERR(prog); 4794 4795 fd = bpf_prog_new_fd(prog); 4796 if (fd < 0) 4797 bpf_prog_put(prog); 4798 4799 return fd; 4800 } 4801 4802 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 4803 4804 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 4805 { 4806 struct bpf_map *map; 4807 u32 id = attr->map_id; 4808 int f_flags; 4809 int fd; 4810 4811 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || 4812 attr->open_flags & ~BPF_OBJ_FLAG_MASK) 4813 return -EINVAL; 4814 4815 if (!capable(CAP_SYS_ADMIN)) 4816 return -EPERM; 4817 4818 f_flags = bpf_get_file_flag(attr->open_flags); 4819 if (f_flags < 0) 4820 return f_flags; 4821 4822 spin_lock_bh(&map_idr_lock); 4823 map = idr_find(&map_idr, id); 4824 if (map) 4825 map = __bpf_map_inc_not_zero(map, true); 4826 else 4827 map = ERR_PTR(-ENOENT); 4828 spin_unlock_bh(&map_idr_lock); 4829 4830 if (IS_ERR(map)) 4831 return PTR_ERR(map); 4832 4833 fd = bpf_map_new_fd(map, f_flags); 4834 if (fd < 0) 4835 bpf_map_put_with_uref(map); 4836 4837 return fd; 4838 } 4839 4840 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 4841 unsigned long addr, u32 *off, 4842 u32 *type) 4843 { 4844 const struct bpf_map *map; 4845 int i; 4846 4847 mutex_lock(&prog->aux->used_maps_mutex); 4848 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { 4849 map = 
prog->aux->used_maps[i]; 4850 if (map == (void *)addr) { 4851 *type = BPF_PSEUDO_MAP_FD; 4852 goto out; 4853 } 4854 if (!map->ops->map_direct_value_meta) 4855 continue; 4856 if (!map->ops->map_direct_value_meta(map, addr, off)) { 4857 *type = BPF_PSEUDO_MAP_VALUE; 4858 goto out; 4859 } 4860 } 4861 map = NULL; 4862 4863 out: 4864 mutex_unlock(&prog->aux->used_maps_mutex); 4865 return map; 4866 } 4867 4868 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, 4869 const struct cred *f_cred) 4870 { 4871 const struct bpf_map *map; 4872 struct bpf_insn *insns; 4873 u32 off, type; 4874 u64 imm; 4875 u8 code; 4876 int i; 4877 4878 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), 4879 GFP_USER); 4880 if (!insns) 4881 return insns; 4882 4883 for (i = 0; i < prog->len; i++) { 4884 code = insns[i].code; 4885 4886 if (code == (BPF_JMP | BPF_TAIL_CALL)) { 4887 insns[i].code = BPF_JMP | BPF_CALL; 4888 insns[i].imm = BPF_FUNC_tail_call; 4889 /* fall-through */ 4890 } 4891 if (code == (BPF_JMP | BPF_CALL) || 4892 code == (BPF_JMP | BPF_CALL_ARGS)) { 4893 if (code == (BPF_JMP | BPF_CALL_ARGS)) 4894 insns[i].code = BPF_JMP | BPF_CALL; 4895 if (!bpf_dump_raw_ok(f_cred)) 4896 insns[i].imm = 0; 4897 continue; 4898 } 4899 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { 4900 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; 4901 continue; 4902 } 4903 4904 if ((BPF_CLASS(code) == BPF_LDX || BPF_CLASS(code) == BPF_STX || 4905 BPF_CLASS(code) == BPF_ST) && BPF_MODE(code) == BPF_PROBE_MEM32) { 4906 insns[i].code = BPF_CLASS(code) | BPF_SIZE(code) | BPF_MEM; 4907 continue; 4908 } 4909 4910 if (code != (BPF_LD | BPF_IMM | BPF_DW)) 4911 continue; 4912 4913 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 4914 map = bpf_map_from_imm(prog, imm, &off, &type); 4915 if (map) { 4916 insns[i].src_reg = type; 4917 insns[i].imm = map->id; 4918 insns[i + 1].imm = off; 4919 continue; 4920 } 4921 } 4922 4923 return insns; 4924 } 4925 4926 static int set_info_rec_size(struct bpf_prog_info *info) 4927 { 4928 /* 4929 * Ensure info.*_rec_size is the same as kernel expected size 4930 * 4931 * or 4932 * 4933 * Only allow zero *_rec_size if both _rec_size and _cnt are 4934 * zero. In this case, the kernel will set the expected 4935 * _rec_size back to the info. 
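 *
 * Illustrative example (not tied to a particular UAPI revision): a
 * caller passing nr_func_info > 0 must also pass
 * func_info_rec_size == sizeof(struct bpf_func_info); passing both
 * fields as zero is accepted, and the expected record size is then
 * reported back in the info struct copied to the caller.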
4936 */ 4937 4938 if ((info->nr_func_info || info->func_info_rec_size) && 4939 info->func_info_rec_size != sizeof(struct bpf_func_info)) 4940 return -EINVAL; 4941 4942 if ((info->nr_line_info || info->line_info_rec_size) && 4943 info->line_info_rec_size != sizeof(struct bpf_line_info)) 4944 return -EINVAL; 4945 4946 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && 4947 info->jited_line_info_rec_size != sizeof(__u64)) 4948 return -EINVAL; 4949 4950 info->func_info_rec_size = sizeof(struct bpf_func_info); 4951 info->line_info_rec_size = sizeof(struct bpf_line_info); 4952 info->jited_line_info_rec_size = sizeof(__u64); 4953 4954 return 0; 4955 } 4956 4957 static int bpf_prog_get_info_by_fd(struct file *file, 4958 struct bpf_prog *prog, 4959 const union bpf_attr *attr, 4960 union bpf_attr __user *uattr) 4961 { 4962 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 4963 struct btf *attach_btf = bpf_prog_get_target_btf(prog); 4964 struct bpf_prog_info info; 4965 u32 info_len = attr->info.info_len; 4966 struct bpf_prog_kstats stats; 4967 char __user *uinsns; 4968 u32 ulen; 4969 int err; 4970 4971 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 4972 if (err) 4973 return err; 4974 info_len = min_t(u32, sizeof(info), info_len); 4975 4976 memset(&info, 0, sizeof(info)); 4977 if (copy_from_user(&info, uinfo, info_len)) 4978 return -EFAULT; 4979 4980 info.type = prog->type; 4981 info.id = prog->aux->id; 4982 info.load_time = prog->aux->load_time; 4983 info.created_by_uid = from_kuid_munged(current_user_ns(), 4984 prog->aux->user->uid); 4985 info.gpl_compatible = prog->gpl_compatible; 4986 4987 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 4988 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 4989 4990 mutex_lock(&prog->aux->used_maps_mutex); 4991 ulen = info.nr_map_ids; 4992 info.nr_map_ids = prog->aux->used_map_cnt; 4993 ulen = min_t(u32, info.nr_map_ids, ulen); 4994 if (ulen) { 4995 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 4996 u32 i; 4997 4998 for (i = 0; i < ulen; i++) 4999 if (put_user(prog->aux->used_maps[i]->id, 5000 &user_map_ids[i])) { 5001 mutex_unlock(&prog->aux->used_maps_mutex); 5002 return -EFAULT; 5003 } 5004 } 5005 mutex_unlock(&prog->aux->used_maps_mutex); 5006 5007 err = set_info_rec_size(&info); 5008 if (err) 5009 return err; 5010 5011 bpf_prog_get_stats(prog, &stats); 5012 info.run_time_ns = stats.nsecs; 5013 info.run_cnt = stats.cnt; 5014 info.recursion_misses = stats.misses; 5015 5016 info.verified_insns = prog->aux->verified_insns; 5017 if (prog->aux->btf) 5018 info.btf_id = btf_obj_id(prog->aux->btf); 5019 5020 if (!bpf_capable()) { 5021 info.jited_prog_len = 0; 5022 info.xlated_prog_len = 0; 5023 info.nr_jited_ksyms = 0; 5024 info.nr_jited_func_lens = 0; 5025 info.nr_func_info = 0; 5026 info.nr_line_info = 0; 5027 info.nr_jited_line_info = 0; 5028 goto done; 5029 } 5030 5031 ulen = info.xlated_prog_len; 5032 info.xlated_prog_len = bpf_prog_insn_size(prog); 5033 if (info.xlated_prog_len && ulen) { 5034 struct bpf_insn *insns_sanitized; 5035 bool fault; 5036 5037 if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) { 5038 info.xlated_prog_insns = 0; 5039 goto done; 5040 } 5041 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); 5042 if (!insns_sanitized) 5043 return -ENOMEM; 5044 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 5045 ulen = min_t(u32, info.xlated_prog_len, ulen); 5046 fault = copy_to_user(uinsns, insns_sanitized, ulen); 5047 kfree(insns_sanitized); 5048 if 
(fault) 5049 return -EFAULT; 5050 } 5051 5052 if (bpf_prog_is_offloaded(prog->aux)) { 5053 err = bpf_prog_offload_info_fill(&info, prog); 5054 if (err) 5055 return err; 5056 goto done; 5057 } 5058 5059 /* NOTE: the following code is supposed to be skipped for offload. 5060 * bpf_prog_offload_info_fill() is the place to fill similar fields 5061 * for offload. 5062 */ 5063 ulen = info.jited_prog_len; 5064 if (prog->aux->func_cnt) { 5065 u32 i; 5066 5067 info.jited_prog_len = 0; 5068 for (i = 0; i < prog->aux->func_cnt; i++) 5069 info.jited_prog_len += prog->aux->func[i]->jited_len; 5070 } else { 5071 info.jited_prog_len = prog->jited_len; 5072 } 5073 5074 if (info.jited_prog_len && ulen) { 5075 if (bpf_dump_raw_ok(file->f_cred)) { 5076 uinsns = u64_to_user_ptr(info.jited_prog_insns); 5077 ulen = min_t(u32, info.jited_prog_len, ulen); 5078 5079 /* for multi-function programs, copy the JITed 5080 * instructions for all the functions 5081 */ 5082 if (prog->aux->func_cnt) { 5083 u32 len, free, i; 5084 u8 *img; 5085 5086 free = ulen; 5087 for (i = 0; i < prog->aux->func_cnt; i++) { 5088 len = prog->aux->func[i]->jited_len; 5089 len = min_t(u32, len, free); 5090 img = (u8 *) prog->aux->func[i]->bpf_func; 5091 if (copy_to_user(uinsns, img, len)) 5092 return -EFAULT; 5093 uinsns += len; 5094 free -= len; 5095 if (!free) 5096 break; 5097 } 5098 } else { 5099 if (copy_to_user(uinsns, prog->bpf_func, ulen)) 5100 return -EFAULT; 5101 } 5102 } else { 5103 info.jited_prog_insns = 0; 5104 } 5105 } 5106 5107 ulen = info.nr_jited_ksyms; 5108 info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; 5109 if (ulen) { 5110 if (bpf_dump_raw_ok(file->f_cred)) { 5111 unsigned long ksym_addr; 5112 u64 __user *user_ksyms; 5113 u32 i; 5114 5115 /* copy the address of the kernel symbol 5116 * corresponding to each function 5117 */ 5118 ulen = min_t(u32, info.nr_jited_ksyms, ulen); 5119 user_ksyms = u64_to_user_ptr(info.jited_ksyms); 5120 if (prog->aux->func_cnt) { 5121 for (i = 0; i < ulen; i++) { 5122 ksym_addr = (unsigned long) 5123 prog->aux->func[i]->bpf_func; 5124 if (put_user((u64) ksym_addr, 5125 &user_ksyms[i])) 5126 return -EFAULT; 5127 } 5128 } else { 5129 ksym_addr = (unsigned long) prog->bpf_func; 5130 if (put_user((u64) ksym_addr, &user_ksyms[0])) 5131 return -EFAULT; 5132 } 5133 } else { 5134 info.jited_ksyms = 0; 5135 } 5136 } 5137 5138 ulen = info.nr_jited_func_lens; 5139 info.nr_jited_func_lens = prog->aux->func_cnt ? 
: 1; 5140 if (ulen) { 5141 if (bpf_dump_raw_ok(file->f_cred)) { 5142 u32 __user *user_lens; 5143 u32 func_len, i; 5144 5145 /* copy the JITed image lengths for each function */ 5146 ulen = min_t(u32, info.nr_jited_func_lens, ulen); 5147 user_lens = u64_to_user_ptr(info.jited_func_lens); 5148 if (prog->aux->func_cnt) { 5149 for (i = 0; i < ulen; i++) { 5150 func_len = 5151 prog->aux->func[i]->jited_len; 5152 if (put_user(func_len, &user_lens[i])) 5153 return -EFAULT; 5154 } 5155 } else { 5156 func_len = prog->jited_len; 5157 if (put_user(func_len, &user_lens[0])) 5158 return -EFAULT; 5159 } 5160 } else { 5161 info.jited_func_lens = 0; 5162 } 5163 } 5164 5165 info.attach_btf_id = prog->aux->attach_btf_id; 5166 if (attach_btf) 5167 info.attach_btf_obj_id = btf_obj_id(attach_btf); 5168 5169 ulen = info.nr_func_info; 5170 info.nr_func_info = prog->aux->func_info_cnt; 5171 if (info.nr_func_info && ulen) { 5172 char __user *user_finfo; 5173 5174 user_finfo = u64_to_user_ptr(info.func_info); 5175 ulen = min_t(u32, info.nr_func_info, ulen); 5176 if (copy_to_user(user_finfo, prog->aux->func_info, 5177 info.func_info_rec_size * ulen)) 5178 return -EFAULT; 5179 } 5180 5181 ulen = info.nr_line_info; 5182 info.nr_line_info = prog->aux->nr_linfo; 5183 if (info.nr_line_info && ulen) { 5184 __u8 __user *user_linfo; 5185 5186 user_linfo = u64_to_user_ptr(info.line_info); 5187 ulen = min_t(u32, info.nr_line_info, ulen); 5188 if (copy_to_user(user_linfo, prog->aux->linfo, 5189 info.line_info_rec_size * ulen)) 5190 return -EFAULT; 5191 } 5192 5193 ulen = info.nr_jited_line_info; 5194 if (prog->aux->jited_linfo) 5195 info.nr_jited_line_info = prog->aux->nr_linfo; 5196 else 5197 info.nr_jited_line_info = 0; 5198 if (info.nr_jited_line_info && ulen) { 5199 if (bpf_dump_raw_ok(file->f_cred)) { 5200 unsigned long line_addr; 5201 __u64 __user *user_linfo; 5202 u32 i; 5203 5204 user_linfo = u64_to_user_ptr(info.jited_line_info); 5205 ulen = min_t(u32, info.nr_jited_line_info, ulen); 5206 for (i = 0; i < ulen; i++) { 5207 line_addr = (unsigned long)prog->aux->jited_linfo[i]; 5208 if (put_user((__u64)line_addr, &user_linfo[i])) 5209 return -EFAULT; 5210 } 5211 } else { 5212 info.jited_line_info = 0; 5213 } 5214 } 5215 5216 ulen = info.nr_prog_tags; 5217 info.nr_prog_tags = prog->aux->func_cnt ? 
: 1; 5218 if (ulen) { 5219 __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; 5220 u32 i; 5221 5222 user_prog_tags = u64_to_user_ptr(info.prog_tags); 5223 ulen = min_t(u32, info.nr_prog_tags, ulen); 5224 if (prog->aux->func_cnt) { 5225 for (i = 0; i < ulen; i++) { 5226 if (copy_to_user(user_prog_tags[i], 5227 prog->aux->func[i]->tag, 5228 BPF_TAG_SIZE)) 5229 return -EFAULT; 5230 } 5231 } else { 5232 if (copy_to_user(user_prog_tags[0], 5233 prog->tag, BPF_TAG_SIZE)) 5234 return -EFAULT; 5235 } 5236 } 5237 5238 done: 5239 if (copy_to_user(uinfo, &info, info_len) || 5240 put_user(info_len, &uattr->info.info_len)) 5241 return -EFAULT; 5242 5243 return 0; 5244 } 5245 5246 static int bpf_map_get_info_by_fd(struct file *file, 5247 struct bpf_map *map, 5248 const union bpf_attr *attr, 5249 union bpf_attr __user *uattr) 5250 { 5251 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5252 struct bpf_map_info info; 5253 u32 info_len = attr->info.info_len; 5254 int err; 5255 5256 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 5257 if (err) 5258 return err; 5259 info_len = min_t(u32, sizeof(info), info_len); 5260 5261 memset(&info, 0, sizeof(info)); 5262 if (copy_from_user(&info, uinfo, info_len)) 5263 return -EFAULT; 5264 5265 info.type = map->map_type; 5266 info.id = map->id; 5267 info.key_size = map->key_size; 5268 info.value_size = map->value_size; 5269 info.max_entries = map->max_entries; 5270 info.map_flags = map->map_flags; 5271 info.map_extra = map->map_extra; 5272 memcpy(info.name, map->name, sizeof(map->name)); 5273 5274 if (map->btf) { 5275 info.btf_id = btf_obj_id(map->btf); 5276 info.btf_key_type_id = map->btf_key_type_id; 5277 info.btf_value_type_id = map->btf_value_type_id; 5278 } 5279 info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 5280 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) 5281 bpf_map_struct_ops_info_fill(&info, map); 5282 5283 if (bpf_map_is_offloaded(map)) { 5284 err = bpf_map_offload_info_fill(&info, map); 5285 if (err) 5286 return err; 5287 } 5288 5289 if (info.hash) { 5290 char __user *uhash = u64_to_user_ptr(info.hash); 5291 5292 if (!map->ops->map_get_hash) 5293 return -EINVAL; 5294 5295 if (info.hash_size != SHA256_DIGEST_SIZE) 5296 return -EINVAL; 5297 5298 err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha); 5299 if (err != 0) 5300 return err; 5301 5302 if (copy_to_user(uhash, map->sha, SHA256_DIGEST_SIZE) != 0) 5303 return -EFAULT; 5304 } else if (info.hash_size) { 5305 return -EINVAL; 5306 } 5307 5308 if (copy_to_user(uinfo, &info, info_len) || 5309 put_user(info_len, &uattr->info.info_len)) 5310 return -EFAULT; 5311 5312 return 0; 5313 } 5314 5315 static int bpf_btf_get_info_by_fd(struct file *file, 5316 struct btf *btf, 5317 const union bpf_attr *attr, 5318 union bpf_attr __user *uattr) 5319 { 5320 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5321 u32 info_len = attr->info.info_len; 5322 int err; 5323 5324 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5325 if (err) 5326 return err; 5327 5328 return btf_get_info_by_fd(btf, attr, uattr); 5329 } 5330 5331 static int bpf_link_get_info_by_fd(struct file *file, 5332 struct bpf_link *link, 5333 const union bpf_attr *attr, 5334 union bpf_attr __user *uattr) 5335 { 5336 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5337 struct bpf_link_info info; 5338 u32 info_len = attr->info.info_len; 5339 int err; 5340 5341 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), 
sizeof(info), info_len); 5342 if (err) 5343 return err; 5344 info_len = min_t(u32, sizeof(info), info_len); 5345 5346 memset(&info, 0, sizeof(info)); 5347 if (copy_from_user(&info, uinfo, info_len)) 5348 return -EFAULT; 5349 5350 info.type = link->type; 5351 info.id = link->id; 5352 if (link->prog) 5353 info.prog_id = link->prog->aux->id; 5354 5355 if (link->ops->fill_link_info) { 5356 err = link->ops->fill_link_info(link, &info); 5357 if (err) 5358 return err; 5359 } 5360 5361 if (copy_to_user(uinfo, &info, info_len) || 5362 put_user(info_len, &uattr->info.info_len)) 5363 return -EFAULT; 5364 5365 return 0; 5366 } 5367 5368 5369 static int token_get_info_by_fd(struct file *file, 5370 struct bpf_token *token, 5371 const union bpf_attr *attr, 5372 union bpf_attr __user *uattr) 5373 { 5374 struct bpf_token_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5375 u32 info_len = attr->info.info_len; 5376 int err; 5377 5378 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5379 if (err) 5380 return err; 5381 return bpf_token_get_info_by_fd(token, attr, uattr); 5382 } 5383 5384 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info 5385 5386 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, 5387 union bpf_attr __user *uattr) 5388 { 5389 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) 5390 return -EINVAL; 5391 5392 CLASS(fd, f)(attr->info.bpf_fd); 5393 if (fd_empty(f)) 5394 return -EBADFD; 5395 5396 if (fd_file(f)->f_op == &bpf_prog_fops) 5397 return bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5398 uattr); 5399 else if (fd_file(f)->f_op == &bpf_map_fops) 5400 return bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5401 uattr); 5402 else if (fd_file(f)->f_op == &btf_fops) 5403 return bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); 5404 else if (fd_file(f)->f_op == &bpf_link_fops || fd_file(f)->f_op == &bpf_link_fops_poll) 5405 return bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5406 attr, uattr); 5407 else if (fd_file(f)->f_op == &bpf_token_fops) 5408 return token_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5409 attr, uattr); 5410 return -EINVAL; 5411 } 5412 5413 #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd 5414 5415 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) 5416 { 5417 struct bpf_token *token = NULL; 5418 5419 if (CHECK_ATTR(BPF_BTF_LOAD)) 5420 return -EINVAL; 5421 5422 if (attr->btf_flags & ~BPF_F_TOKEN_FD) 5423 return -EINVAL; 5424 5425 if (attr->btf_flags & BPF_F_TOKEN_FD) { 5426 token = bpf_token_get_from_fd(attr->btf_token_fd); 5427 if (IS_ERR(token)) 5428 return PTR_ERR(token); 5429 if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { 5430 bpf_token_put(token); 5431 token = NULL; 5432 } 5433 } 5434 5435 if (!bpf_token_capable(token, CAP_BPF)) { 5436 bpf_token_put(token); 5437 return -EPERM; 5438 } 5439 5440 bpf_token_put(token); 5441 5442 return btf_new_fd(attr, uattr, uattr_size); 5443 } 5444 5445 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd 5446 5447 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) 5448 { 5449 struct bpf_token *token = NULL; 5450 5451 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) 5452 return -EINVAL; 5453 5454 if (attr->open_flags & ~BPF_F_TOKEN_FD) 5455 return -EINVAL; 5456 5457 if (attr->open_flags & BPF_F_TOKEN_FD) { 5458 token = bpf_token_get_from_fd(attr->fd_by_id_token_fd); 5459 if (IS_ERR(token)) 5460 return PTR_ERR(token); 5461 if (!bpf_token_allow_cmd(token, BPF_BTF_GET_FD_BY_ID)) { 
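/* The token does not delegate BPF_BTF_GET_FD_BY_ID: drop it and fall
 * back to the capability check below.
 */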
5462 bpf_token_put(token); 5463 token = NULL; 5464 } 5465 } 5466 5467 if (!bpf_token_capable(token, CAP_SYS_ADMIN)) { 5468 bpf_token_put(token); 5469 return -EPERM; 5470 } 5471 5472 bpf_token_put(token); 5473 5474 return btf_get_fd_by_id(attr->btf_id); 5475 } 5476 5477 static int bpf_task_fd_query_copy(const union bpf_attr *attr, 5478 union bpf_attr __user *uattr, 5479 u32 prog_id, u32 fd_type, 5480 const char *buf, u64 probe_offset, 5481 u64 probe_addr) 5482 { 5483 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf); 5484 u32 len = buf ? strlen(buf) : 0, input_len; 5485 int err = 0; 5486 5487 if (put_user(len, &uattr->task_fd_query.buf_len)) 5488 return -EFAULT; 5489 input_len = attr->task_fd_query.buf_len; 5490 if (input_len && ubuf) { 5491 if (!len) { 5492 /* nothing to copy, just make ubuf NULL terminated */ 5493 char zero = '\0'; 5494 5495 if (put_user(zero, ubuf)) 5496 return -EFAULT; 5497 } else { 5498 err = bpf_copy_to_user(ubuf, buf, input_len, len); 5499 if (err == -EFAULT) 5500 return err; 5501 } 5502 } 5503 5504 if (put_user(prog_id, &uattr->task_fd_query.prog_id) || 5505 put_user(fd_type, &uattr->task_fd_query.fd_type) || 5506 put_user(probe_offset, &uattr->task_fd_query.probe_offset) || 5507 put_user(probe_addr, &uattr->task_fd_query.probe_addr)) 5508 return -EFAULT; 5509 5510 return err; 5511 } 5512 5513 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr 5514 5515 static int bpf_task_fd_query(const union bpf_attr *attr, 5516 union bpf_attr __user *uattr) 5517 { 5518 pid_t pid = attr->task_fd_query.pid; 5519 u32 fd = attr->task_fd_query.fd; 5520 const struct perf_event *event; 5521 struct task_struct *task; 5522 struct file *file; 5523 int err; 5524 5525 if (CHECK_ATTR(BPF_TASK_FD_QUERY)) 5526 return -EINVAL; 5527 5528 if (!capable(CAP_SYS_ADMIN)) 5529 return -EPERM; 5530 5531 if (attr->task_fd_query.flags != 0) 5532 return -EINVAL; 5533 5534 rcu_read_lock(); 5535 task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 5536 rcu_read_unlock(); 5537 if (!task) 5538 return -ENOENT; 5539 5540 err = 0; 5541 file = fget_task(task, fd); 5542 put_task_struct(task); 5543 if (!file) 5544 return -EBADF; 5545 5546 if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) { 5547 struct bpf_link *link = file->private_data; 5548 5549 if (link->ops == &bpf_raw_tp_link_lops) { 5550 struct bpf_raw_tp_link *raw_tp = 5551 container_of(link, struct bpf_raw_tp_link, link); 5552 struct bpf_raw_event_map *btp = raw_tp->btp; 5553 5554 err = bpf_task_fd_query_copy(attr, uattr, 5555 raw_tp->link.prog->aux->id, 5556 BPF_FD_TYPE_RAW_TRACEPOINT, 5557 btp->tp->name, 0, 0); 5558 goto put_file; 5559 } 5560 goto out_not_supp; 5561 } 5562 5563 event = perf_get_event(file); 5564 if (!IS_ERR(event)) { 5565 u64 probe_offset, probe_addr; 5566 u32 prog_id, fd_type; 5567 const char *buf; 5568 5569 err = bpf_get_perf_event_info(event, &prog_id, &fd_type, 5570 &buf, &probe_offset, 5571 &probe_addr, NULL); 5572 if (!err) 5573 err = bpf_task_fd_query_copy(attr, uattr, prog_id, 5574 fd_type, buf, 5575 probe_offset, 5576 probe_addr); 5577 goto put_file; 5578 } 5579 5580 out_not_supp: 5581 err = -ENOTSUPP; 5582 put_file: 5583 fput(file); 5584 return err; 5585 } 5586 5587 #define BPF_MAP_BATCH_LAST_FIELD batch.flags 5588 5589 #define BPF_DO_BATCH(fn, ...) 
\ 5590 do { \ 5591 if (!fn) { \ 5592 err = -ENOTSUPP; \ 5593 goto err_put; \ 5594 } \ 5595 err = fn(__VA_ARGS__); \ 5596 } while (0) 5597 5598 static int bpf_map_do_batch(const union bpf_attr *attr, 5599 union bpf_attr __user *uattr, 5600 int cmd) 5601 { 5602 bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || 5603 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; 5604 bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; 5605 struct bpf_map *map; 5606 int err; 5607 5608 if (CHECK_ATTR(BPF_MAP_BATCH)) 5609 return -EINVAL; 5610 5611 CLASS(fd, f)(attr->batch.map_fd); 5612 5613 map = __bpf_map_get(f); 5614 if (IS_ERR(map)) 5615 return PTR_ERR(map); 5616 if (has_write) 5617 bpf_map_write_active_inc(map); 5618 if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 5619 err = -EPERM; 5620 goto err_put; 5621 } 5622 if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 5623 err = -EPERM; 5624 goto err_put; 5625 } 5626 5627 if (cmd == BPF_MAP_LOOKUP_BATCH) 5628 BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr); 5629 else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) 5630 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr); 5631 else if (cmd == BPF_MAP_UPDATE_BATCH) 5632 BPF_DO_BATCH(map->ops->map_update_batch, map, fd_file(f), attr, uattr); 5633 else 5634 BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr); 5635 err_put: 5636 if (has_write) { 5637 maybe_wait_bpf_programs(map); 5638 bpf_map_write_active_dec(map); 5639 } 5640 return err; 5641 } 5642 5643 #define BPF_LINK_CREATE_LAST_FIELD link_create.uprobe_multi.pid 5644 static int link_create(union bpf_attr *attr, bpfptr_t uattr) 5645 { 5646 struct bpf_prog *prog; 5647 int ret; 5648 5649 if (CHECK_ATTR(BPF_LINK_CREATE)) 5650 return -EINVAL; 5651 5652 if (attr->link_create.attach_type == BPF_STRUCT_OPS) 5653 return bpf_struct_ops_link_create(attr); 5654 5655 prog = bpf_prog_get(attr->link_create.prog_fd); 5656 if (IS_ERR(prog)) 5657 return PTR_ERR(prog); 5658 5659 ret = bpf_prog_attach_check_attach_type(prog, 5660 attr->link_create.attach_type); 5661 if (ret) 5662 goto out; 5663 5664 switch (prog->type) { 5665 case BPF_PROG_TYPE_CGROUP_SKB: 5666 case BPF_PROG_TYPE_CGROUP_SOCK: 5667 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 5668 case BPF_PROG_TYPE_SOCK_OPS: 5669 case BPF_PROG_TYPE_CGROUP_DEVICE: 5670 case BPF_PROG_TYPE_CGROUP_SYSCTL: 5671 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 5672 ret = cgroup_bpf_link_attach(attr, prog); 5673 break; 5674 case BPF_PROG_TYPE_EXT: 5675 ret = bpf_tracing_prog_attach(prog, 5676 attr->link_create.target_fd, 5677 attr->link_create.target_btf_id, 5678 attr->link_create.tracing.cookie, 5679 attr->link_create.attach_type); 5680 break; 5681 case BPF_PROG_TYPE_LSM: 5682 case BPF_PROG_TYPE_TRACING: 5683 if (attr->link_create.attach_type != prog->expected_attach_type) { 5684 ret = -EINVAL; 5685 goto out; 5686 } 5687 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) 5688 ret = bpf_raw_tp_link_attach(prog, NULL, attr->link_create.tracing.cookie, 5689 attr->link_create.attach_type); 5690 else if (prog->expected_attach_type == BPF_TRACE_ITER) 5691 ret = bpf_iter_link_attach(attr, uattr, prog); 5692 else if (prog->expected_attach_type == BPF_LSM_CGROUP) 5693 ret = cgroup_bpf_link_attach(attr, prog); 5694 else 5695 ret = bpf_tracing_prog_attach(prog, 5696 attr->link_create.target_fd, 5697 attr->link_create.target_btf_id, 5698 attr->link_create.tracing.cookie, 5699 attr->link_create.attach_type); 5700 break; 5701 case BPF_PROG_TYPE_FLOW_DISSECTOR: 5702 case BPF_PROG_TYPE_SK_LOOKUP: 5703 ret = 
netns_bpf_link_create(attr, prog); 5704 break; 5705 case BPF_PROG_TYPE_SK_MSG: 5706 case BPF_PROG_TYPE_SK_SKB: 5707 ret = sock_map_link_create(attr, prog); 5708 break; 5709 #ifdef CONFIG_NET 5710 case BPF_PROG_TYPE_XDP: 5711 ret = bpf_xdp_link_attach(attr, prog); 5712 break; 5713 case BPF_PROG_TYPE_SCHED_CLS: 5714 if (attr->link_create.attach_type == BPF_TCX_INGRESS || 5715 attr->link_create.attach_type == BPF_TCX_EGRESS) 5716 ret = tcx_link_attach(attr, prog); 5717 else 5718 ret = netkit_link_attach(attr, prog); 5719 break; 5720 case BPF_PROG_TYPE_NETFILTER: 5721 ret = bpf_nf_link_attach(attr, prog); 5722 break; 5723 #endif 5724 case BPF_PROG_TYPE_PERF_EVENT: 5725 case BPF_PROG_TYPE_TRACEPOINT: 5726 ret = bpf_perf_link_attach(attr, prog); 5727 break; 5728 case BPF_PROG_TYPE_KPROBE: 5729 if (attr->link_create.attach_type == BPF_PERF_EVENT) 5730 ret = bpf_perf_link_attach(attr, prog); 5731 else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI || 5732 attr->link_create.attach_type == BPF_TRACE_KPROBE_SESSION) 5733 ret = bpf_kprobe_multi_link_attach(attr, prog); 5734 else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI || 5735 attr->link_create.attach_type == BPF_TRACE_UPROBE_SESSION) 5736 ret = bpf_uprobe_multi_link_attach(attr, prog); 5737 break; 5738 default: 5739 ret = -EINVAL; 5740 } 5741 5742 out: 5743 if (ret < 0) 5744 bpf_prog_put(prog); 5745 return ret; 5746 } 5747 5748 static int link_update_map(struct bpf_link *link, union bpf_attr *attr) 5749 { 5750 struct bpf_map *new_map, *old_map = NULL; 5751 int ret; 5752 5753 new_map = bpf_map_get(attr->link_update.new_map_fd); 5754 if (IS_ERR(new_map)) 5755 return PTR_ERR(new_map); 5756 5757 if (attr->link_update.flags & BPF_F_REPLACE) { 5758 old_map = bpf_map_get(attr->link_update.old_map_fd); 5759 if (IS_ERR(old_map)) { 5760 ret = PTR_ERR(old_map); 5761 goto out_put; 5762 } 5763 } else if (attr->link_update.old_map_fd) { 5764 ret = -EINVAL; 5765 goto out_put; 5766 } 5767 5768 ret = link->ops->update_map(link, new_map, old_map); 5769 5770 if (old_map) 5771 bpf_map_put(old_map); 5772 out_put: 5773 bpf_map_put(new_map); 5774 return ret; 5775 } 5776 5777 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd 5778 5779 static int link_update(union bpf_attr *attr) 5780 { 5781 struct bpf_prog *old_prog = NULL, *new_prog; 5782 struct bpf_link *link; 5783 u32 flags; 5784 int ret; 5785 5786 if (CHECK_ATTR(BPF_LINK_UPDATE)) 5787 return -EINVAL; 5788 5789 flags = attr->link_update.flags; 5790 if (flags & ~BPF_F_REPLACE) 5791 return -EINVAL; 5792 5793 link = bpf_link_get_from_fd(attr->link_update.link_fd); 5794 if (IS_ERR(link)) 5795 return PTR_ERR(link); 5796 5797 if (link->ops->update_map) { 5798 ret = link_update_map(link, attr); 5799 goto out_put_link; 5800 } 5801 5802 new_prog = bpf_prog_get(attr->link_update.new_prog_fd); 5803 if (IS_ERR(new_prog)) { 5804 ret = PTR_ERR(new_prog); 5805 goto out_put_link; 5806 } 5807 5808 if (flags & BPF_F_REPLACE) { 5809 old_prog = bpf_prog_get(attr->link_update.old_prog_fd); 5810 if (IS_ERR(old_prog)) { 5811 ret = PTR_ERR(old_prog); 5812 old_prog = NULL; 5813 goto out_put_progs; 5814 } 5815 } else if (attr->link_update.old_prog_fd) { 5816 ret = -EINVAL; 5817 goto out_put_progs; 5818 } 5819 5820 if (link->ops->update_prog) 5821 ret = link->ops->update_prog(link, new_prog, old_prog); 5822 else 5823 ret = -EINVAL; 5824 5825 out_put_progs: 5826 if (old_prog) 5827 bpf_prog_put(old_prog); 5828 if (ret) 5829 bpf_prog_put(new_prog); 5830 out_put_link: 5831 bpf_link_put_direct(link); 5832 return 
ret; 5833 } 5834 5835 #define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd 5836 5837 static int link_detach(union bpf_attr *attr) 5838 { 5839 struct bpf_link *link; 5840 int ret; 5841 5842 if (CHECK_ATTR(BPF_LINK_DETACH)) 5843 return -EINVAL; 5844 5845 link = bpf_link_get_from_fd(attr->link_detach.link_fd); 5846 if (IS_ERR(link)) 5847 return PTR_ERR(link); 5848 5849 if (link->ops->detach) 5850 ret = link->ops->detach(link); 5851 else 5852 ret = -EOPNOTSUPP; 5853 5854 bpf_link_put_direct(link); 5855 return ret; 5856 } 5857 5858 struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) 5859 { 5860 return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT); 5861 } 5862 EXPORT_SYMBOL(bpf_link_inc_not_zero); 5863 5864 struct bpf_link *bpf_link_by_id(u32 id) 5865 { 5866 struct bpf_link *link; 5867 5868 if (!id) 5869 return ERR_PTR(-ENOENT); 5870 5871 spin_lock_bh(&link_idr_lock); 5872 /* before link is "settled", ID is 0, pretend it doesn't exist yet */ 5873 link = idr_find(&link_idr, id); 5874 if (link) { 5875 if (link->id) 5876 link = bpf_link_inc_not_zero(link); 5877 else 5878 link = ERR_PTR(-EAGAIN); 5879 } else { 5880 link = ERR_PTR(-ENOENT); 5881 } 5882 spin_unlock_bh(&link_idr_lock); 5883 return link; 5884 } 5885 5886 struct bpf_link *bpf_link_get_curr_or_next(u32 *id) 5887 { 5888 struct bpf_link *link; 5889 5890 spin_lock_bh(&link_idr_lock); 5891 again: 5892 link = idr_get_next(&link_idr, id); 5893 if (link) { 5894 link = bpf_link_inc_not_zero(link); 5895 if (IS_ERR(link)) { 5896 (*id)++; 5897 goto again; 5898 } 5899 } 5900 spin_unlock_bh(&link_idr_lock); 5901 5902 return link; 5903 } 5904 5905 #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id 5906 5907 static int bpf_link_get_fd_by_id(const union bpf_attr *attr) 5908 { 5909 struct bpf_link *link; 5910 u32 id = attr->link_id; 5911 int fd; 5912 5913 if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) 5914 return -EINVAL; 5915 5916 if (!capable(CAP_SYS_ADMIN)) 5917 return -EPERM; 5918 5919 link = bpf_link_by_id(id); 5920 if (IS_ERR(link)) 5921 return PTR_ERR(link); 5922 5923 fd = bpf_link_new_fd(link); 5924 if (fd < 0) 5925 bpf_link_put_direct(link); 5926 5927 return fd; 5928 } 5929 5930 DEFINE_MUTEX(bpf_stats_enabled_mutex); 5931 5932 static int bpf_stats_release(struct inode *inode, struct file *file) 5933 { 5934 mutex_lock(&bpf_stats_enabled_mutex); 5935 static_key_slow_dec(&bpf_stats_enabled_key.key); 5936 mutex_unlock(&bpf_stats_enabled_mutex); 5937 return 0; 5938 } 5939 5940 static const struct file_operations bpf_stats_fops = { 5941 .release = bpf_stats_release, 5942 }; 5943 5944 static int bpf_enable_runtime_stats(void) 5945 { 5946 int fd; 5947 5948 mutex_lock(&bpf_stats_enabled_mutex); 5949 5950 /* Set a very high limit to avoid overflow */ 5951 if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) { 5952 mutex_unlock(&bpf_stats_enabled_mutex); 5953 return -EBUSY; 5954 } 5955 5956 fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); 5957 if (fd >= 0) 5958 static_key_slow_inc(&bpf_stats_enabled_key.key); 5959 5960 mutex_unlock(&bpf_stats_enabled_mutex); 5961 return fd; 5962 } 5963 5964 #define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type 5965 5966 static int bpf_enable_stats(union bpf_attr *attr) 5967 { 5968 5969 if (CHECK_ATTR(BPF_ENABLE_STATS)) 5970 return -EINVAL; 5971 5972 if (!capable(CAP_SYS_ADMIN)) 5973 return -EPERM; 5974 5975 switch (attr->enable_stats.type) { 5976 case BPF_STATS_RUN_TIME: 5977 return bpf_enable_runtime_stats(); 5978 default: 5979 break; 5980 } 5981 return -EINVAL; 
5982 } 5983 5984 #define BPF_ITER_CREATE_LAST_FIELD iter_create.flags 5985 5986 static int bpf_iter_create(union bpf_attr *attr) 5987 { 5988 struct bpf_link *link; 5989 int err; 5990 5991 if (CHECK_ATTR(BPF_ITER_CREATE)) 5992 return -EINVAL; 5993 5994 if (attr->iter_create.flags) 5995 return -EINVAL; 5996 5997 link = bpf_link_get_from_fd(attr->iter_create.link_fd); 5998 if (IS_ERR(link)) 5999 return PTR_ERR(link); 6000 6001 err = bpf_iter_new_fd(link); 6002 bpf_link_put_direct(link); 6003 6004 return err; 6005 } 6006 6007 #define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags 6008 6009 static int bpf_prog_bind_map(union bpf_attr *attr) 6010 { 6011 struct bpf_prog *prog; 6012 struct bpf_map *map; 6013 struct bpf_map **used_maps_old, **used_maps_new; 6014 int i, ret = 0; 6015 6016 if (CHECK_ATTR(BPF_PROG_BIND_MAP)) 6017 return -EINVAL; 6018 6019 if (attr->prog_bind_map.flags) 6020 return -EINVAL; 6021 6022 prog = bpf_prog_get(attr->prog_bind_map.prog_fd); 6023 if (IS_ERR(prog)) 6024 return PTR_ERR(prog); 6025 6026 map = bpf_map_get(attr->prog_bind_map.map_fd); 6027 if (IS_ERR(map)) { 6028 ret = PTR_ERR(map); 6029 goto out_prog_put; 6030 } 6031 6032 mutex_lock(&prog->aux->used_maps_mutex); 6033 6034 used_maps_old = prog->aux->used_maps; 6035 6036 for (i = 0; i < prog->aux->used_map_cnt; i++) 6037 if (used_maps_old[i] == map) { 6038 bpf_map_put(map); 6039 goto out_unlock; 6040 } 6041 6042 used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, 6043 sizeof(used_maps_new[0]), 6044 GFP_KERNEL); 6045 if (!used_maps_new) { 6046 ret = -ENOMEM; 6047 goto out_unlock; 6048 } 6049 6050 /* The bpf program will not access the bpf map, but for the sake of 6051 * simplicity, increase sleepable_refcnt for sleepable program as well. 6052 */ 6053 if (prog->sleepable) 6054 atomic64_inc(&map->sleepable_refcnt); 6055 memcpy(used_maps_new, used_maps_old, 6056 sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); 6057 used_maps_new[prog->aux->used_map_cnt] = map; 6058 6059 prog->aux->used_map_cnt++; 6060 prog->aux->used_maps = used_maps_new; 6061 6062 kfree(used_maps_old); 6063 6064 out_unlock: 6065 mutex_unlock(&prog->aux->used_maps_mutex); 6066 6067 if (ret) 6068 bpf_map_put(map); 6069 out_prog_put: 6070 bpf_prog_put(prog); 6071 return ret; 6072 } 6073 6074 #define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd 6075 6076 static int token_create(union bpf_attr *attr) 6077 { 6078 if (CHECK_ATTR(BPF_TOKEN_CREATE)) 6079 return -EINVAL; 6080 6081 /* no flags are supported yet */ 6082 if (attr->token_create.flags) 6083 return -EINVAL; 6084 6085 return bpf_token_create(attr); 6086 } 6087 6088 #define BPF_PROG_STREAM_READ_BY_FD_LAST_FIELD prog_stream_read.prog_fd 6089 6090 static int prog_stream_read(union bpf_attr *attr) 6091 { 6092 char __user *buf = u64_to_user_ptr(attr->prog_stream_read.stream_buf); 6093 u32 len = attr->prog_stream_read.stream_buf_len; 6094 struct bpf_prog *prog; 6095 int ret; 6096 6097 if (CHECK_ATTR(BPF_PROG_STREAM_READ_BY_FD)) 6098 return -EINVAL; 6099 6100 prog = bpf_prog_get(attr->prog_stream_read.prog_fd); 6101 if (IS_ERR(prog)) 6102 return PTR_ERR(prog); 6103 6104 ret = bpf_prog_stream_read(prog, attr->prog_stream_read.stream_id, buf, len); 6105 bpf_prog_put(prog); 6106 6107 return ret; 6108 } 6109 6110 static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) 6111 { 6112 union bpf_attr attr; 6113 int err; 6114 6115 err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); 6116 if (err) 6117 return err; 6118 size = min_t(u32, size, sizeof(attr)); 6119 6120 /* copy 

#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd

static int token_create(union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_TOKEN_CREATE))
		return -EINVAL;

	/* no flags are supported yet */
	if (attr->token_create.flags)
		return -EINVAL;

	return bpf_token_create(attr);
}

#define BPF_PROG_STREAM_READ_BY_FD_LAST_FIELD prog_stream_read.prog_fd

static int prog_stream_read(union bpf_attr *attr)
{
	char __user *buf = u64_to_user_ptr(attr->prog_stream_read.stream_buf);
	u32 len = attr->prog_stream_read.stream_buf_len;
	struct bpf_prog *prog;
	int ret;

	if (CHECK_ATTR(BPF_PROG_STREAM_READ_BY_FD))
		return -EINVAL;

	prog = bpf_prog_get(attr->prog_stream_read.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	ret = bpf_prog_stream_read(prog, attr->prog_stream_read.stream_id, buf, len);
	bpf_prog_put(prog);

	return ret;
}

static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
	union bpf_attr attr;
	int err;

	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	memset(&attr, 0, sizeof(attr));
	if (copy_from_bpfptr(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size, uattr.is_kernel);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr, uattr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr, uattr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr, uattr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_MAP_FREEZE:
		err = map_freeze(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr, uattr, size);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr.user);
		break;
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr.user);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_BTF_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &btf_idr, &btf_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr.user);
		break;
	case BPF_RAW_TRACEPOINT_OPEN:
		err = bpf_raw_tracepoint_open(&attr);
		break;
	case BPF_BTF_LOAD:
		err = bpf_btf_load(&attr, uattr, size);
		break;
	case BPF_BTF_GET_FD_BY_ID:
		err = bpf_btf_get_fd_by_id(&attr);
		break;
	case BPF_TASK_FD_QUERY:
		err = bpf_task_fd_query(&attr, uattr.user);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
		err = map_lookup_and_delete_elem(&attr);
		break;
	case BPF_MAP_LOOKUP_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user,
				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
		break;
	case BPF_MAP_UPDATE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
		break;
	case BPF_MAP_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
		break;
	case BPF_LINK_CREATE:
		err = link_create(&attr, uattr);
		break;
	case BPF_LINK_UPDATE:
		err = link_update(&attr);
		break;
	case BPF_LINK_GET_FD_BY_ID:
		err = bpf_link_get_fd_by_id(&attr);
		break;
	case BPF_LINK_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &link_idr, &link_idr_lock);
		break;
	case BPF_ENABLE_STATS:
		err = bpf_enable_stats(&attr);
		break;
	case BPF_ITER_CREATE:
		err = bpf_iter_create(&attr);
		break;
	case BPF_LINK_DETACH:
		err = link_detach(&attr);
		break;
	case BPF_PROG_BIND_MAP:
		err = bpf_prog_bind_map(&attr);
		break;
	case BPF_TOKEN_CREATE:
		err = token_create(&attr);
		break;
	case BPF_PROG_STREAM_READ_BY_FD:
		err = prog_stream_read(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}
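
/*
 * A minimal userspace sketch of the raw bpf(2) call serviced by __sys_bpf()
 * above, for illustration only. Passing sizeof(union bpf_attr) is the common
 * case: a smaller size from older userspace works because the kernel copy is
 * zeroed before copy_from_bpfptr(), and a larger size from newer userspace is
 * accepted as long as the tail the kernel does not understand is all zero.
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/bpf.h>
 *
 *	static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 *				  unsigned int size)
 *	{
 *		return syscall(__NR_bpf, cmd, attr, size);
 *	}
 */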

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
}

static bool syscall_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= U16_MAX)
		return false;
	if (off % size != 0)
		return false;
	return true;
}

BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
{
	switch (cmd) {
	case BPF_MAP_CREATE:
	case BPF_MAP_DELETE_ELEM:
	case BPF_MAP_UPDATE_ELEM:
	case BPF_MAP_FREEZE:
	case BPF_MAP_GET_FD_BY_ID:
	case BPF_PROG_LOAD:
	case BPF_BTF_LOAD:
	case BPF_LINK_CREATE:
	case BPF_RAW_TRACEPOINT_OPEN:
		break;
	default:
		return -EINVAL;
	}
	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
}


/* To shut up -Wmissing-prototypes.
 * This function is used by the kernel light skeleton
 * to load bpf programs when modules are loaded or during kernel boot.
 * See tools/lib/bpf/skel_internal.h
 */
int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);

int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	struct bpf_prog * __maybe_unused prog;
	struct bpf_tramp_run_ctx __maybe_unused run_ctx;

	switch (cmd) {
#ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
	case BPF_PROG_TEST_RUN:
		if (attr->test.data_in || attr->test.data_out ||
		    attr->test.ctx_out || attr->test.duration ||
		    attr->test.repeat || attr->test.flags)
			return -EINVAL;

		prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
		if (IS_ERR(prog))
			return PTR_ERR(prog);

		if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
		    attr->test.ctx_size_in > U16_MAX) {
			bpf_prog_put(prog);
			return -EINVAL;
		}

		run_ctx.bpf_cookie = 0;
		if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
			/* recursion detected */
			__bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
			bpf_prog_put(prog);
			return -EBUSY;
		}
		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
		__bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
						&run_ctx);
		bpf_prog_put(prog);
		return 0;
#endif
	default:
		return ____bpf_sys_bpf(cmd, attr, size);
	}
}
EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL");

static const struct bpf_func_proto bpf_sys_bpf_proto = {
	.func		= bpf_sys_bpf,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
};

const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return bpf_base_func_proto(func_id, prog);
}
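
/*
 * A minimal sketch of a BPF_PROG_TYPE_SYSCALL program using the bpf_sys_bpf()
 * helper wired up above, for illustration only; this is the mechanism light
 * skeletons use to load objects from inside the kernel. The context struct
 * and all names are made up here and are defined by the loader, not by this
 * file; SEC() follows the libbpf convention.
 *
 *	struct args {
 *		int map_fd;
 *	};
 *
 *	SEC("syscall")
 *	int create_array(struct args *ctx)
 *	{
 *		union bpf_attr attr = {};
 *
 *		attr.map_type    = BPF_MAP_TYPE_ARRAY;
 *		attr.key_size    = 4;
 *		attr.value_size  = 4;
 *		attr.max_entries = 1;
 *		ctx->map_fd = bpf_sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 *		return 0;
 *	}
 */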

BPF_CALL_1(bpf_sys_close, u32, fd)
{
	/* When a bpf program calls this helper there should not be
	 * an fdget() without a matching completed fdput().
	 * This helper is allowed in the following callchain only:
	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
	 */
	return close_fd(fd);
}

static const struct bpf_func_proto bpf_sys_close_proto = {
	.func		= bpf_sys_close,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
	*res = 0;
	if (flags)
		return -EINVAL;

	if (name_sz <= 1 || name[name_sz - 1])
		return -EINVAL;

	if (!bpf_dump_raw_ok(current_cred()))
		return -EPERM;

	*res = kallsyms_lookup_name(name);
	return *res ? 0 : -ENOENT;
}

static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
	.func		= bpf_kallsyms_lookup_name,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
	.arg4_size	= sizeof(u64),
};

static const struct bpf_func_proto *
syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_sys_bpf:
		return !bpf_token_capable(prog->aux->token, CAP_PERFMON)
		       ? NULL : &bpf_sys_bpf_proto;
	case BPF_FUNC_btf_find_by_name_kind:
		return &bpf_btf_find_by_name_kind_proto;
	case BPF_FUNC_sys_close:
		return &bpf_sys_close_proto;
	case BPF_FUNC_kallsyms_lookup_name:
		return &bpf_kallsyms_lookup_name_proto;
	default:
		return tracing_prog_func_proto(func_id, prog);
	}
}

const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
	.get_func_proto		= syscall_prog_func_proto,
	.is_valid_access	= syscall_prog_is_valid_access,
};

const struct bpf_prog_ops bpf_syscall_prog_ops = {
	.test_run = bpf_prog_test_run_syscall,
};
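
/*
 * A minimal sketch of the bpf_kallsyms_lookup_name() helper exposed through
 * syscall_prog_func_proto() above, for illustration only (the program and
 * symbol names are made up). name_sz must include the terminating NUL, and
 * the resolved address is written to the u64 pointed to by the last argument.
 *
 *	SEC("syscall")
 *	int resolve(void *ctx)
 *	{
 *		__u64 addr = 0;
 *
 *		bpf_kallsyms_lookup_name("bpf_prog_put", sizeof("bpf_prog_put"),
 *					 0, &addr);
 *		return addr ? 0 : 1;
 *	}
 */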

#ifdef CONFIG_SYSCTL
static int bpf_stats_handler(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct static_key *key = (struct static_key *)table->data;
	static int saved_val;
	int val, ret;
	struct ctl_table tmp = {
		.data   = &val,
		.maxlen = sizeof(val),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	val = saved_val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret && val != saved_val) {
		if (val)
			static_key_slow_inc(key);
		else
			static_key_slow_dec(key);
		saved_val = val;
	}
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}

void __weak unpriv_ebpf_notify(int new_state)
{
}

static int bpf_unpriv_handler(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, unpriv_enable = *(int *)table->data;
	bool locked_state = unpriv_enable == 1;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &unpriv_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (locked_state && unpriv_enable != 1)
			return -EPERM;
		*(int *)table->data = unpriv_enable;
	}

	if (write)
		unpriv_ebpf_notify(unpriv_enable);

	return ret;
}

static const struct ctl_table bpf_syscall_table[] = {
	{
		.procname	= "unprivileged_bpf_disabled",
		.data		= &sysctl_unprivileged_bpf_disabled,
		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
		.mode		= 0644,
		.proc_handler	= bpf_unpriv_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "bpf_stats_enabled",
		.data		= &bpf_stats_enabled_key.key,
		.mode		= 0644,
		.proc_handler	= bpf_stats_handler,
	},
};

static int __init bpf_syscall_sysctl_init(void)
{
	register_sysctl_init("kernel", bpf_syscall_table);
	return 0;
}
late_initcall(bpf_syscall_sysctl_init);
#endif /* CONFIG_SYSCTL */
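
/*
 * Illustrative usage of the two sysctls registered above (shell commands,
 * shown here only as an example):
 *
 *	# echo 1 > /proc/sys/kernel/bpf_stats_enabled
 *	# echo 2 > /proc/sys/kernel/unprivileged_bpf_disabled
 *
 * Once unprivileged_bpf_disabled has been set to 1, bpf_unpriv_handler()
 * rejects any further write that would change it away from 1.
 */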