// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <crypto/sha2.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
#include <linux/bsearch.h>
#include <linux/btf.h>
#include <linux/hex.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
#include <linux/pgtable.h>
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/sort.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
#include <linux/tracepoint.h>
#include <linux/overflow.h>
#include <linux/cookie.h>
#include <linux/verification.h>

#include <net/netfilter/nf_bpf_link.h>
#include <net/netkit.h>
#include <net/tcx.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
			IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK	(BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
DEFINE_COOKIE(bpf_map_cookie);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);
static DEFINE_IDR(link_idr);
static DEFINE_SPINLOCK(link_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly =
	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
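 *
 * For example, if a newer libbpf hands us a 160-byte union bpf_attr while
 * this kernel only knows the first 144 bytes for that command, the trailing
 * 16 bytes must be all-zero or the syscall is rejected with -E2BIG below.
 * (The concrete sizes here are only illustrative, not taken from any UAPI.)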
 */
int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
			     size_t expected_size,
			     size_t actual_size)
{
	int res;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (actual_size <= expected_size)
		return 0;

	if (uaddr.is_kernel)
		res = memchr_inv(uaddr.kernel + expected_size, 0,
				 actual_size - expected_size) == NULL;
	else
		res = check_zeroed_user(uaddr.user + expected_size,
					actual_size - expected_size);
	if (res < 0)
		return res;
	return res ? 0 : -E2BIG;
}

const struct bpf_map_ops bpf_map_offload_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = bpf_map_offload_map_alloc,
	.map_free = bpf_map_offload_map_free,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = bpf_map_offload_map_mem_usage,
};

static void bpf_map_write_active_inc(struct bpf_map *map)
{
	atomic64_inc(&map->writecnt);
}

static void bpf_map_write_active_dec(struct bpf_map *map)
{
	atomic64_dec(&map->writecnt);
}

bool bpf_map_write_active(const struct bpf_map *map)
{
	return atomic64_read(&map->writecnt) != 0;
}

static u32 bpf_map_value_size(const struct bpf_map *map)
{
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		return round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		return sizeof(u32);
	else
		return map->value_size;
}

static void maybe_wait_bpf_programs(struct bpf_map *map)
{
	/* Wait for any running non-sleepable BPF programs to complete, so
	 * that when we return to userspace it knows that all non-sleepable
	 * programs that could still be running already use the new map
	 * value. For sleepable BPF programs, synchronize_rcu_tasks_trace()
	 * would be needed to wait for their completion, but that wait can be
	 * very long and userspace may think the syscall hangs, so sleepable
	 * BPF programs are not handled here for now.
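	 *
	 * E.g. after an inner map is replaced in a hash/array of maps, the
	 * expedited grace period below guarantees that, by the time the
	 * syscall returns, no non-sleepable program still holds a pointer
	 * to the old inner map obtained via bpf_map_lookup_elem().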
159 */ 160 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || 161 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 162 synchronize_rcu_expedited(); 163 } 164 165 static void unpin_uptr_kaddr(void *kaddr) 166 { 167 if (kaddr) 168 unpin_user_page(virt_to_page(kaddr)); 169 } 170 171 static void __bpf_obj_unpin_uptrs(struct btf_record *rec, u32 cnt, void *obj) 172 { 173 const struct btf_field *field; 174 void **uptr_addr; 175 int i; 176 177 for (i = 0, field = rec->fields; i < cnt; i++, field++) { 178 if (field->type != BPF_UPTR) 179 continue; 180 181 uptr_addr = obj + field->offset; 182 unpin_uptr_kaddr(*uptr_addr); 183 } 184 } 185 186 static void bpf_obj_unpin_uptrs(struct btf_record *rec, void *obj) 187 { 188 if (!btf_record_has_field(rec, BPF_UPTR)) 189 return; 190 191 __bpf_obj_unpin_uptrs(rec, rec->cnt, obj); 192 } 193 194 static int bpf_obj_pin_uptrs(struct btf_record *rec, void *obj) 195 { 196 const struct btf_field *field; 197 const struct btf_type *t; 198 unsigned long start, end; 199 struct page *page; 200 void **uptr_addr; 201 int i, err; 202 203 if (!btf_record_has_field(rec, BPF_UPTR)) 204 return 0; 205 206 for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) { 207 if (field->type != BPF_UPTR) 208 continue; 209 210 uptr_addr = obj + field->offset; 211 start = *(unsigned long *)uptr_addr; 212 if (!start) 213 continue; 214 215 t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id); 216 /* t->size was checked for zero before */ 217 if (check_add_overflow(start, t->size - 1, &end)) { 218 err = -EFAULT; 219 goto unpin_all; 220 } 221 222 /* The uptr's struct cannot span across two pages */ 223 if ((start & PAGE_MASK) != (end & PAGE_MASK)) { 224 err = -EOPNOTSUPP; 225 goto unpin_all; 226 } 227 228 err = pin_user_pages_fast(start, 1, FOLL_LONGTERM | FOLL_WRITE, &page); 229 if (err != 1) 230 goto unpin_all; 231 232 if (PageHighMem(page)) { 233 err = -EOPNOTSUPP; 234 unpin_user_page(page); 235 goto unpin_all; 236 } 237 238 *uptr_addr = page_address(page) + offset_in_page(start); 239 } 240 241 return 0; 242 243 unpin_all: 244 __bpf_obj_unpin_uptrs(rec, i, obj); 245 return err; 246 } 247 248 static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, 249 void *key, void *value, __u64 flags) 250 { 251 int err; 252 253 /* Need to create a kthread, thus must support schedule */ 254 if (bpf_map_is_offloaded(map)) { 255 return bpf_map_offload_update_elem(map, key, value, flags); 256 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || 257 map->map_type == BPF_MAP_TYPE_ARENA || 258 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 259 return map->ops->map_update_elem(map, key, value, flags); 260 } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || 261 map->map_type == BPF_MAP_TYPE_SOCKMAP) { 262 return sock_map_update_elem_sys(map, key, value, flags); 263 } else if (IS_FD_PROG_ARRAY(map)) { 264 return bpf_fd_array_map_update_elem(map, map_file, key, value, 265 flags); 266 } 267 268 bpf_disable_instrumentation(); 269 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 270 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 271 err = bpf_percpu_hash_update(map, key, value, flags); 272 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 273 err = bpf_percpu_array_update(map, key, value, flags); 274 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 275 err = bpf_percpu_cgroup_storage_update(map, key, value, 276 flags); 277 } else if (IS_FD_ARRAY(map)) { 278 err = bpf_fd_array_map_update_elem(map, map_file, key, value, 279 flags); 280 } else if (map->map_type == 
BPF_MAP_TYPE_HASH_OF_MAPS) { 281 err = bpf_fd_htab_map_update_elem(map, map_file, key, value, 282 flags); 283 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 284 /* rcu_read_lock() is not needed */ 285 err = bpf_fd_reuseport_array_update_elem(map, key, value, 286 flags); 287 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 288 map->map_type == BPF_MAP_TYPE_STACK || 289 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 290 err = map->ops->map_push_elem(map, value, flags); 291 } else { 292 err = bpf_obj_pin_uptrs(map->record, value); 293 if (!err) { 294 rcu_read_lock(); 295 err = map->ops->map_update_elem(map, key, value, flags); 296 rcu_read_unlock(); 297 if (err) 298 bpf_obj_unpin_uptrs(map->record, value); 299 } 300 } 301 bpf_enable_instrumentation(); 302 303 return err; 304 } 305 306 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, 307 __u64 flags) 308 { 309 void *ptr; 310 int err; 311 312 if (bpf_map_is_offloaded(map)) 313 return bpf_map_offload_lookup_elem(map, key, value); 314 315 bpf_disable_instrumentation(); 316 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 317 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 318 err = bpf_percpu_hash_copy(map, key, value); 319 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 320 err = bpf_percpu_array_copy(map, key, value); 321 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 322 err = bpf_percpu_cgroup_storage_copy(map, key, value); 323 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 324 err = bpf_stackmap_extract(map, key, value, false); 325 } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { 326 err = bpf_fd_array_map_lookup_elem(map, key, value); 327 } else if (IS_FD_HASH(map)) { 328 err = bpf_fd_htab_map_lookup_elem(map, key, value); 329 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 330 err = bpf_fd_reuseport_array_lookup_elem(map, key, value); 331 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 332 map->map_type == BPF_MAP_TYPE_STACK || 333 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 334 err = map->ops->map_peek_elem(map, value); 335 } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 336 /* struct_ops map requires directly updating "value" */ 337 err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); 338 } else { 339 rcu_read_lock(); 340 if (map->ops->map_lookup_elem_sys_only) 341 ptr = map->ops->map_lookup_elem_sys_only(map, key); 342 else 343 ptr = map->ops->map_lookup_elem(map, key); 344 if (IS_ERR(ptr)) { 345 err = PTR_ERR(ptr); 346 } else if (!ptr) { 347 err = -ENOENT; 348 } else { 349 err = 0; 350 if (flags & BPF_F_LOCK) 351 /* lock 'ptr' and copy everything but lock */ 352 copy_map_value_locked(map, value, ptr, true); 353 else 354 copy_map_value(map, value, ptr); 355 /* mask lock and timer, since value wasn't zero inited */ 356 check_and_init_map_value(map, value); 357 } 358 rcu_read_unlock(); 359 } 360 361 bpf_enable_instrumentation(); 362 363 return err; 364 } 365 366 /* Please, do not use this function outside from the map creation path 367 * (e.g. in map update path) without taking care of setting the active 368 * memory cgroup (see at bpf_map_kmalloc_node() for example). 369 */ 370 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) 371 { 372 /* We really just want to fail instead of triggering OOM killer 373 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, 374 * which is used for lower order allocation requests. 
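	 *
	 * In short: non-mmapable requests up to PAGE_SIZE <<
	 * PAGE_ALLOC_COSTLY_ORDER are tried with kmalloc_node() first and
	 * only fall back to vmalloc on failure, while mmapable or larger
	 * areas go straight to __vmalloc_node_range().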
375 * 376 * It has been observed that higher order allocation requests done by 377 * vmalloc with __GFP_NORETRY being set might fail due to not trying 378 * to reclaim memory from the page cache, thus we set 379 * __GFP_RETRY_MAYFAIL to avoid such situations. 380 */ 381 382 gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO); 383 unsigned int flags = 0; 384 unsigned long align = 1; 385 void *area; 386 387 if (size >= SIZE_MAX) 388 return NULL; 389 390 /* kmalloc()'ed memory can't be mmap()'ed */ 391 if (mmapable) { 392 BUG_ON(!PAGE_ALIGNED(size)); 393 align = SHMLBA; 394 flags = VM_USERMAP; 395 } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 396 area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, 397 numa_node); 398 if (area != NULL) 399 return area; 400 } 401 402 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, 403 gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL, 404 flags, numa_node, __builtin_return_address(0)); 405 } 406 407 void *bpf_map_area_alloc(u64 size, int numa_node) 408 { 409 return __bpf_map_area_alloc(size, numa_node, false); 410 } 411 412 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node) 413 { 414 return __bpf_map_area_alloc(size, numa_node, true); 415 } 416 417 void bpf_map_area_free(void *area) 418 { 419 kvfree(area); 420 } 421 422 static u32 bpf_map_flags_retain_permanent(u32 flags) 423 { 424 /* Some map creation flags are not tied to the map object but 425 * rather to the map fd instead, so they have no meaning upon 426 * map object inspection since multiple file descriptors with 427 * different (access) properties can exist here. Thus, given 428 * this has zero meaning for the map itself, lets clear these 429 * from here. 430 */ 431 return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); 432 } 433 434 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) 435 { 436 map->map_type = attr->map_type; 437 map->key_size = attr->key_size; 438 map->value_size = attr->value_size; 439 map->max_entries = attr->max_entries; 440 map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); 441 map->numa_node = bpf_map_attr_numa_node(attr); 442 map->map_extra = attr->map_extra; 443 } 444 445 static int bpf_map_alloc_id(struct bpf_map *map) 446 { 447 int id; 448 449 idr_preload(GFP_KERNEL); 450 spin_lock_bh(&map_idr_lock); 451 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); 452 if (id > 0) 453 map->id = id; 454 spin_unlock_bh(&map_idr_lock); 455 idr_preload_end(); 456 457 if (WARN_ON_ONCE(!id)) 458 return -ENOSPC; 459 460 return id > 0 ? 0 : id; 461 } 462 463 void bpf_map_free_id(struct bpf_map *map) 464 { 465 unsigned long flags; 466 467 /* Offloaded maps are removed from the IDR store when their device 468 * disappears - even if someone holds an fd to them they are unusable, 469 * the memory is gone, all ops will fail; they are simply waiting for 470 * refcnt to drop to be freed. 471 */ 472 if (!map->id) 473 return; 474 475 spin_lock_irqsave(&map_idr_lock, flags); 476 477 idr_remove(&map_idr, map->id); 478 map->id = 0; 479 480 spin_unlock_irqrestore(&map_idr_lock, flags); 481 } 482 483 #ifdef CONFIG_MEMCG 484 static void bpf_map_save_memcg(struct bpf_map *map) 485 { 486 /* Currently if a map is created by a process belonging to the root 487 * memory cgroup, get_obj_cgroup_from_current() will return NULL. 488 * So we have to check map->objcg for being NULL each time it's 489 * being used. 
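	 * A NULL objcg simply means the charge falls back to the root memory
	 * cgroup, see bpf_map_get_memcg() below.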
490 */ 491 if (memcg_bpf_enabled()) 492 map->objcg = get_obj_cgroup_from_current(); 493 } 494 495 static void bpf_map_release_memcg(struct bpf_map *map) 496 { 497 if (map->objcg) 498 obj_cgroup_put(map->objcg); 499 } 500 501 static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map) 502 { 503 if (map->objcg) 504 return get_mem_cgroup_from_objcg(map->objcg); 505 506 return root_mem_cgroup; 507 } 508 509 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, 510 int node) 511 { 512 struct mem_cgroup *memcg, *old_memcg; 513 void *ptr; 514 515 memcg = bpf_map_get_memcg(map); 516 old_memcg = set_active_memcg(memcg); 517 ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); 518 set_active_memcg(old_memcg); 519 mem_cgroup_put(memcg); 520 521 return ptr; 522 } 523 524 void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, 525 int node) 526 { 527 struct mem_cgroup *memcg, *old_memcg; 528 void *ptr; 529 530 memcg = bpf_map_get_memcg(map); 531 old_memcg = set_active_memcg(memcg); 532 ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node); 533 set_active_memcg(old_memcg); 534 mem_cgroup_put(memcg); 535 536 return ptr; 537 } 538 539 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) 540 { 541 struct mem_cgroup *memcg, *old_memcg; 542 void *ptr; 543 544 memcg = bpf_map_get_memcg(map); 545 old_memcg = set_active_memcg(memcg); 546 ptr = kzalloc(size, flags | __GFP_ACCOUNT); 547 set_active_memcg(old_memcg); 548 mem_cgroup_put(memcg); 549 550 return ptr; 551 } 552 553 void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, 554 gfp_t flags) 555 { 556 struct mem_cgroup *memcg, *old_memcg; 557 void *ptr; 558 559 memcg = bpf_map_get_memcg(map); 560 old_memcg = set_active_memcg(memcg); 561 ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT); 562 set_active_memcg(old_memcg); 563 mem_cgroup_put(memcg); 564 565 return ptr; 566 } 567 568 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, 569 size_t align, gfp_t flags) 570 { 571 struct mem_cgroup *memcg, *old_memcg; 572 void __percpu *ptr; 573 574 memcg = bpf_map_get_memcg(map); 575 old_memcg = set_active_memcg(memcg); 576 ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT); 577 set_active_memcg(old_memcg); 578 mem_cgroup_put(memcg); 579 580 return ptr; 581 } 582 583 #else 584 static void bpf_map_save_memcg(struct bpf_map *map) 585 { 586 } 587 588 static void bpf_map_release_memcg(struct bpf_map *map) 589 { 590 } 591 #endif 592 593 static bool can_alloc_pages(void) 594 { 595 return preempt_count() == 0 && !irqs_disabled() && 596 !IS_ENABLED(CONFIG_PREEMPT_RT); 597 } 598 599 static struct page *__bpf_alloc_page(int nid) 600 { 601 if (!can_alloc_pages()) 602 return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0); 603 604 return alloc_pages_node(nid, 605 GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT 606 | __GFP_NOWARN, 607 0); 608 } 609 610 int bpf_map_alloc_pages(const struct bpf_map *map, int nid, 611 unsigned long nr_pages, struct page **pages) 612 { 613 unsigned long i, j; 614 struct page *pg; 615 int ret = 0; 616 #ifdef CONFIG_MEMCG 617 struct mem_cgroup *memcg, *old_memcg; 618 619 memcg = bpf_map_get_memcg(map); 620 old_memcg = set_active_memcg(memcg); 621 #endif 622 for (i = 0; i < nr_pages; i++) { 623 pg = __bpf_alloc_page(nid); 624 625 if (pg) { 626 pages[i] = pg; 627 continue; 628 } 629 for (j = 0; j < i; j++) 630 free_pages_nolock(pages[j], 0); 631 ret = -ENOMEM; 632 break; 633 } 634 635 #ifdef CONFIG_MEMCG 636 set_active_memcg(old_memcg); 637 
mem_cgroup_put(memcg); 638 #endif 639 return ret; 640 } 641 642 643 static int btf_field_cmp(const void *a, const void *b) 644 { 645 const struct btf_field *f1 = a, *f2 = b; 646 647 if (f1->offset < f2->offset) 648 return -1; 649 else if (f1->offset > f2->offset) 650 return 1; 651 return 0; 652 } 653 654 struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset, 655 u32 field_mask) 656 { 657 struct btf_field *field; 658 659 if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & field_mask)) 660 return NULL; 661 field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp); 662 if (!field || !(field->type & field_mask)) 663 return NULL; 664 return field; 665 } 666 667 void btf_record_free(struct btf_record *rec) 668 { 669 int i; 670 671 if (IS_ERR_OR_NULL(rec)) 672 return; 673 for (i = 0; i < rec->cnt; i++) { 674 switch (rec->fields[i].type) { 675 case BPF_KPTR_UNREF: 676 case BPF_KPTR_REF: 677 case BPF_KPTR_PERCPU: 678 case BPF_UPTR: 679 if (rec->fields[i].kptr.module) 680 module_put(rec->fields[i].kptr.module); 681 if (btf_is_kernel(rec->fields[i].kptr.btf)) 682 btf_put(rec->fields[i].kptr.btf); 683 break; 684 case BPF_LIST_HEAD: 685 case BPF_LIST_NODE: 686 case BPF_RB_ROOT: 687 case BPF_RB_NODE: 688 case BPF_SPIN_LOCK: 689 case BPF_RES_SPIN_LOCK: 690 case BPF_TIMER: 691 case BPF_REFCOUNT: 692 case BPF_WORKQUEUE: 693 case BPF_TASK_WORK: 694 /* Nothing to release */ 695 break; 696 default: 697 WARN_ON_ONCE(1); 698 continue; 699 } 700 } 701 kfree(rec); 702 } 703 704 void bpf_map_free_record(struct bpf_map *map) 705 { 706 btf_record_free(map->record); 707 map->record = NULL; 708 } 709 710 struct btf_record *btf_record_dup(const struct btf_record *rec) 711 { 712 const struct btf_field *fields; 713 struct btf_record *new_rec; 714 int ret, size, i; 715 716 if (IS_ERR_OR_NULL(rec)) 717 return NULL; 718 size = struct_size(rec, fields, rec->cnt); 719 new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN); 720 if (!new_rec) 721 return ERR_PTR(-ENOMEM); 722 /* Do a deep copy of the btf_record */ 723 fields = rec->fields; 724 new_rec->cnt = 0; 725 for (i = 0; i < rec->cnt; i++) { 726 switch (fields[i].type) { 727 case BPF_KPTR_UNREF: 728 case BPF_KPTR_REF: 729 case BPF_KPTR_PERCPU: 730 case BPF_UPTR: 731 if (btf_is_kernel(fields[i].kptr.btf)) 732 btf_get(fields[i].kptr.btf); 733 if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { 734 ret = -ENXIO; 735 goto free; 736 } 737 break; 738 case BPF_LIST_HEAD: 739 case BPF_LIST_NODE: 740 case BPF_RB_ROOT: 741 case BPF_RB_NODE: 742 case BPF_SPIN_LOCK: 743 case BPF_RES_SPIN_LOCK: 744 case BPF_TIMER: 745 case BPF_REFCOUNT: 746 case BPF_WORKQUEUE: 747 case BPF_TASK_WORK: 748 /* Nothing to acquire */ 749 break; 750 default: 751 ret = -EFAULT; 752 WARN_ON_ONCE(1); 753 goto free; 754 } 755 new_rec->cnt++; 756 } 757 return new_rec; 758 free: 759 btf_record_free(new_rec); 760 return ERR_PTR(ret); 761 } 762 763 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b) 764 { 765 bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b); 766 int size; 767 768 if (!a_has_fields && !b_has_fields) 769 return true; 770 if (a_has_fields != b_has_fields) 771 return false; 772 if (rec_a->cnt != rec_b->cnt) 773 return false; 774 size = struct_size(rec_a, fields, rec_a->cnt); 775 /* btf_parse_fields uses kzalloc to allocate a btf_record, so unused 776 * members are zeroed out. So memcmp is safe to do without worrying 777 * about padding/unused fields. 
778 * 779 * While spin_lock, timer, and kptr have no relation to map BTF, 780 * list_head metadata is specific to map BTF, the btf and value_rec 781 * members in particular. btf is the map BTF, while value_rec points to 782 * btf_record in that map BTF. 783 * 784 * So while by default, we don't rely on the map BTF (which the records 785 * were parsed from) matching for both records, which is not backwards 786 * compatible, in case list_head is part of it, we implicitly rely on 787 * that by way of depending on memcmp succeeding for it. 788 */ 789 return !memcmp(rec_a, rec_b, size); 790 } 791 792 void bpf_obj_free_timer(const struct btf_record *rec, void *obj) 793 { 794 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER))) 795 return; 796 bpf_timer_cancel_and_free(obj + rec->timer_off); 797 } 798 799 void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj) 800 { 801 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_WORKQUEUE))) 802 return; 803 bpf_wq_cancel_and_free(obj + rec->wq_off); 804 } 805 806 void bpf_obj_free_task_work(const struct btf_record *rec, void *obj) 807 { 808 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK))) 809 return; 810 bpf_task_work_cancel_and_free(obj + rec->task_work_off); 811 } 812 813 void bpf_obj_free_fields(const struct btf_record *rec, void *obj) 814 { 815 const struct btf_field *fields; 816 int i; 817 818 if (IS_ERR_OR_NULL(rec)) 819 return; 820 fields = rec->fields; 821 for (i = 0; i < rec->cnt; i++) { 822 struct btf_struct_meta *pointee_struct_meta; 823 const struct btf_field *field = &fields[i]; 824 void *field_ptr = obj + field->offset; 825 void *xchgd_field; 826 827 switch (fields[i].type) { 828 case BPF_SPIN_LOCK: 829 case BPF_RES_SPIN_LOCK: 830 break; 831 case BPF_TIMER: 832 bpf_timer_cancel_and_free(field_ptr); 833 break; 834 case BPF_WORKQUEUE: 835 bpf_wq_cancel_and_free(field_ptr); 836 break; 837 case BPF_TASK_WORK: 838 bpf_task_work_cancel_and_free(field_ptr); 839 break; 840 case BPF_KPTR_UNREF: 841 WRITE_ONCE(*(u64 *)field_ptr, 0); 842 break; 843 case BPF_KPTR_REF: 844 case BPF_KPTR_PERCPU: 845 xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0); 846 if (!xchgd_field) 847 break; 848 849 if (!btf_is_kernel(field->kptr.btf)) { 850 pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, 851 field->kptr.btf_id); 852 __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? 853 pointee_struct_meta->record : NULL, 854 fields[i].type == BPF_KPTR_PERCPU); 855 } else { 856 field->kptr.dtor(xchgd_field); 857 } 858 break; 859 case BPF_UPTR: 860 /* The caller ensured that no one is using the uptr */ 861 unpin_uptr_kaddr(*(void **)field_ptr); 862 break; 863 case BPF_LIST_HEAD: 864 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 865 continue; 866 bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off); 867 break; 868 case BPF_RB_ROOT: 869 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 870 continue; 871 bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off); 872 break; 873 case BPF_LIST_NODE: 874 case BPF_RB_NODE: 875 case BPF_REFCOUNT: 876 break; 877 default: 878 WARN_ON_ONCE(1); 879 continue; 880 } 881 } 882 } 883 884 static void bpf_map_free(struct bpf_map *map) 885 { 886 struct btf_record *rec = map->record; 887 struct btf *btf = map->btf; 888 889 /* implementation dependent freeing. Disabling migration to simplify 890 * the free of values or special fields allocated from bpf memory 891 * allocator. 
	 */
	kfree(map->excl_prog_sha);
	migrate_disable();
	map->ops->map_free(map);
	migrate_enable();

	/* Delay freeing of btf_record for maps, as map_free
	 * callback usually needs access to them. It is better to do it here
	 * than require each callback to do the free itself manually.
	 *
	 * Note that the btf_record stashed in map->inner_map_meta->record was
	 * already freed using the map_free callback for map in map case which
	 * eventually calls bpf_map_free_meta, since inner_map_meta is only a
	 * template bpf_map struct used during verification.
	 */
	btf_record_free(rec);
	/* Delay freeing of btf for maps, as map_free callback may need
	 * struct_meta info which will be freed with btf_put().
	 */
	btf_put(btf);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	security_bpf_map_free(map);
	bpf_map_release_memcg(map);
	bpf_map_owner_free(map);
	bpf_map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic64_dec_and_test(&map->usercnt)) {
		if (map->ops->map_release_uref)
			map->ops->map_release_uref(map);
	}
}

static void bpf_map_free_in_work(struct bpf_map *map)
{
	INIT_WORK(&map->work, bpf_map_free_deferred);
	/* Avoid spawning kworkers, since they all might contend
	 * for the same mutex like slab_mutex.
	 */
	queue_work(system_dfl_wq, &map->work);
}

static void bpf_map_free_rcu_gp(struct rcu_head *rcu)
{
	bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu));
}

static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu)
{
	if (rcu_trace_implies_rcu_gp())
		bpf_map_free_rcu_gp(rcu);
	else
		call_rcu(rcu, bpf_map_free_rcu_gp);
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic64_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map);

		WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt));
		if (READ_ONCE(map->free_after_mult_rcu_gp))
			call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp);
		else if (READ_ONCE(map->free_after_rcu_gp))
			call_rcu(&map->rcu, bpf_map_free_rcu_gp);
		else
			bpf_map_free_in_work(map);
	}
}
EXPORT_SYMBOL_GPL(bpf_map_put);

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
{
	fmode_t mode = fd_file(f)->f_mode;

	/* Our file permissions may have been overridden by the map-wide
	 * permissions imposed on the syscall side.
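	 * E.g. once BPF_MAP_FREEZE has been applied, even a map fd that was
	 * opened read-write loses FMODE_CAN_WRITE here, so syscall-side
	 * updates and deletes fail with -EPERM while program-side access is
	 * unaffected.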
998 */ 999 if (READ_ONCE(map->frozen)) 1000 mode &= ~FMODE_CAN_WRITE; 1001 return mode; 1002 } 1003 1004 #ifdef CONFIG_PROC_FS 1005 /* Show the memory usage of a bpf map */ 1006 static u64 bpf_map_memory_usage(const struct bpf_map *map) 1007 { 1008 return map->ops->map_mem_usage(map); 1009 } 1010 1011 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 1012 { 1013 struct bpf_map *map = filp->private_data; 1014 u32 type = 0, jited = 0; 1015 1016 spin_lock(&map->owner_lock); 1017 if (map->owner) { 1018 type = map->owner->type; 1019 jited = map->owner->jited; 1020 } 1021 spin_unlock(&map->owner_lock); 1022 1023 seq_printf(m, 1024 "map_type:\t%u\n" 1025 "key_size:\t%u\n" 1026 "value_size:\t%u\n" 1027 "max_entries:\t%u\n" 1028 "map_flags:\t%#x\n" 1029 "map_extra:\t%#llx\n" 1030 "memlock:\t%llu\n" 1031 "map_id:\t%u\n" 1032 "frozen:\t%u\n", 1033 map->map_type, 1034 map->key_size, 1035 map->value_size, 1036 map->max_entries, 1037 map->map_flags, 1038 (unsigned long long)map->map_extra, 1039 bpf_map_memory_usage(map), 1040 map->id, 1041 READ_ONCE(map->frozen)); 1042 if (type) { 1043 seq_printf(m, "owner_prog_type:\t%u\n", type); 1044 seq_printf(m, "owner_jited:\t%u\n", jited); 1045 } 1046 } 1047 #endif 1048 1049 static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, 1050 loff_t *ppos) 1051 { 1052 /* We need this handler such that alloc_file() enables 1053 * f_mode with FMODE_CAN_READ. 1054 */ 1055 return -EINVAL; 1056 } 1057 1058 static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, 1059 size_t siz, loff_t *ppos) 1060 { 1061 /* We need this handler such that alloc_file() enables 1062 * f_mode with FMODE_CAN_WRITE. 1063 */ 1064 return -EINVAL; 1065 } 1066 1067 /* called for any extra memory-mapped regions (except initial) */ 1068 static void bpf_map_mmap_open(struct vm_area_struct *vma) 1069 { 1070 struct bpf_map *map = vma->vm_file->private_data; 1071 1072 if (vma->vm_flags & VM_MAYWRITE) 1073 bpf_map_write_active_inc(map); 1074 } 1075 1076 /* called for all unmapped memory region (including initial) */ 1077 static void bpf_map_mmap_close(struct vm_area_struct *vma) 1078 { 1079 struct bpf_map *map = vma->vm_file->private_data; 1080 1081 if (vma->vm_flags & VM_MAYWRITE) 1082 bpf_map_write_active_dec(map); 1083 } 1084 1085 static const struct vm_operations_struct bpf_map_default_vmops = { 1086 .open = bpf_map_mmap_open, 1087 .close = bpf_map_mmap_close, 1088 }; 1089 1090 static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) 1091 { 1092 struct bpf_map *map = filp->private_data; 1093 int err = 0; 1094 1095 if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record)) 1096 return -ENOTSUPP; 1097 1098 if (!(vma->vm_flags & VM_SHARED)) 1099 return -EINVAL; 1100 1101 mutex_lock(&map->freeze_mutex); 1102 1103 if (vma->vm_flags & VM_WRITE) { 1104 if (map->frozen) { 1105 err = -EPERM; 1106 goto out; 1107 } 1108 /* map is meant to be read-only, so do not allow mapping as 1109 * writable, because it's possible to leak a writable page 1110 * reference and allows user-space to still modify it after 1111 * freezing, while verifier will assume contents do not change 1112 */ 1113 if (map->map_flags & BPF_F_RDONLY_PROG) { 1114 err = -EACCES; 1115 goto out; 1116 } 1117 bpf_map_write_active_inc(map); 1118 } 1119 out: 1120 mutex_unlock(&map->freeze_mutex); 1121 if (err) 1122 return err; 1123 1124 /* set default open/close callbacks */ 1125 vma->vm_ops = &bpf_map_default_vmops; 1126 vma->vm_private_data = map; 1127 vm_flags_clear(vma, VM_MAYEXEC); 
	/* If mapping is read-only, then disallow potentially re-mapping with
	 * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing
	 * means that as far as BPF map's memory-mapped VMAs are concerned,
	 * VM_WRITE and VM_MAYWRITE are equivalent; if one of them is set,
	 * both should be set, so we can forget about VM_MAYWRITE and always
	 * check just VM_WRITE.
	 */
	if (!(vma->vm_flags & VM_WRITE))
		vm_flags_clear(vma, VM_MAYWRITE);

	err = map->ops->map_mmap(map, vma);
	if (err) {
		if (vma->vm_flags & VM_WRITE)
			bpf_map_write_active_dec(map);
	}

	return err;
}

static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_poll)
		return map->ops->map_poll(map, filp, pts);

	return EPOLLERR;
}

static unsigned long bpf_get_unmapped_area(struct file *filp, unsigned long addr,
					   unsigned long len, unsigned long pgoff,
					   unsigned long flags)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_get_unmapped_area)
		return map->ops->map_get_unmapped_area(filp, addr, len, pgoff, flags);
#ifdef CONFIG_MMU
	return mm_get_unmapped_area(filp, addr, len, pgoff, flags);
#else
	return addr;
#endif
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
	.mmap		= bpf_map_mmap,
	.poll		= bpf_map_poll,
	.get_unmapped_area = bpf_get_unmapped_area,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

/* dst and src must have at least "size" bytes each.
 * Return strlen on success and < 0 on error.
 */
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
{
	const char *end = src + size;
	const char *orig_src = src;

	memset(dst, 0, size);
	/* Copy all isalnum(), '_' and '.' chars.
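	 * Anything else is rejected, e.g. an object name containing a space
	 * or '#' fails with -EINVAL, while a name like "tcp_stats.v2" passes.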
*/ 1225 while (src < end && *src) { 1226 if (!isalnum(*src) && 1227 *src != '_' && *src != '.') 1228 return -EINVAL; 1229 *dst++ = *src++; 1230 } 1231 1232 /* No '\0' found in "size" number of bytes */ 1233 if (src == end) 1234 return -EINVAL; 1235 1236 return src - orig_src; 1237 } 1238 EXPORT_SYMBOL_GPL(bpf_obj_name_cpy); 1239 1240 int map_check_no_btf(const struct bpf_map *map, 1241 const struct btf *btf, 1242 const struct btf_type *key_type, 1243 const struct btf_type *value_type) 1244 { 1245 return -ENOTSUPP; 1246 } 1247 1248 static int map_check_btf(struct bpf_map *map, struct bpf_token *token, 1249 const struct btf *btf, u32 btf_key_id, u32 btf_value_id) 1250 { 1251 const struct btf_type *key_type, *value_type; 1252 u32 key_size, value_size; 1253 int ret = 0; 1254 1255 /* Some maps allow key to be unspecified. */ 1256 if (btf_key_id) { 1257 key_type = btf_type_id_size(btf, &btf_key_id, &key_size); 1258 if (!key_type || key_size != map->key_size) 1259 return -EINVAL; 1260 } else { 1261 key_type = btf_type_by_id(btf, 0); 1262 if (!map->ops->map_check_btf) 1263 return -EINVAL; 1264 } 1265 1266 value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 1267 if (!value_type || value_size != map->value_size) 1268 return -EINVAL; 1269 1270 map->record = btf_parse_fields(btf, value_type, 1271 BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | 1272 BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR | 1273 BPF_TASK_WORK, 1274 map->value_size); 1275 if (!IS_ERR_OR_NULL(map->record)) { 1276 int i; 1277 1278 if (!bpf_token_capable(token, CAP_BPF)) { 1279 ret = -EPERM; 1280 goto free_map_tab; 1281 } 1282 if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) { 1283 ret = -EACCES; 1284 goto free_map_tab; 1285 } 1286 for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) { 1287 switch (map->record->field_mask & (1 << i)) { 1288 case 0: 1289 continue; 1290 case BPF_SPIN_LOCK: 1291 case BPF_RES_SPIN_LOCK: 1292 if (map->map_type != BPF_MAP_TYPE_HASH && 1293 map->map_type != BPF_MAP_TYPE_ARRAY && 1294 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 1295 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1296 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1297 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1298 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1299 ret = -EOPNOTSUPP; 1300 goto free_map_tab; 1301 } 1302 break; 1303 case BPF_TIMER: 1304 case BPF_WORKQUEUE: 1305 case BPF_TASK_WORK: 1306 if (map->map_type != BPF_MAP_TYPE_HASH && 1307 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1308 map->map_type != BPF_MAP_TYPE_ARRAY) { 1309 ret = -EOPNOTSUPP; 1310 goto free_map_tab; 1311 } 1312 break; 1313 case BPF_KPTR_UNREF: 1314 case BPF_KPTR_REF: 1315 case BPF_KPTR_PERCPU: 1316 case BPF_REFCOUNT: 1317 if (map->map_type != BPF_MAP_TYPE_HASH && 1318 map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 1319 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1320 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH && 1321 map->map_type != BPF_MAP_TYPE_ARRAY && 1322 map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && 1323 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1324 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1325 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1326 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1327 ret = -EOPNOTSUPP; 1328 goto free_map_tab; 1329 } 1330 break; 1331 case BPF_UPTR: 1332 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) { 1333 ret = -EOPNOTSUPP; 1334 goto free_map_tab; 1335 } 1336 break; 1337 case BPF_LIST_HEAD: 1338 case BPF_RB_ROOT: 1339 if (map->map_type != 
BPF_MAP_TYPE_HASH && 1340 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1341 map->map_type != BPF_MAP_TYPE_ARRAY) { 1342 ret = -EOPNOTSUPP; 1343 goto free_map_tab; 1344 } 1345 break; 1346 default: 1347 /* Fail if map_type checks are missing for a field type */ 1348 ret = -EOPNOTSUPP; 1349 goto free_map_tab; 1350 } 1351 } 1352 } 1353 1354 ret = btf_check_and_fixup_fields(btf, map->record); 1355 if (ret < 0) 1356 goto free_map_tab; 1357 1358 if (map->ops->map_check_btf) { 1359 ret = map->ops->map_check_btf(map, btf, key_type, value_type); 1360 if (ret < 0) 1361 goto free_map_tab; 1362 } 1363 1364 return ret; 1365 free_map_tab: 1366 bpf_map_free_record(map); 1367 return ret; 1368 } 1369 1370 static bool bpf_net_capable(void) 1371 { 1372 return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); 1373 } 1374 1375 #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size 1376 /* called via syscall */ 1377 static int map_create(union bpf_attr *attr, bpfptr_t uattr) 1378 { 1379 const struct bpf_map_ops *ops; 1380 struct bpf_token *token = NULL; 1381 int numa_node = bpf_map_attr_numa_node(attr); 1382 u32 map_type = attr->map_type; 1383 struct bpf_map *map; 1384 bool token_flag; 1385 int f_flags; 1386 int err; 1387 1388 err = CHECK_ATTR(BPF_MAP_CREATE); 1389 if (err) 1390 return -EINVAL; 1391 1392 /* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it 1393 * to avoid per-map type checks tripping on unknown flag 1394 */ 1395 token_flag = attr->map_flags & BPF_F_TOKEN_FD; 1396 attr->map_flags &= ~BPF_F_TOKEN_FD; 1397 1398 if (attr->btf_vmlinux_value_type_id) { 1399 if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || 1400 attr->btf_key_type_id || attr->btf_value_type_id) 1401 return -EINVAL; 1402 } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { 1403 return -EINVAL; 1404 } 1405 1406 if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && 1407 attr->map_type != BPF_MAP_TYPE_ARENA && 1408 attr->map_extra != 0) 1409 return -EINVAL; 1410 1411 f_flags = bpf_get_file_flag(attr->map_flags); 1412 if (f_flags < 0) 1413 return f_flags; 1414 1415 if (numa_node != NUMA_NO_NODE && 1416 ((unsigned int)numa_node >= nr_node_ids || 1417 !node_online(numa_node))) 1418 return -EINVAL; 1419 1420 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 1421 map_type = attr->map_type; 1422 if (map_type >= ARRAY_SIZE(bpf_map_types)) 1423 return -EINVAL; 1424 map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); 1425 ops = bpf_map_types[map_type]; 1426 if (!ops) 1427 return -EINVAL; 1428 1429 if (ops->map_alloc_check) { 1430 err = ops->map_alloc_check(attr); 1431 if (err) 1432 return err; 1433 } 1434 if (attr->map_ifindex) 1435 ops = &bpf_map_offload_ops; 1436 if (!ops->map_mem_usage) 1437 return -EINVAL; 1438 1439 if (token_flag) { 1440 token = bpf_token_get_from_fd(attr->map_token_fd); 1441 if (IS_ERR(token)) 1442 return PTR_ERR(token); 1443 1444 /* if current token doesn't grant map creation permissions, 1445 * then we can't use this token, so ignore it and rely on 1446 * system-wide capabilities checks 1447 */ 1448 if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) || 1449 !bpf_token_allow_map_type(token, attr->map_type)) { 1450 bpf_token_put(token); 1451 token = NULL; 1452 } 1453 } 1454 1455 err = -EPERM; 1456 1457 /* Intent here is for unprivileged_bpf_disabled to block BPF map 1458 * creation for unprivileged users; other actions depend 1459 * on fd availability and access to bpffs, so are dependent on 1460 * object creation success. 
Even with unprivileged BPF disabled, 1461 * capability checks are still carried out. 1462 */ 1463 if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF)) 1464 goto put_token; 1465 1466 /* check privileged map type permissions */ 1467 switch (map_type) { 1468 case BPF_MAP_TYPE_ARRAY: 1469 case BPF_MAP_TYPE_PERCPU_ARRAY: 1470 case BPF_MAP_TYPE_PROG_ARRAY: 1471 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 1472 case BPF_MAP_TYPE_CGROUP_ARRAY: 1473 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 1474 case BPF_MAP_TYPE_HASH: 1475 case BPF_MAP_TYPE_PERCPU_HASH: 1476 case BPF_MAP_TYPE_HASH_OF_MAPS: 1477 case BPF_MAP_TYPE_RINGBUF: 1478 case BPF_MAP_TYPE_USER_RINGBUF: 1479 case BPF_MAP_TYPE_CGROUP_STORAGE: 1480 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: 1481 /* unprivileged */ 1482 break; 1483 case BPF_MAP_TYPE_SK_STORAGE: 1484 case BPF_MAP_TYPE_INODE_STORAGE: 1485 case BPF_MAP_TYPE_TASK_STORAGE: 1486 case BPF_MAP_TYPE_CGRP_STORAGE: 1487 case BPF_MAP_TYPE_BLOOM_FILTER: 1488 case BPF_MAP_TYPE_LPM_TRIE: 1489 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: 1490 case BPF_MAP_TYPE_STACK_TRACE: 1491 case BPF_MAP_TYPE_QUEUE: 1492 case BPF_MAP_TYPE_STACK: 1493 case BPF_MAP_TYPE_LRU_HASH: 1494 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 1495 case BPF_MAP_TYPE_STRUCT_OPS: 1496 case BPF_MAP_TYPE_CPUMAP: 1497 case BPF_MAP_TYPE_ARENA: 1498 case BPF_MAP_TYPE_INSN_ARRAY: 1499 if (!bpf_token_capable(token, CAP_BPF)) 1500 goto put_token; 1501 break; 1502 case BPF_MAP_TYPE_SOCKMAP: 1503 case BPF_MAP_TYPE_SOCKHASH: 1504 case BPF_MAP_TYPE_DEVMAP: 1505 case BPF_MAP_TYPE_DEVMAP_HASH: 1506 case BPF_MAP_TYPE_XSKMAP: 1507 if (!bpf_token_capable(token, CAP_NET_ADMIN)) 1508 goto put_token; 1509 break; 1510 default: 1511 WARN(1, "unsupported map type %d", map_type); 1512 goto put_token; 1513 } 1514 1515 map = ops->map_alloc(attr); 1516 if (IS_ERR(map)) { 1517 err = PTR_ERR(map); 1518 goto put_token; 1519 } 1520 map->ops = ops; 1521 map->map_type = map_type; 1522 1523 err = bpf_obj_name_cpy(map->name, attr->map_name, 1524 sizeof(attr->map_name)); 1525 if (err < 0) 1526 goto free_map; 1527 1528 preempt_disable(); 1529 map->cookie = gen_cookie_next(&bpf_map_cookie); 1530 preempt_enable(); 1531 1532 atomic64_set(&map->refcnt, 1); 1533 atomic64_set(&map->usercnt, 1); 1534 mutex_init(&map->freeze_mutex); 1535 spin_lock_init(&map->owner_lock); 1536 1537 if (attr->btf_key_type_id || attr->btf_value_type_id || 1538 /* Even the map's value is a kernel's struct, 1539 * the bpf_prog.o must have BTF to begin with 1540 * to figure out the corresponding kernel's 1541 * counter part. Thus, attr->btf_fd has 1542 * to be valid also. 
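	     * (Kernel BTF is rejected here: if btf_is_kernel() is true for
	     * the fd passed in attr->btf_fd, map creation fails with -EACCES
	     * below.)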
1543 */ 1544 attr->btf_vmlinux_value_type_id) { 1545 struct btf *btf; 1546 1547 btf = btf_get_by_fd(attr->btf_fd); 1548 if (IS_ERR(btf)) { 1549 err = PTR_ERR(btf); 1550 goto free_map; 1551 } 1552 if (btf_is_kernel(btf)) { 1553 btf_put(btf); 1554 err = -EACCES; 1555 goto free_map; 1556 } 1557 map->btf = btf; 1558 1559 if (attr->btf_value_type_id) { 1560 err = map_check_btf(map, token, btf, attr->btf_key_type_id, 1561 attr->btf_value_type_id); 1562 if (err) 1563 goto free_map; 1564 } 1565 1566 map->btf_key_type_id = attr->btf_key_type_id; 1567 map->btf_value_type_id = attr->btf_value_type_id; 1568 map->btf_vmlinux_value_type_id = 1569 attr->btf_vmlinux_value_type_id; 1570 } 1571 1572 if (attr->excl_prog_hash) { 1573 bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel); 1574 1575 if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) { 1576 err = -EINVAL; 1577 goto free_map; 1578 } 1579 1580 map->excl_prog_sha = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); 1581 if (!map->excl_prog_sha) { 1582 err = -ENOMEM; 1583 goto free_map; 1584 } 1585 1586 if (copy_from_bpfptr(map->excl_prog_sha, uprog_hash, SHA256_DIGEST_SIZE)) { 1587 err = -EFAULT; 1588 goto free_map; 1589 } 1590 } else if (attr->excl_prog_hash_size) { 1591 err = -EINVAL; 1592 goto free_map; 1593 } 1594 1595 err = security_bpf_map_create(map, attr, token, uattr.is_kernel); 1596 if (err) 1597 goto free_map_sec; 1598 1599 err = bpf_map_alloc_id(map); 1600 if (err) 1601 goto free_map_sec; 1602 1603 bpf_map_save_memcg(map); 1604 bpf_token_put(token); 1605 1606 err = bpf_map_new_fd(map, f_flags); 1607 if (err < 0) { 1608 /* failed to allocate fd. 1609 * bpf_map_put_with_uref() is needed because the above 1610 * bpf_map_alloc_id() has published the map 1611 * to the userspace and the userspace may 1612 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 1613 */ 1614 bpf_map_put_with_uref(map); 1615 return err; 1616 } 1617 1618 return err; 1619 1620 free_map_sec: 1621 security_bpf_map_free(map); 1622 free_map: 1623 bpf_map_free(map); 1624 put_token: 1625 bpf_token_put(token); 1626 return err; 1627 } 1628 1629 void bpf_map_inc(struct bpf_map *map) 1630 { 1631 atomic64_inc(&map->refcnt); 1632 } 1633 EXPORT_SYMBOL_GPL(bpf_map_inc); 1634 1635 void bpf_map_inc_with_uref(struct bpf_map *map) 1636 { 1637 atomic64_inc(&map->refcnt); 1638 atomic64_inc(&map->usercnt); 1639 } 1640 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); 1641 1642 struct bpf_map *bpf_map_get(u32 ufd) 1643 { 1644 CLASS(fd, f)(ufd); 1645 struct bpf_map *map = __bpf_map_get(f); 1646 1647 if (!IS_ERR(map)) 1648 bpf_map_inc(map); 1649 1650 return map; 1651 } 1652 EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL"); 1653 1654 struct bpf_map *bpf_map_get_with_uref(u32 ufd) 1655 { 1656 CLASS(fd, f)(ufd); 1657 struct bpf_map *map = __bpf_map_get(f); 1658 1659 if (!IS_ERR(map)) 1660 bpf_map_inc_with_uref(map); 1661 1662 return map; 1663 } 1664 1665 /* map_idr_lock should have been held or the map should have been 1666 * protected by rcu read lock. 
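 * Otherwise the map could be freed concurrently and the
 * atomic64_fetch_add_unless() below would operate on freed memory.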
1667 */ 1668 struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) 1669 { 1670 int refold; 1671 1672 refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); 1673 if (!refold) 1674 return ERR_PTR(-ENOENT); 1675 if (uref) 1676 atomic64_inc(&map->usercnt); 1677 1678 return map; 1679 } 1680 1681 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) 1682 { 1683 lockdep_assert(rcu_read_lock_held()); 1684 return __bpf_map_inc_not_zero(map, false); 1685 } 1686 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); 1687 1688 int __weak bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, 1689 bool delete) 1690 { 1691 return -ENOTSUPP; 1692 } 1693 1694 static void *__bpf_copy_key(void __user *ukey, u64 key_size) 1695 { 1696 if (key_size) 1697 return vmemdup_user(ukey, key_size); 1698 1699 if (ukey) 1700 return ERR_PTR(-EINVAL); 1701 1702 return NULL; 1703 } 1704 1705 static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size) 1706 { 1707 if (key_size) 1708 return kvmemdup_bpfptr(ukey, key_size); 1709 1710 if (!bpfptr_is_null(ukey)) 1711 return ERR_PTR(-EINVAL); 1712 1713 return NULL; 1714 } 1715 1716 /* last field in 'union bpf_attr' used by this command */ 1717 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags 1718 1719 static int map_lookup_elem(union bpf_attr *attr) 1720 { 1721 void __user *ukey = u64_to_user_ptr(attr->key); 1722 void __user *uvalue = u64_to_user_ptr(attr->value); 1723 struct bpf_map *map; 1724 void *key, *value; 1725 u32 value_size; 1726 int err; 1727 1728 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 1729 return -EINVAL; 1730 1731 CLASS(fd, f)(attr->map_fd); 1732 map = __bpf_map_get(f); 1733 if (IS_ERR(map)) 1734 return PTR_ERR(map); 1735 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) 1736 return -EPERM; 1737 1738 err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK); 1739 if (err) 1740 return err; 1741 1742 key = __bpf_copy_key(ukey, map->key_size); 1743 if (IS_ERR(key)) 1744 return PTR_ERR(key); 1745 1746 value_size = bpf_map_value_size(map); 1747 1748 err = -ENOMEM; 1749 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1750 if (!value) 1751 goto free_key; 1752 1753 if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 1754 if (copy_from_user(value, uvalue, value_size)) 1755 err = -EFAULT; 1756 else 1757 err = bpf_map_copy_value(map, key, value, attr->flags); 1758 goto free_value; 1759 } 1760 1761 err = bpf_map_copy_value(map, key, value, attr->flags); 1762 if (err) 1763 goto free_value; 1764 1765 err = -EFAULT; 1766 if (copy_to_user(uvalue, value, value_size) != 0) 1767 goto free_value; 1768 1769 err = 0; 1770 1771 free_value: 1772 kvfree(value); 1773 free_key: 1774 kvfree(key); 1775 return err; 1776 } 1777 1778 1779 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 1780 1781 static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) 1782 { 1783 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1784 bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel); 1785 struct bpf_map *map; 1786 void *key, *value; 1787 u32 value_size; 1788 int err; 1789 1790 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 1791 return -EINVAL; 1792 1793 CLASS(fd, f)(attr->map_fd); 1794 map = __bpf_map_get(f); 1795 if (IS_ERR(map)) 1796 return PTR_ERR(map); 1797 bpf_map_write_active_inc(map); 1798 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1799 err = -EPERM; 1800 goto err_put; 1801 } 1802 1803 err = bpf_map_check_op_flags(map, attr->flags, ~0); 1804 if (err) 1805 goto err_put; 1806 1807 key = ___bpf_copy_key(ukey, map->key_size); 1808 if (IS_ERR(key)) { 1809 err = 
PTR_ERR(key); 1810 goto err_put; 1811 } 1812 1813 value_size = bpf_map_value_size(map); 1814 value = kvmemdup_bpfptr(uvalue, value_size); 1815 if (IS_ERR(value)) { 1816 err = PTR_ERR(value); 1817 goto free_key; 1818 } 1819 1820 err = bpf_map_update_value(map, fd_file(f), key, value, attr->flags); 1821 if (!err) 1822 maybe_wait_bpf_programs(map); 1823 1824 kvfree(value); 1825 free_key: 1826 kvfree(key); 1827 err_put: 1828 bpf_map_write_active_dec(map); 1829 return err; 1830 } 1831 1832 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 1833 1834 static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr) 1835 { 1836 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1837 struct bpf_map *map; 1838 void *key; 1839 int err; 1840 1841 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 1842 return -EINVAL; 1843 1844 CLASS(fd, f)(attr->map_fd); 1845 map = __bpf_map_get(f); 1846 if (IS_ERR(map)) 1847 return PTR_ERR(map); 1848 bpf_map_write_active_inc(map); 1849 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1850 err = -EPERM; 1851 goto err_put; 1852 } 1853 1854 key = ___bpf_copy_key(ukey, map->key_size); 1855 if (IS_ERR(key)) { 1856 err = PTR_ERR(key); 1857 goto err_put; 1858 } 1859 1860 if (bpf_map_is_offloaded(map)) { 1861 err = bpf_map_offload_delete_elem(map, key); 1862 goto out; 1863 } else if (IS_FD_PROG_ARRAY(map) || 1864 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 1865 /* These maps require sleepable context */ 1866 err = map->ops->map_delete_elem(map, key); 1867 goto out; 1868 } 1869 1870 bpf_disable_instrumentation(); 1871 rcu_read_lock(); 1872 err = map->ops->map_delete_elem(map, key); 1873 rcu_read_unlock(); 1874 bpf_enable_instrumentation(); 1875 if (!err) 1876 maybe_wait_bpf_programs(map); 1877 out: 1878 kvfree(key); 1879 err_put: 1880 bpf_map_write_active_dec(map); 1881 return err; 1882 } 1883 1884 /* last field in 'union bpf_attr' used by this command */ 1885 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 1886 1887 static int map_get_next_key(union bpf_attr *attr) 1888 { 1889 void __user *ukey = u64_to_user_ptr(attr->key); 1890 void __user *unext_key = u64_to_user_ptr(attr->next_key); 1891 struct bpf_map *map; 1892 void *key, *next_key; 1893 int err; 1894 1895 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 1896 return -EINVAL; 1897 1898 CLASS(fd, f)(attr->map_fd); 1899 map = __bpf_map_get(f); 1900 if (IS_ERR(map)) 1901 return PTR_ERR(map); 1902 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) 1903 return -EPERM; 1904 1905 if (ukey) { 1906 key = __bpf_copy_key(ukey, map->key_size); 1907 if (IS_ERR(key)) 1908 return PTR_ERR(key); 1909 } else { 1910 key = NULL; 1911 } 1912 1913 err = -ENOMEM; 1914 next_key = kvmalloc(map->key_size, GFP_USER); 1915 if (!next_key) 1916 goto free_key; 1917 1918 if (bpf_map_is_offloaded(map)) { 1919 err = bpf_map_offload_get_next_key(map, key, next_key); 1920 goto out; 1921 } 1922 1923 rcu_read_lock(); 1924 err = map->ops->map_get_next_key(map, key, next_key); 1925 rcu_read_unlock(); 1926 out: 1927 if (err) 1928 goto free_next_key; 1929 1930 err = -EFAULT; 1931 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 1932 goto free_next_key; 1933 1934 err = 0; 1935 1936 free_next_key: 1937 kvfree(next_key); 1938 free_key: 1939 kvfree(key); 1940 return err; 1941 } 1942 1943 int generic_map_delete_batch(struct bpf_map *map, 1944 const union bpf_attr *attr, 1945 union bpf_attr __user *uattr) 1946 { 1947 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1948 u32 cp, max_count; 1949 int err = 0; 1950 void *key; 1951 1952 if (attr->batch.elem_flags & ~BPF_F_LOCK) 
1953 return -EINVAL; 1954 1955 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1956 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1957 return -EINVAL; 1958 } 1959 1960 max_count = attr->batch.count; 1961 if (!max_count) 1962 return 0; 1963 1964 if (put_user(0, &uattr->batch.count)) 1965 return -EFAULT; 1966 1967 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1968 if (!key) 1969 return -ENOMEM; 1970 1971 for (cp = 0; cp < max_count; cp++) { 1972 err = -EFAULT; 1973 if (copy_from_user(key, keys + cp * map->key_size, 1974 map->key_size)) 1975 break; 1976 1977 if (bpf_map_is_offloaded(map)) { 1978 err = bpf_map_offload_delete_elem(map, key); 1979 break; 1980 } 1981 1982 bpf_disable_instrumentation(); 1983 rcu_read_lock(); 1984 err = map->ops->map_delete_elem(map, key); 1985 rcu_read_unlock(); 1986 bpf_enable_instrumentation(); 1987 if (err) 1988 break; 1989 cond_resched(); 1990 } 1991 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1992 err = -EFAULT; 1993 1994 kvfree(key); 1995 1996 return err; 1997 } 1998 1999 int generic_map_update_batch(struct bpf_map *map, struct file *map_file, 2000 const union bpf_attr *attr, 2001 union bpf_attr __user *uattr) 2002 { 2003 void __user *values = u64_to_user_ptr(attr->batch.values); 2004 void __user *keys = u64_to_user_ptr(attr->batch.keys); 2005 u32 value_size, cp, max_count; 2006 void *key, *value; 2007 int err = 0; 2008 2009 err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK); 2010 if (err) 2011 return err; 2012 2013 value_size = bpf_map_value_size(map); 2014 2015 max_count = attr->batch.count; 2016 if (!max_count) 2017 return 0; 2018 2019 if (put_user(0, &uattr->batch.count)) 2020 return -EFAULT; 2021 2022 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 2023 if (!key) 2024 return -ENOMEM; 2025 2026 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 2027 if (!value) { 2028 kvfree(key); 2029 return -ENOMEM; 2030 } 2031 2032 for (cp = 0; cp < max_count; cp++) { 2033 err = -EFAULT; 2034 if (copy_from_user(key, keys + cp * map->key_size, 2035 map->key_size) || 2036 copy_from_user(value, values + cp * value_size, value_size)) 2037 break; 2038 2039 err = bpf_map_update_value(map, map_file, key, value, 2040 attr->batch.elem_flags); 2041 2042 if (err) 2043 break; 2044 cond_resched(); 2045 } 2046 2047 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 2048 err = -EFAULT; 2049 2050 kvfree(value); 2051 kvfree(key); 2052 2053 return err; 2054 } 2055 2056 int generic_map_lookup_batch(struct bpf_map *map, 2057 const union bpf_attr *attr, 2058 union bpf_attr __user *uattr) 2059 { 2060 void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); 2061 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 2062 void __user *values = u64_to_user_ptr(attr->batch.values); 2063 void __user *keys = u64_to_user_ptr(attr->batch.keys); 2064 void *buf, *buf_prevkey, *prev_key, *key, *value; 2065 u32 value_size, cp, max_count; 2066 int err; 2067 2068 err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK); 2069 if (err) 2070 return err; 2071 2072 value_size = bpf_map_value_size(map); 2073 2074 max_count = attr->batch.count; 2075 if (!max_count) 2076 return 0; 2077 2078 if (put_user(0, &uattr->batch.count)) 2079 return -EFAULT; 2080 2081 buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 2082 if (!buf_prevkey) 2083 return -ENOMEM; 2084 2085 buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN); 2086 if (!buf) { 2087 kvfree(buf_prevkey); 2088 return -ENOMEM; 2089 } 2090 2091 err 
= -EFAULT; 2092 prev_key = NULL; 2093 if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) 2094 goto free_buf; 2095 key = buf; 2096 value = key + map->key_size; 2097 if (ubatch) 2098 prev_key = buf_prevkey; 2099 2100 for (cp = 0; cp < max_count;) { 2101 rcu_read_lock(); 2102 err = map->ops->map_get_next_key(map, prev_key, key); 2103 rcu_read_unlock(); 2104 if (err) 2105 break; 2106 err = bpf_map_copy_value(map, key, value, 2107 attr->batch.elem_flags); 2108 2109 if (err == -ENOENT) 2110 goto next_key; 2111 2112 if (err) 2113 goto free_buf; 2114 2115 if (copy_to_user(keys + cp * map->key_size, key, 2116 map->key_size)) { 2117 err = -EFAULT; 2118 goto free_buf; 2119 } 2120 if (copy_to_user(values + cp * value_size, value, value_size)) { 2121 err = -EFAULT; 2122 goto free_buf; 2123 } 2124 2125 cp++; 2126 next_key: 2127 if (!prev_key) 2128 prev_key = buf_prevkey; 2129 2130 swap(prev_key, key); 2131 cond_resched(); 2132 } 2133 2134 if (err == -EFAULT) 2135 goto free_buf; 2136 2137 if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || 2138 (cp && copy_to_user(uobatch, prev_key, map->key_size)))) 2139 err = -EFAULT; 2140 2141 free_buf: 2142 kvfree(buf_prevkey); 2143 kvfree(buf); 2144 return err; 2145 } 2146 2147 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags 2148 2149 static int map_lookup_and_delete_elem(union bpf_attr *attr) 2150 { 2151 void __user *ukey = u64_to_user_ptr(attr->key); 2152 void __user *uvalue = u64_to_user_ptr(attr->value); 2153 struct bpf_map *map; 2154 void *key, *value; 2155 u32 value_size; 2156 int err; 2157 2158 if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) 2159 return -EINVAL; 2160 2161 if (attr->flags & ~BPF_F_LOCK) 2162 return -EINVAL; 2163 2164 CLASS(fd, f)(attr->map_fd); 2165 map = __bpf_map_get(f); 2166 if (IS_ERR(map)) 2167 return PTR_ERR(map); 2168 bpf_map_write_active_inc(map); 2169 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || 2170 !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 2171 err = -EPERM; 2172 goto err_put; 2173 } 2174 2175 if (attr->flags && 2176 (map->map_type == BPF_MAP_TYPE_QUEUE || 2177 map->map_type == BPF_MAP_TYPE_STACK)) { 2178 err = -EINVAL; 2179 goto err_put; 2180 } 2181 2182 if ((attr->flags & BPF_F_LOCK) && 2183 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 2184 err = -EINVAL; 2185 goto err_put; 2186 } 2187 2188 key = __bpf_copy_key(ukey, map->key_size); 2189 if (IS_ERR(key)) { 2190 err = PTR_ERR(key); 2191 goto err_put; 2192 } 2193 2194 value_size = bpf_map_value_size(map); 2195 2196 err = -ENOMEM; 2197 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 2198 if (!value) 2199 goto free_key; 2200 2201 err = -ENOTSUPP; 2202 if (map->map_type == BPF_MAP_TYPE_QUEUE || 2203 map->map_type == BPF_MAP_TYPE_STACK) { 2204 err = map->ops->map_pop_elem(map, value); 2205 } else if (map->map_type == BPF_MAP_TYPE_HASH || 2206 map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 2207 map->map_type == BPF_MAP_TYPE_LRU_HASH || 2208 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 2209 map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 2210 if (!bpf_map_is_offloaded(map)) { 2211 bpf_disable_instrumentation(); 2212 rcu_read_lock(); 2213 err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags); 2214 rcu_read_unlock(); 2215 bpf_enable_instrumentation(); 2216 } 2217 } 2218 2219 if (err) 2220 goto free_value; 2221 2222 if (copy_to_user(uvalue, value, value_size) != 0) { 2223 err = -EFAULT; 2224 goto free_value; 2225 } 2226 2227 err = 0; 2228 2229 free_value: 2230 kvfree(value); 2231 free_key: 2232 kvfree(key); 2233 
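	/* Pairs with the bpf_map_write_active_inc() taken right after
	 * __bpf_map_get() above; reached on both the success and the
	 * error paths.
	 */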
err_put: 2234 bpf_map_write_active_dec(map); 2235 return err; 2236 } 2237 2238 #define BPF_MAP_FREEZE_LAST_FIELD map_fd 2239 2240 static int map_freeze(const union bpf_attr *attr) 2241 { 2242 int err = 0; 2243 struct bpf_map *map; 2244 2245 if (CHECK_ATTR(BPF_MAP_FREEZE)) 2246 return -EINVAL; 2247 2248 CLASS(fd, f)(attr->map_fd); 2249 map = __bpf_map_get(f); 2250 if (IS_ERR(map)) 2251 return PTR_ERR(map); 2252 2253 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) 2254 return -ENOTSUPP; 2255 2256 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) 2257 return -EPERM; 2258 2259 mutex_lock(&map->freeze_mutex); 2260 if (bpf_map_write_active(map)) { 2261 err = -EBUSY; 2262 goto err_put; 2263 } 2264 if (READ_ONCE(map->frozen)) { 2265 err = -EBUSY; 2266 goto err_put; 2267 } 2268 2269 WRITE_ONCE(map->frozen, true); 2270 err_put: 2271 mutex_unlock(&map->freeze_mutex); 2272 return err; 2273 } 2274 2275 static const struct bpf_prog_ops * const bpf_prog_types[] = { 2276 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 2277 [_id] = & _name ## _prog_ops, 2278 #define BPF_MAP_TYPE(_id, _ops) 2279 #define BPF_LINK_TYPE(_id, _name) 2280 #include <linux/bpf_types.h> 2281 #undef BPF_PROG_TYPE 2282 #undef BPF_MAP_TYPE 2283 #undef BPF_LINK_TYPE 2284 }; 2285 2286 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 2287 { 2288 const struct bpf_prog_ops *ops; 2289 2290 if (type >= ARRAY_SIZE(bpf_prog_types)) 2291 return -EINVAL; 2292 type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types)); 2293 ops = bpf_prog_types[type]; 2294 if (!ops) 2295 return -EINVAL; 2296 2297 if (!bpf_prog_is_offloaded(prog->aux)) 2298 prog->aux->ops = ops; 2299 else 2300 prog->aux->ops = &bpf_offload_prog_ops; 2301 prog->type = type; 2302 return 0; 2303 } 2304 2305 enum bpf_audit { 2306 BPF_AUDIT_LOAD, 2307 BPF_AUDIT_UNLOAD, 2308 BPF_AUDIT_MAX, 2309 }; 2310 2311 static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { 2312 [BPF_AUDIT_LOAD] = "LOAD", 2313 [BPF_AUDIT_UNLOAD] = "UNLOAD", 2314 }; 2315 2316 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) 2317 { 2318 struct audit_context *ctx = NULL; 2319 struct audit_buffer *ab; 2320 2321 if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) 2322 return; 2323 if (audit_enabled == AUDIT_OFF) 2324 return; 2325 if (!in_hardirq() && !irqs_disabled()) 2326 ctx = audit_context(); 2327 ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); 2328 if (unlikely(!ab)) 2329 return; 2330 audit_log_format(ab, "prog-id=%u op=%s", 2331 prog->aux->id, bpf_audit_str[op]); 2332 audit_log_end(ab); 2333 } 2334 2335 static int bpf_prog_alloc_id(struct bpf_prog *prog) 2336 { 2337 int id; 2338 2339 idr_preload(GFP_KERNEL); 2340 spin_lock_bh(&prog_idr_lock); 2341 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 2342 if (id > 0) 2343 prog->aux->id = id; 2344 spin_unlock_bh(&prog_idr_lock); 2345 idr_preload_end(); 2346 2347 /* id is in [1, INT_MAX) */ 2348 if (WARN_ON_ONCE(!id)) 2349 return -ENOSPC; 2350 2351 return id > 0 ? 0 : id; 2352 } 2353 2354 void bpf_prog_free_id(struct bpf_prog *prog) 2355 { 2356 unsigned long flags; 2357 2358 /* cBPF to eBPF migrations are currently not in the idr store. 2359 * Offloaded programs are removed from the store when their device 2360 * disappears - even if someone grabs an fd to them they are unusable, 2361 * simply waiting for refcnt to drop to be freed. 
2362 */ 2363 if (!prog->aux->id) 2364 return; 2365 2366 spin_lock_irqsave(&prog_idr_lock, flags); 2367 idr_remove(&prog_idr, prog->aux->id); 2368 prog->aux->id = 0; 2369 spin_unlock_irqrestore(&prog_idr_lock, flags); 2370 } 2371 2372 static void __bpf_prog_put_rcu(struct rcu_head *rcu) 2373 { 2374 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 2375 2376 kvfree(aux->func_info); 2377 kfree(aux->func_info_aux); 2378 free_uid(aux->user); 2379 security_bpf_prog_free(aux->prog); 2380 bpf_prog_free(aux->prog); 2381 } 2382 2383 static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) 2384 { 2385 bpf_prog_kallsyms_del_all(prog); 2386 btf_put(prog->aux->btf); 2387 module_put(prog->aux->mod); 2388 kvfree(prog->aux->jited_linfo); 2389 kvfree(prog->aux->linfo); 2390 kfree(prog->aux->kfunc_tab); 2391 kfree(prog->aux->ctx_arg_info); 2392 if (prog->aux->attach_btf) 2393 btf_put(prog->aux->attach_btf); 2394 2395 if (deferred) { 2396 if (prog->sleepable) 2397 call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); 2398 else 2399 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 2400 } else { 2401 __bpf_prog_put_rcu(&prog->aux->rcu); 2402 } 2403 } 2404 2405 static void bpf_prog_put_deferred(struct work_struct *work) 2406 { 2407 struct bpf_prog_aux *aux; 2408 struct bpf_prog *prog; 2409 2410 aux = container_of(work, struct bpf_prog_aux, work); 2411 prog = aux->prog; 2412 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 2413 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); 2414 bpf_prog_free_id(prog); 2415 __bpf_prog_put_noref(prog, true); 2416 } 2417 2418 static void __bpf_prog_put(struct bpf_prog *prog) 2419 { 2420 struct bpf_prog_aux *aux = prog->aux; 2421 2422 if (atomic64_dec_and_test(&aux->refcnt)) { 2423 if (in_hardirq() || irqs_disabled()) { 2424 INIT_WORK(&aux->work, bpf_prog_put_deferred); 2425 schedule_work(&aux->work); 2426 } else { 2427 bpf_prog_put_deferred(&aux->work); 2428 } 2429 } 2430 } 2431 2432 void bpf_prog_put(struct bpf_prog *prog) 2433 { 2434 __bpf_prog_put(prog); 2435 } 2436 EXPORT_SYMBOL_GPL(bpf_prog_put); 2437 2438 static int bpf_prog_release(struct inode *inode, struct file *filp) 2439 { 2440 struct bpf_prog *prog = filp->private_data; 2441 2442 bpf_prog_put(prog); 2443 return 0; 2444 } 2445 2446 struct bpf_prog_kstats { 2447 u64 nsecs; 2448 u64 cnt; 2449 u64 misses; 2450 }; 2451 2452 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog) 2453 { 2454 struct bpf_prog_stats *stats; 2455 unsigned int flags; 2456 2457 if (unlikely(!prog->stats)) 2458 return; 2459 2460 stats = this_cpu_ptr(prog->stats); 2461 flags = u64_stats_update_begin_irqsave(&stats->syncp); 2462 u64_stats_inc(&stats->misses); 2463 u64_stats_update_end_irqrestore(&stats->syncp, flags); 2464 } 2465 2466 static void bpf_prog_get_stats(const struct bpf_prog *prog, 2467 struct bpf_prog_kstats *stats) 2468 { 2469 u64 nsecs = 0, cnt = 0, misses = 0; 2470 int cpu; 2471 2472 for_each_possible_cpu(cpu) { 2473 const struct bpf_prog_stats *st; 2474 unsigned int start; 2475 u64 tnsecs, tcnt, tmisses; 2476 2477 st = per_cpu_ptr(prog->stats, cpu); 2478 do { 2479 start = u64_stats_fetch_begin(&st->syncp); 2480 tnsecs = u64_stats_read(&st->nsecs); 2481 tcnt = u64_stats_read(&st->cnt); 2482 tmisses = u64_stats_read(&st->misses); 2483 } while (u64_stats_fetch_retry(&st->syncp, start)); 2484 nsecs += tnsecs; 2485 cnt += tcnt; 2486 misses += tmisses; 2487 } 2488 stats->nsecs = nsecs; 2489 stats->cnt = cnt; 2490 stats->misses = misses; 2491 } 2492 2493 #ifdef CONFIG_PROC_FS 2494 static 
void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 2495 { 2496 const struct bpf_prog *prog = filp->private_data; 2497 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2498 struct bpf_prog_kstats stats; 2499 2500 bpf_prog_get_stats(prog, &stats); 2501 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2502 seq_printf(m, 2503 "prog_type:\t%u\n" 2504 "prog_jited:\t%u\n" 2505 "prog_tag:\t%s\n" 2506 "memlock:\t%llu\n" 2507 "prog_id:\t%u\n" 2508 "run_time_ns:\t%llu\n" 2509 "run_cnt:\t%llu\n" 2510 "recursion_misses:\t%llu\n" 2511 "verified_insns:\t%u\n", 2512 prog->type, 2513 prog->jited, 2514 prog_tag, 2515 prog->pages * 1ULL << PAGE_SHIFT, 2516 prog->aux->id, 2517 stats.nsecs, 2518 stats.cnt, 2519 stats.misses, 2520 prog->aux->verified_insns); 2521 } 2522 #endif 2523 2524 const struct file_operations bpf_prog_fops = { 2525 #ifdef CONFIG_PROC_FS 2526 .show_fdinfo = bpf_prog_show_fdinfo, 2527 #endif 2528 .release = bpf_prog_release, 2529 .read = bpf_dummy_read, 2530 .write = bpf_dummy_write, 2531 }; 2532 2533 int bpf_prog_new_fd(struct bpf_prog *prog) 2534 { 2535 int ret; 2536 2537 ret = security_bpf_prog(prog); 2538 if (ret < 0) 2539 return ret; 2540 2541 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 2542 O_RDWR | O_CLOEXEC); 2543 } 2544 2545 void bpf_prog_add(struct bpf_prog *prog, int i) 2546 { 2547 atomic64_add(i, &prog->aux->refcnt); 2548 } 2549 EXPORT_SYMBOL_GPL(bpf_prog_add); 2550 2551 void bpf_prog_sub(struct bpf_prog *prog, int i) 2552 { 2553 /* Only to be used for undoing previous bpf_prog_add() in some 2554 * error path. We still know that another entity in our call 2555 * path holds a reference to the program, thus atomic_sub() can 2556 * be safely used in such cases! 2557 */ 2558 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 2559 } 2560 EXPORT_SYMBOL_GPL(bpf_prog_sub); 2561 2562 void bpf_prog_inc(struct bpf_prog *prog) 2563 { 2564 atomic64_inc(&prog->aux->refcnt); 2565 } 2566 EXPORT_SYMBOL_GPL(bpf_prog_inc); 2567 2568 /* prog_idr_lock should have been held */ 2569 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 2570 { 2571 int refold; 2572 2573 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 2574 2575 if (!refold) 2576 return ERR_PTR(-ENOENT); 2577 2578 return prog; 2579 } 2580 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 2581 2582 bool bpf_prog_get_ok(struct bpf_prog *prog, 2583 enum bpf_prog_type *attach_type, bool attach_drv) 2584 { 2585 /* not an attachment, just a refcount inc, always allow */ 2586 if (!attach_type) 2587 return true; 2588 2589 if (prog->type != *attach_type) 2590 return false; 2591 if (bpf_prog_is_offloaded(prog->aux) && !attach_drv) 2592 return false; 2593 2594 return true; 2595 } 2596 2597 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, 2598 bool attach_drv) 2599 { 2600 CLASS(fd, f)(ufd); 2601 struct bpf_prog *prog; 2602 2603 if (fd_empty(f)) 2604 return ERR_PTR(-EBADF); 2605 if (fd_file(f)->f_op != &bpf_prog_fops) 2606 return ERR_PTR(-EINVAL); 2607 2608 prog = fd_file(f)->private_data; 2609 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) 2610 return ERR_PTR(-EINVAL); 2611 2612 bpf_prog_inc(prog); 2613 return prog; 2614 } 2615 2616 struct bpf_prog *bpf_prog_get(u32 ufd) 2617 { 2618 return __bpf_prog_get(ufd, NULL, false); 2619 } 2620 2621 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, 2622 bool attach_drv) 2623 { 2624 return __bpf_prog_get(ufd, &type, attach_drv); 2625 } 2626 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 2627 2628 /* 
Initially all BPF programs could be loaded w/o specifying 2629 * expected_attach_type. Later for some of them specifying expected_attach_type 2630 * at load time became required so that program could be validated properly. 2631 * Programs of types that are allowed to be loaded both w/ and w/o (for 2632 * backward compatibility) expected_attach_type, should have the default attach 2633 * type assigned to expected_attach_type for the latter case, so that it can be 2634 * validated later at attach time. 2635 * 2636 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if 2637 * prog type requires it but has some attach types that have to be backward 2638 * compatible. 2639 */ 2640 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) 2641 { 2642 switch (attr->prog_type) { 2643 case BPF_PROG_TYPE_CGROUP_SOCK: 2644 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't 2645 * exist so checking for non-zero is the way to go here. 2646 */ 2647 if (!attr->expected_attach_type) 2648 attr->expected_attach_type = 2649 BPF_CGROUP_INET_SOCK_CREATE; 2650 break; 2651 case BPF_PROG_TYPE_SK_REUSEPORT: 2652 if (!attr->expected_attach_type) 2653 attr->expected_attach_type = 2654 BPF_SK_REUSEPORT_SELECT; 2655 break; 2656 } 2657 } 2658 2659 static int 2660 bpf_prog_load_check_attach(enum bpf_prog_type prog_type, 2661 enum bpf_attach_type expected_attach_type, 2662 struct btf *attach_btf, u32 btf_id, 2663 struct bpf_prog *dst_prog) 2664 { 2665 if (btf_id) { 2666 if (btf_id > BTF_MAX_TYPE) 2667 return -EINVAL; 2668 2669 if (!attach_btf && !dst_prog) 2670 return -EINVAL; 2671 2672 switch (prog_type) { 2673 case BPF_PROG_TYPE_TRACING: 2674 case BPF_PROG_TYPE_LSM: 2675 case BPF_PROG_TYPE_STRUCT_OPS: 2676 case BPF_PROG_TYPE_EXT: 2677 break; 2678 default: 2679 return -EINVAL; 2680 } 2681 } 2682 2683 if (attach_btf && (!btf_id || dst_prog)) 2684 return -EINVAL; 2685 2686 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && 2687 prog_type != BPF_PROG_TYPE_EXT) 2688 return -EINVAL; 2689 2690 switch (prog_type) { 2691 case BPF_PROG_TYPE_CGROUP_SOCK: 2692 switch (expected_attach_type) { 2693 case BPF_CGROUP_INET_SOCK_CREATE: 2694 case BPF_CGROUP_INET_SOCK_RELEASE: 2695 case BPF_CGROUP_INET4_POST_BIND: 2696 case BPF_CGROUP_INET6_POST_BIND: 2697 return 0; 2698 default: 2699 return -EINVAL; 2700 } 2701 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2702 switch (expected_attach_type) { 2703 case BPF_CGROUP_INET4_BIND: 2704 case BPF_CGROUP_INET6_BIND: 2705 case BPF_CGROUP_INET4_CONNECT: 2706 case BPF_CGROUP_INET6_CONNECT: 2707 case BPF_CGROUP_UNIX_CONNECT: 2708 case BPF_CGROUP_INET4_GETPEERNAME: 2709 case BPF_CGROUP_INET6_GETPEERNAME: 2710 case BPF_CGROUP_UNIX_GETPEERNAME: 2711 case BPF_CGROUP_INET4_GETSOCKNAME: 2712 case BPF_CGROUP_INET6_GETSOCKNAME: 2713 case BPF_CGROUP_UNIX_GETSOCKNAME: 2714 case BPF_CGROUP_UDP4_SENDMSG: 2715 case BPF_CGROUP_UDP6_SENDMSG: 2716 case BPF_CGROUP_UNIX_SENDMSG: 2717 case BPF_CGROUP_UDP4_RECVMSG: 2718 case BPF_CGROUP_UDP6_RECVMSG: 2719 case BPF_CGROUP_UNIX_RECVMSG: 2720 return 0; 2721 default: 2722 return -EINVAL; 2723 } 2724 case BPF_PROG_TYPE_CGROUP_SKB: 2725 switch (expected_attach_type) { 2726 case BPF_CGROUP_INET_INGRESS: 2727 case BPF_CGROUP_INET_EGRESS: 2728 return 0; 2729 default: 2730 return -EINVAL; 2731 } 2732 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2733 switch (expected_attach_type) { 2734 case BPF_CGROUP_SETSOCKOPT: 2735 case BPF_CGROUP_GETSOCKOPT: 2736 return 0; 2737 default: 2738 return -EINVAL; 2739 } 2740 case BPF_PROG_TYPE_SK_LOOKUP: 2741 if 
(expected_attach_type == BPF_SK_LOOKUP) 2742 return 0; 2743 return -EINVAL; 2744 case BPF_PROG_TYPE_SK_REUSEPORT: 2745 switch (expected_attach_type) { 2746 case BPF_SK_REUSEPORT_SELECT: 2747 case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: 2748 return 0; 2749 default: 2750 return -EINVAL; 2751 } 2752 case BPF_PROG_TYPE_NETFILTER: 2753 if (expected_attach_type == BPF_NETFILTER) 2754 return 0; 2755 return -EINVAL; 2756 case BPF_PROG_TYPE_SYSCALL: 2757 case BPF_PROG_TYPE_EXT: 2758 if (expected_attach_type) 2759 return -EINVAL; 2760 fallthrough; 2761 default: 2762 return 0; 2763 } 2764 } 2765 2766 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) 2767 { 2768 switch (prog_type) { 2769 case BPF_PROG_TYPE_SCHED_CLS: 2770 case BPF_PROG_TYPE_SCHED_ACT: 2771 case BPF_PROG_TYPE_XDP: 2772 case BPF_PROG_TYPE_LWT_IN: 2773 case BPF_PROG_TYPE_LWT_OUT: 2774 case BPF_PROG_TYPE_LWT_XMIT: 2775 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2776 case BPF_PROG_TYPE_SK_SKB: 2777 case BPF_PROG_TYPE_SK_MSG: 2778 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2779 case BPF_PROG_TYPE_CGROUP_DEVICE: 2780 case BPF_PROG_TYPE_CGROUP_SOCK: 2781 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2782 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2783 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2784 case BPF_PROG_TYPE_SOCK_OPS: 2785 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2786 case BPF_PROG_TYPE_NETFILTER: 2787 return true; 2788 case BPF_PROG_TYPE_CGROUP_SKB: 2789 /* always unpriv */ 2790 case BPF_PROG_TYPE_SK_REUSEPORT: 2791 /* equivalent to SOCKET_FILTER. need CAP_BPF only */ 2792 default: 2793 return false; 2794 } 2795 } 2796 2797 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) 2798 { 2799 switch (prog_type) { 2800 case BPF_PROG_TYPE_KPROBE: 2801 case BPF_PROG_TYPE_TRACEPOINT: 2802 case BPF_PROG_TYPE_PERF_EVENT: 2803 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2804 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2805 case BPF_PROG_TYPE_TRACING: 2806 case BPF_PROG_TYPE_LSM: 2807 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ 2808 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2809 return true; 2810 default: 2811 return false; 2812 } 2813 } 2814 2815 static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr, 2816 bool is_kernel) 2817 { 2818 bpfptr_t usig = make_bpfptr(attr->signature, is_kernel); 2819 struct bpf_dynptr_kern sig_ptr, insns_ptr; 2820 struct bpf_key *key = NULL; 2821 void *sig; 2822 int err = 0; 2823 2824 if (system_keyring_id_check(attr->keyring_id) == 0) 2825 key = bpf_lookup_system_key(attr->keyring_id); 2826 else 2827 key = bpf_lookup_user_key(attr->keyring_id, 0); 2828 2829 if (!key) 2830 return -EINVAL; 2831 2832 sig = kvmemdup_bpfptr(usig, attr->signature_size); 2833 if (IS_ERR(sig)) { 2834 bpf_key_put(key); 2835 return -ENOMEM; 2836 } 2837 2838 bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0, 2839 attr->signature_size); 2840 bpf_dynptr_init(&insns_ptr, prog->insnsi, BPF_DYNPTR_TYPE_LOCAL, 0, 2841 prog->len * sizeof(struct bpf_insn)); 2842 2843 err = bpf_verify_pkcs7_signature((struct bpf_dynptr *)&insns_ptr, 2844 (struct bpf_dynptr *)&sig_ptr, key); 2845 2846 bpf_key_put(key); 2847 kvfree(sig); 2848 return err; 2849 } 2850 2851 static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog) 2852 { 2853 int err; 2854 int i; 2855 2856 for (i = 0; i < prog->aux->used_map_cnt; i++) { 2857 if (prog->aux->used_maps[i]->map_type != BPF_MAP_TYPE_INSN_ARRAY) 2858 continue; 2859 2860 err = bpf_insn_array_ready(prog->aux->used_maps[i]); 2861 if (err) 2862 return err; 2863 } 2864 2865 return 0; 2866 } 2867 
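/*
 * Example (illustrative sketch, not part of this source file): a minimal
 * user-space invocation of the BPF_PROG_LOAD command that is handled by
 * bpf_prog_load() below. The wrapper name and the two-instruction
 * "return 0" program are made up for illustration; only the attr fields
 * that bpf_prog_load() actually requires are filled in, and error
 * handling is elided.
 *
 *	#include <linux/bpf.h>
 *	#include <string.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static int load_trivial_prog(void)
 *	{
 *		const struct bpf_insn insns[] = {
 *			{ .code = BPF_ALU64 | BPF_MOV | BPF_K,	// r0 = 0
 *			  .dst_reg = BPF_REG_0, .imm = 0 },
 *			{ .code = BPF_JMP | BPF_EXIT },		// exit
 *		};
 *		union bpf_attr attr;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 *		attr.insns = (__u64)(unsigned long)insns;
 *		attr.insn_cnt = 2;
 *		attr.license = (__u64)(unsigned long)"GPL";
 *
 *		// returns a new prog FD on success, -1 with errno on failure
 *		return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *	}
 */
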
2868 /* last field in 'union bpf_attr' used by this command */ 2869 #define BPF_PROG_LOAD_LAST_FIELD keyring_id 2870 2871 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) 2872 { 2873 enum bpf_prog_type type = attr->prog_type; 2874 struct bpf_prog *prog, *dst_prog = NULL; 2875 struct btf *attach_btf = NULL; 2876 struct bpf_token *token = NULL; 2877 bool bpf_cap; 2878 int err; 2879 char license[128]; 2880 2881 if (CHECK_ATTR(BPF_PROG_LOAD)) 2882 return -EINVAL; 2883 2884 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | 2885 BPF_F_ANY_ALIGNMENT | 2886 BPF_F_TEST_STATE_FREQ | 2887 BPF_F_SLEEPABLE | 2888 BPF_F_TEST_RND_HI32 | 2889 BPF_F_XDP_HAS_FRAGS | 2890 BPF_F_XDP_DEV_BOUND_ONLY | 2891 BPF_F_TEST_REG_INVARIANTS | 2892 BPF_F_TOKEN_FD)) 2893 return -EINVAL; 2894 2895 bpf_prog_load_fixup_attach_type(attr); 2896 2897 if (attr->prog_flags & BPF_F_TOKEN_FD) { 2898 token = bpf_token_get_from_fd(attr->prog_token_fd); 2899 if (IS_ERR(token)) 2900 return PTR_ERR(token); 2901 /* if current token doesn't grant prog loading permissions, 2902 * then we can't use this token, so ignore it and rely on 2903 * system-wide capabilities checks 2904 */ 2905 if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || 2906 !bpf_token_allow_prog_type(token, attr->prog_type, 2907 attr->expected_attach_type)) { 2908 bpf_token_put(token); 2909 token = NULL; 2910 } 2911 } 2912 2913 bpf_cap = bpf_token_capable(token, CAP_BPF); 2914 err = -EPERM; 2915 2916 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 2917 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && 2918 !bpf_cap) 2919 goto put_token; 2920 2921 /* Intent here is for unprivileged_bpf_disabled to block BPF program 2922 * creation for unprivileged users; other actions depend 2923 * on fd availability and access to bpffs, so are dependent on 2924 * object creation success. Even with unprivileged BPF disabled, 2925 * capability checks are still carried out for these 2926 * and other operations. 2927 */ 2928 if (sysctl_unprivileged_bpf_disabled && !bpf_cap) 2929 goto put_token; 2930 2931 if (attr->insn_cnt == 0 || 2932 attr->insn_cnt > (bpf_cap ? 
BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) {
2933 		err = -E2BIG;
2934 		goto put_token;
2935 	}
2936 	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
2937 	    type != BPF_PROG_TYPE_CGROUP_SKB &&
2938 	    !bpf_cap)
2939 		goto put_token;
2940 
2941 	if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN))
2942 		goto put_token;
2943 	if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON))
2944 		goto put_token;
2945 
2946 	/* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
2947 	 * or btf; we need to check which one it is
2948 	 */
2949 	if (attr->attach_prog_fd) {
2950 		dst_prog = bpf_prog_get(attr->attach_prog_fd);
2951 		if (IS_ERR(dst_prog)) {
2952 			dst_prog = NULL;
2953 			attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
2954 			if (IS_ERR(attach_btf)) {
2955 				err = -EINVAL;
2956 				goto put_token;
2957 			}
2958 			if (!btf_is_kernel(attach_btf)) {
2959 				/* attaching through specifying bpf_prog's BTF
2960 				 * objects directly might be supported eventually
2961 				 */
2962 				btf_put(attach_btf);
2963 				err = -ENOTSUPP;
2964 				goto put_token;
2965 			}
2966 		}
2967 	} else if (attr->attach_btf_id) {
2968 		/* fall back to vmlinux BTF, if BTF type ID is specified */
2969 		attach_btf = bpf_get_btf_vmlinux();
2970 		if (IS_ERR(attach_btf)) {
2971 			err = PTR_ERR(attach_btf);
2972 			goto put_token;
2973 		}
2974 		if (!attach_btf) {
2975 			err = -EINVAL;
2976 			goto put_token;
2977 		}
2978 		btf_get(attach_btf);
2979 	}
2980 
2981 	if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
2982 				       attach_btf, attr->attach_btf_id,
2983 				       dst_prog)) {
2984 		if (dst_prog)
2985 			bpf_prog_put(dst_prog);
2986 		if (attach_btf)
2987 			btf_put(attach_btf);
2988 		err = -EINVAL;
2989 		goto put_token;
2990 	}
2991 
2992 	/* plain bpf_prog allocation */
2993 	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
2994 	if (!prog) {
2995 		if (dst_prog)
2996 			bpf_prog_put(dst_prog);
2997 		if (attach_btf)
2998 			btf_put(attach_btf);
2999 		err = -ENOMEM;
3000 		goto put_token;
3001 	}
3002 
3003 	prog->expected_attach_type = attr->expected_attach_type;
3004 	prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE);
3005 	prog->aux->attach_btf = attach_btf;
3006 	prog->aux->attach_btf_id = attr->attach_btf_id;
3007 	prog->aux->dst_prog = dst_prog;
3008 	prog->aux->dev_bound = !!attr->prog_ifindex;
3009 	prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
3010 
3011 	/* move token into prog->aux, reuse taken refcnt */
3012 	prog->aux->token = token;
3013 	token = NULL;
3014 
3015 	prog->aux->user = get_current_user();
3016 	prog->len = attr->insn_cnt;
3017 
3018 	err = -EFAULT;
3019 	if (copy_from_bpfptr(prog->insns,
3020 			     make_bpfptr(attr->insns, uattr.is_kernel),
3021 			     bpf_prog_insn_size(prog)) != 0)
3022 		goto free_prog;
3023 	/* copy eBPF program license from user space */
3024 	if (strncpy_from_bpfptr(license,
3025 				make_bpfptr(attr->license, uattr.is_kernel),
3026 				sizeof(license) - 1) < 0)
3027 		goto free_prog;
3028 	license[sizeof(license) - 1] = 0;
3029 
3030 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
3031 	prog->gpl_compatible = license_is_gpl_compatible(license) ?
1 : 0;
3032 
3033 	if (attr->signature) {
3034 		err = bpf_prog_verify_signature(prog, attr, uattr.is_kernel);
3035 		if (err)
3036 			goto free_prog;
3037 	}
3038 
3039 	prog->orig_prog = NULL;
3040 	prog->jited = 0;
3041 
3042 	atomic64_set(&prog->aux->refcnt, 1);
3043 
3044 	if (bpf_prog_is_dev_bound(prog->aux)) {
3045 		err = bpf_prog_dev_bound_init(prog, attr);
3046 		if (err)
3047 			goto free_prog;
3048 	}
3049 
3050 	if (type == BPF_PROG_TYPE_EXT && dst_prog &&
3051 	    bpf_prog_is_dev_bound(dst_prog->aux)) {
3052 		err = bpf_prog_dev_bound_inherit(prog, dst_prog);
3053 		if (err)
3054 			goto free_prog;
3055 	}
3056 
3057 	/*
3058 	 * Bookkeeping for managing the program attachment chain.
3059 	 *
3060 	 * It might be tempting to set the attach_tracing_prog flag at attachment
3061 	 * time, but that would not prevent user space from loading a bunch of
3062 	 * tracing programs first and then attaching them to one another.
3063 	 *
3064 	 * The attach_tracing_prog flag is set for the program's whole lifetime and
3065 	 * doesn't have to be cleared in bpf_tracing_link_release(), since tracing
3066 	 * programs cannot change their attachment target.
3067 	 */
3068 	if (type == BPF_PROG_TYPE_TRACING && dst_prog &&
3069 	    dst_prog->type == BPF_PROG_TYPE_TRACING) {
3070 		prog->aux->attach_tracing_prog = true;
3071 	}
3072 
3073 	/* find program type: socket_filter vs tracing_filter */
3074 	err = find_prog_type(type, prog);
3075 	if (err < 0)
3076 		goto free_prog;
3077 
3078 	prog->aux->load_time = ktime_get_boottime_ns();
3079 	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
3080 			       sizeof(attr->prog_name));
3081 	if (err < 0)
3082 		goto free_prog;
3083 
3084 	err = security_bpf_prog_load(prog, attr, token, uattr.is_kernel);
3085 	if (err)
3086 		goto free_prog_sec;
3087 
3088 	/* run eBPF verifier */
3089 	err = bpf_check(&prog, attr, uattr, uattr_size);
3090 	if (err < 0)
3091 		goto free_used_maps;
3092 
3093 	prog = bpf_prog_select_runtime(prog, &err);
3094 	if (err < 0)
3095 		goto free_used_maps;
3096 
3097 	err = bpf_prog_mark_insn_arrays_ready(prog);
3098 	if (err < 0)
3099 		goto free_used_maps;
3100 
3101 	err = bpf_prog_alloc_id(prog);
3102 	if (err)
3103 		goto free_used_maps;
3104 
3105 	/* Upon success of bpf_prog_alloc_id(), the BPF prog is
3106 	 * effectively publicly exposed. However, retrieving via
3107 	 * bpf_prog_get_fd_by_id() will take another reference,
3108 	 * therefore it cannot be gone underneath us.
3109 	 *
3110 	 * Only for the time /after/ successful bpf_prog_new_fd()
3111 	 * and before returning to userspace, we might just hold
3112 	 * one reference and any parallel close on that fd could
3113 	 * rip everything out. Hence, below notifications must
3114 	 * happen before bpf_prog_new_fd().
3115 	 *
3116 	 * Also, any failure handling from this point onwards must
3117 	 * be using bpf_prog_put() given the program is exposed.
3118 	 */
3119 	bpf_prog_kallsyms_add(prog);
3120 	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
3121 	bpf_audit_prog(prog, BPF_AUDIT_LOAD);
3122 
3123 	err = bpf_prog_new_fd(prog);
3124 	if (err < 0)
3125 		bpf_prog_put(prog);
3126 	return err;
3127 
3128 free_used_maps:
3129 	/* In case we have subprogs, we need to wait for a grace
3130 	 * period before we can tear down JIT memory since symbols
3131 	 * are already exposed under kallsyms.
3132 */ 3133 __bpf_prog_put_noref(prog, prog->aux->real_func_cnt); 3134 return err; 3135 3136 free_prog_sec: 3137 security_bpf_prog_free(prog); 3138 free_prog: 3139 free_uid(prog->aux->user); 3140 if (prog->aux->attach_btf) 3141 btf_put(prog->aux->attach_btf); 3142 bpf_prog_free(prog); 3143 put_token: 3144 bpf_token_put(token); 3145 return err; 3146 } 3147 3148 #define BPF_OBJ_LAST_FIELD path_fd 3149 3150 static int bpf_obj_pin(const union bpf_attr *attr) 3151 { 3152 int path_fd; 3153 3154 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD) 3155 return -EINVAL; 3156 3157 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3158 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3159 return -EINVAL; 3160 3161 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 3162 return bpf_obj_pin_user(attr->bpf_fd, path_fd, 3163 u64_to_user_ptr(attr->pathname)); 3164 } 3165 3166 static int bpf_obj_get(const union bpf_attr *attr) 3167 { 3168 int path_fd; 3169 3170 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 3171 attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD)) 3172 return -EINVAL; 3173 3174 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3175 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3176 return -EINVAL; 3177 3178 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 3179 return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname), 3180 attr->file_flags); 3181 } 3182 3183 /* bpf_link_init_sleepable() allows to specify whether BPF link itself has 3184 * "sleepable" semantics, which normally would mean that BPF link's attach 3185 * hook can dereference link or link's underlying program for some time after 3186 * detachment due to RCU Tasks Trace-based lifetime protection scheme. 3187 * BPF program itself can be non-sleepable, yet, because it's transitively 3188 * reachable through BPF link, its freeing has to be delayed until after RCU 3189 * Tasks Trace GP. 3190 */ 3191 void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, 3192 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3193 enum bpf_attach_type attach_type, bool sleepable) 3194 { 3195 WARN_ON(ops->dealloc && ops->dealloc_deferred); 3196 atomic64_set(&link->refcnt, 1); 3197 link->type = type; 3198 link->sleepable = sleepable; 3199 link->id = 0; 3200 link->ops = ops; 3201 link->prog = prog; 3202 link->attach_type = attach_type; 3203 } 3204 3205 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, 3206 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3207 enum bpf_attach_type attach_type) 3208 { 3209 bpf_link_init_sleepable(link, type, ops, prog, attach_type, false); 3210 } 3211 3212 static void bpf_link_free_id(int id) 3213 { 3214 if (!id) 3215 return; 3216 3217 spin_lock_bh(&link_idr_lock); 3218 idr_remove(&link_idr, id); 3219 spin_unlock_bh(&link_idr_lock); 3220 } 3221 3222 /* Clean up bpf_link and corresponding anon_inode file and FD. After 3223 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 3224 * anon_inode's release() call. This helper marks bpf_link as 3225 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 3226 * is not decremented, it's the responsibility of a calling code that failed 3227 * to complete bpf_link initialization. 3228 * This helper eventually calls link's dealloc callback, but does not call 3229 * link's release callback. 
3230 */ 3231 void bpf_link_cleanup(struct bpf_link_primer *primer) 3232 { 3233 primer->link->prog = NULL; 3234 bpf_link_free_id(primer->id); 3235 fput(primer->file); 3236 put_unused_fd(primer->fd); 3237 } 3238 3239 void bpf_link_inc(struct bpf_link *link) 3240 { 3241 atomic64_inc(&link->refcnt); 3242 } 3243 3244 static void bpf_link_dealloc(struct bpf_link *link) 3245 { 3246 /* now that we know that bpf_link itself can't be reached, put underlying BPF program */ 3247 if (link->prog) 3248 bpf_prog_put(link->prog); 3249 3250 /* free bpf_link and its containing memory */ 3251 if (link->ops->dealloc_deferred) 3252 link->ops->dealloc_deferred(link); 3253 else 3254 link->ops->dealloc(link); 3255 } 3256 3257 static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) 3258 { 3259 struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); 3260 3261 bpf_link_dealloc(link); 3262 } 3263 3264 static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) 3265 { 3266 if (rcu_trace_implies_rcu_gp()) 3267 bpf_link_defer_dealloc_rcu_gp(rcu); 3268 else 3269 call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); 3270 } 3271 3272 /* bpf_link_free is guaranteed to be called from process context */ 3273 static void bpf_link_free(struct bpf_link *link) 3274 { 3275 const struct bpf_link_ops *ops = link->ops; 3276 3277 bpf_link_free_id(link->id); 3278 /* detach BPF program, clean up used resources */ 3279 if (link->prog) 3280 ops->release(link); 3281 if (ops->dealloc_deferred) { 3282 /* Schedule BPF link deallocation, which will only then 3283 * trigger putting BPF program refcount. 3284 * If underlying BPF program is sleepable or BPF link's target 3285 * attach hookpoint is sleepable or otherwise requires RCU GPs 3286 * to ensure link and its underlying BPF program is not 3287 * reachable anymore, we need to first wait for RCU tasks 3288 * trace sync, and then go through "classic" RCU grace period 3289 */ 3290 if (link->sleepable || (link->prog && link->prog->sleepable)) 3291 call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); 3292 else 3293 call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); 3294 } else if (ops->dealloc) { 3295 bpf_link_dealloc(link); 3296 } 3297 } 3298 3299 static void bpf_link_put_deferred(struct work_struct *work) 3300 { 3301 struct bpf_link *link = container_of(work, struct bpf_link, work); 3302 3303 bpf_link_free(link); 3304 } 3305 3306 /* bpf_link_put might be called from atomic context. It needs to be called 3307 * from sleepable context in order to acquire sleeping locks during the process. 
3308 */ 3309 void bpf_link_put(struct bpf_link *link) 3310 { 3311 if (!atomic64_dec_and_test(&link->refcnt)) 3312 return; 3313 3314 INIT_WORK(&link->work, bpf_link_put_deferred); 3315 schedule_work(&link->work); 3316 } 3317 EXPORT_SYMBOL(bpf_link_put); 3318 3319 static void bpf_link_put_direct(struct bpf_link *link) 3320 { 3321 if (!atomic64_dec_and_test(&link->refcnt)) 3322 return; 3323 bpf_link_free(link); 3324 } 3325 3326 static int bpf_link_release(struct inode *inode, struct file *filp) 3327 { 3328 struct bpf_link *link = filp->private_data; 3329 3330 bpf_link_put_direct(link); 3331 return 0; 3332 } 3333 3334 #ifdef CONFIG_PROC_FS 3335 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 3336 #define BPF_MAP_TYPE(_id, _ops) 3337 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name, 3338 static const char *bpf_link_type_strs[] = { 3339 [BPF_LINK_TYPE_UNSPEC] = "<invalid>", 3340 #include <linux/bpf_types.h> 3341 }; 3342 #undef BPF_PROG_TYPE 3343 #undef BPF_MAP_TYPE 3344 #undef BPF_LINK_TYPE 3345 3346 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) 3347 { 3348 const struct bpf_link *link = filp->private_data; 3349 const struct bpf_prog *prog = link->prog; 3350 enum bpf_link_type type = link->type; 3351 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 3352 3353 if (type < ARRAY_SIZE(bpf_link_type_strs) && bpf_link_type_strs[type]) { 3354 if (link->type == BPF_LINK_TYPE_KPROBE_MULTI) 3355 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_KPROBE_MULTI_RETURN ? 3356 "kretprobe_multi" : "kprobe_multi"); 3357 else if (link->type == BPF_LINK_TYPE_UPROBE_MULTI) 3358 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_UPROBE_MULTI_RETURN ? 3359 "uretprobe_multi" : "uprobe_multi"); 3360 else 3361 seq_printf(m, "link_type:\t%s\n", bpf_link_type_strs[type]); 3362 } else { 3363 WARN_ONCE(1, "missing BPF_LINK_TYPE(...) for link type %u\n", type); 3364 seq_printf(m, "link_type:\t<%u>\n", type); 3365 } 3366 seq_printf(m, "link_id:\t%u\n", link->id); 3367 3368 if (prog) { 3369 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 3370 seq_printf(m, 3371 "prog_tag:\t%s\n" 3372 "prog_id:\t%u\n", 3373 prog_tag, 3374 prog->aux->id); 3375 } 3376 if (link->ops->show_fdinfo) 3377 link->ops->show_fdinfo(link, m); 3378 } 3379 #endif 3380 3381 static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts) 3382 { 3383 struct bpf_link *link = file->private_data; 3384 3385 return link->ops->poll(file, pts); 3386 } 3387 3388 static const struct file_operations bpf_link_fops = { 3389 #ifdef CONFIG_PROC_FS 3390 .show_fdinfo = bpf_link_show_fdinfo, 3391 #endif 3392 .release = bpf_link_release, 3393 .read = bpf_dummy_read, 3394 .write = bpf_dummy_write, 3395 }; 3396 3397 static const struct file_operations bpf_link_fops_poll = { 3398 #ifdef CONFIG_PROC_FS 3399 .show_fdinfo = bpf_link_show_fdinfo, 3400 #endif 3401 .release = bpf_link_release, 3402 .read = bpf_dummy_read, 3403 .write = bpf_dummy_write, 3404 .poll = bpf_link_poll, 3405 }; 3406 3407 static int bpf_link_alloc_id(struct bpf_link *link) 3408 { 3409 int id; 3410 3411 idr_preload(GFP_KERNEL); 3412 spin_lock_bh(&link_idr_lock); 3413 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); 3414 spin_unlock_bh(&link_idr_lock); 3415 idr_preload_end(); 3416 3417 return id; 3418 } 3419 3420 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, 3421 * reserving unused FD and allocating ID from link_idr. 
This is to be paired
3422  * with bpf_link_settle() to install FD and ID and expose bpf_link to
3423  * user-space, if bpf_link is successfully attached. If not, bpf_link and
3424  * pre-allocated resources are to be freed with a bpf_link_cleanup() call.
3425  * All the transient state is passed around in struct bpf_link_primer.
3426  * This is the preferred way to create and initialize bpf_link, especially when
3427  * there are complicated and expensive operations in between creating bpf_link
3428  * itself and attaching it to a BPF hook. By using bpf_link_prime() and
3429  * bpf_link_settle(), kernel code using bpf_link doesn't have to perform
3430  * expensive (and potentially failing) roll back operations in the rare case
3431  * that the file, FD, or ID can't be allocated.
3432  */
3433 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
3434 {
3435 	struct file *file;
3436 	int fd, id;
3437 
3438 	fd = get_unused_fd_flags(O_CLOEXEC);
3439 	if (fd < 0)
3440 		return fd;
3441 
3442 
3443 	id = bpf_link_alloc_id(link);
3444 	if (id < 0) {
3445 		put_unused_fd(fd);
3446 		return id;
3447 	}
3448 
3449 	file = anon_inode_getfile("bpf_link",
3450 				  link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops,
3451 				  link, O_CLOEXEC);
3452 	if (IS_ERR(file)) {
3453 		bpf_link_free_id(id);
3454 		put_unused_fd(fd);
3455 		return PTR_ERR(file);
3456 	}
3457 
3458 	primer->link = link;
3459 	primer->file = file;
3460 	primer->fd = fd;
3461 	primer->id = id;
3462 	return 0;
3463 }
3464 
3465 int bpf_link_settle(struct bpf_link_primer *primer)
3466 {
3467 	/* make bpf_link fetchable by ID */
3468 	spin_lock_bh(&link_idr_lock);
3469 	primer->link->id = primer->id;
3470 	spin_unlock_bh(&link_idr_lock);
3471 	/* make bpf_link fetchable by FD */
3472 	fd_install(primer->fd, primer->file);
3473 	/* pass through installed FD */
3474 	return primer->fd;
3475 }
3476 
3477 int bpf_link_new_fd(struct bpf_link *link)
3478 {
3479 	return anon_inode_getfd("bpf-link",
3480 				link->ops->poll ?
&bpf_link_fops_poll : &bpf_link_fops, 3481 link, O_CLOEXEC); 3482 } 3483 3484 struct bpf_link *bpf_link_get_from_fd(u32 ufd) 3485 { 3486 CLASS(fd, f)(ufd); 3487 struct bpf_link *link; 3488 3489 if (fd_empty(f)) 3490 return ERR_PTR(-EBADF); 3491 if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) 3492 return ERR_PTR(-EINVAL); 3493 3494 link = fd_file(f)->private_data; 3495 bpf_link_inc(link); 3496 return link; 3497 } 3498 EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL"); 3499 3500 static void bpf_tracing_link_release(struct bpf_link *link) 3501 { 3502 struct bpf_tracing_link *tr_link = 3503 container_of(link, struct bpf_tracing_link, link.link); 3504 3505 WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, 3506 tr_link->trampoline, 3507 tr_link->tgt_prog)); 3508 3509 bpf_trampoline_put(tr_link->trampoline); 3510 3511 /* tgt_prog is NULL if target is a kernel function */ 3512 if (tr_link->tgt_prog) 3513 bpf_prog_put(tr_link->tgt_prog); 3514 } 3515 3516 static void bpf_tracing_link_dealloc(struct bpf_link *link) 3517 { 3518 struct bpf_tracing_link *tr_link = 3519 container_of(link, struct bpf_tracing_link, link.link); 3520 3521 kfree(tr_link); 3522 } 3523 3524 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, 3525 struct seq_file *seq) 3526 { 3527 struct bpf_tracing_link *tr_link = 3528 container_of(link, struct bpf_tracing_link, link.link); 3529 u32 target_btf_id, target_obj_id; 3530 3531 bpf_trampoline_unpack_key(tr_link->trampoline->key, 3532 &target_obj_id, &target_btf_id); 3533 seq_printf(seq, 3534 "attach_type:\t%d\n" 3535 "target_obj_id:\t%u\n" 3536 "target_btf_id:\t%u\n" 3537 "cookie:\t%llu\n", 3538 link->attach_type, 3539 target_obj_id, 3540 target_btf_id, 3541 tr_link->link.cookie); 3542 } 3543 3544 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, 3545 struct bpf_link_info *info) 3546 { 3547 struct bpf_tracing_link *tr_link = 3548 container_of(link, struct bpf_tracing_link, link.link); 3549 3550 info->tracing.attach_type = link->attach_type; 3551 info->tracing.cookie = tr_link->link.cookie; 3552 bpf_trampoline_unpack_key(tr_link->trampoline->key, 3553 &info->tracing.target_obj_id, 3554 &info->tracing.target_btf_id); 3555 3556 return 0; 3557 } 3558 3559 static const struct bpf_link_ops bpf_tracing_link_lops = { 3560 .release = bpf_tracing_link_release, 3561 .dealloc = bpf_tracing_link_dealloc, 3562 .show_fdinfo = bpf_tracing_link_show_fdinfo, 3563 .fill_link_info = bpf_tracing_link_fill_link_info, 3564 }; 3565 3566 static int bpf_tracing_prog_attach(struct bpf_prog *prog, 3567 int tgt_prog_fd, 3568 u32 btf_id, 3569 u64 bpf_cookie, 3570 enum bpf_attach_type attach_type) 3571 { 3572 struct bpf_link_primer link_primer; 3573 struct bpf_prog *tgt_prog = NULL; 3574 struct bpf_trampoline *tr = NULL; 3575 struct bpf_tracing_link *link; 3576 u64 key = 0; 3577 int err; 3578 3579 switch (prog->type) { 3580 case BPF_PROG_TYPE_TRACING: 3581 if (prog->expected_attach_type != BPF_TRACE_FENTRY && 3582 prog->expected_attach_type != BPF_TRACE_FEXIT && 3583 prog->expected_attach_type != BPF_MODIFY_RETURN) { 3584 err = -EINVAL; 3585 goto out_put_prog; 3586 } 3587 break; 3588 case BPF_PROG_TYPE_EXT: 3589 if (prog->expected_attach_type != 0) { 3590 err = -EINVAL; 3591 goto out_put_prog; 3592 } 3593 break; 3594 case BPF_PROG_TYPE_LSM: 3595 if (prog->expected_attach_type != BPF_LSM_MAC) { 3596 err = -EINVAL; 3597 goto out_put_prog; 3598 } 3599 break; 3600 default: 3601 err = -EINVAL; 3602 goto out_put_prog; 3603 } 3604 3605 
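	/* tgt_prog_fd and btf_id must be supplied as a pair: either both are
	 * set (the caller names an explicit attach target) or both are zero
	 * and the target is taken from prog->aux further down.
	 */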
if (!!tgt_prog_fd != !!btf_id) { 3606 err = -EINVAL; 3607 goto out_put_prog; 3608 } 3609 3610 if (tgt_prog_fd) { 3611 /* 3612 * For now we only allow new targets for BPF_PROG_TYPE_EXT. If this 3613 * part would be changed to implement the same for 3614 * BPF_PROG_TYPE_TRACING, do not forget to update the way how 3615 * attach_tracing_prog flag is set. 3616 */ 3617 if (prog->type != BPF_PROG_TYPE_EXT) { 3618 err = -EINVAL; 3619 goto out_put_prog; 3620 } 3621 3622 tgt_prog = bpf_prog_get(tgt_prog_fd); 3623 if (IS_ERR(tgt_prog)) { 3624 err = PTR_ERR(tgt_prog); 3625 tgt_prog = NULL; 3626 goto out_put_prog; 3627 } 3628 3629 key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id); 3630 } 3631 3632 link = kzalloc(sizeof(*link), GFP_USER); 3633 if (!link) { 3634 err = -ENOMEM; 3635 goto out_put_prog; 3636 } 3637 bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, 3638 &bpf_tracing_link_lops, prog, attach_type); 3639 3640 link->link.cookie = bpf_cookie; 3641 3642 mutex_lock(&prog->aux->dst_mutex); 3643 3644 /* There are a few possible cases here: 3645 * 3646 * - if prog->aux->dst_trampoline is set, the program was just loaded 3647 * and not yet attached to anything, so we can use the values stored 3648 * in prog->aux 3649 * 3650 * - if prog->aux->dst_trampoline is NULL, the program has already been 3651 * attached to a target and its initial target was cleared (below) 3652 * 3653 * - if tgt_prog != NULL, the caller specified tgt_prog_fd + 3654 * target_btf_id using the link_create API. 3655 * 3656 * - if tgt_prog == NULL when this function was called using the old 3657 * raw_tracepoint_open API, and we need a target from prog->aux 3658 * 3659 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program 3660 * was detached and is going for re-attachment. 3661 * 3662 * - if prog->aux->dst_trampoline is NULL and tgt_prog and prog->aux->attach_btf 3663 * are NULL, then program was already attached and user did not provide 3664 * tgt_prog_fd so we have no way to find out or create trampoline 3665 */ 3666 if (!prog->aux->dst_trampoline && !tgt_prog) { 3667 /* 3668 * Allow re-attach for TRACING and LSM programs. If it's 3669 * currently linked, bpf_trampoline_link_prog will fail. 3670 * EXT programs need to specify tgt_prog_fd, so they 3671 * re-attach in separate code path. 3672 */ 3673 if (prog->type != BPF_PROG_TYPE_TRACING && 3674 prog->type != BPF_PROG_TYPE_LSM) { 3675 err = -EINVAL; 3676 goto out_unlock; 3677 } 3678 /* We can allow re-attach only if we have valid attach_btf. 
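		 * attach_btf was recorded at load time from attach_btf_id
		 * (vmlinux or module BTF), so the original target can be
		 * recomputed below.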
*/ 3679 if (!prog->aux->attach_btf) { 3680 err = -EINVAL; 3681 goto out_unlock; 3682 } 3683 btf_id = prog->aux->attach_btf_id; 3684 key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id); 3685 } 3686 3687 if (!prog->aux->dst_trampoline || 3688 (key && key != prog->aux->dst_trampoline->key)) { 3689 /* If there is no saved target, or the specified target is 3690 * different from the destination specified at load time, we 3691 * need a new trampoline and a check for compatibility 3692 */ 3693 struct bpf_attach_target_info tgt_info = {}; 3694 3695 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, 3696 &tgt_info); 3697 if (err) 3698 goto out_unlock; 3699 3700 if (tgt_info.tgt_mod) { 3701 module_put(prog->aux->mod); 3702 prog->aux->mod = tgt_info.tgt_mod; 3703 } 3704 3705 tr = bpf_trampoline_get(key, &tgt_info); 3706 if (!tr) { 3707 err = -ENOMEM; 3708 goto out_unlock; 3709 } 3710 } else { 3711 /* The caller didn't specify a target, or the target was the 3712 * same as the destination supplied during program load. This 3713 * means we can reuse the trampoline and reference from program 3714 * load time, and there is no need to allocate a new one. This 3715 * can only happen once for any program, as the saved values in 3716 * prog->aux are cleared below. 3717 */ 3718 tr = prog->aux->dst_trampoline; 3719 tgt_prog = prog->aux->dst_prog; 3720 } 3721 3722 err = bpf_link_prime(&link->link.link, &link_primer); 3723 if (err) 3724 goto out_unlock; 3725 3726 err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog); 3727 if (err) { 3728 bpf_link_cleanup(&link_primer); 3729 link = NULL; 3730 goto out_unlock; 3731 } 3732 3733 link->tgt_prog = tgt_prog; 3734 link->trampoline = tr; 3735 3736 /* Always clear the trampoline and target prog from prog->aux to make 3737 * sure the original attach destination is not kept alive after a 3738 * program is (re-)attached to another target. 
3739 */ 3740 if (prog->aux->dst_prog && 3741 (tgt_prog_fd || tr != prog->aux->dst_trampoline)) 3742 /* got extra prog ref from syscall, or attaching to different prog */ 3743 bpf_prog_put(prog->aux->dst_prog); 3744 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) 3745 /* we allocated a new trampoline, so free the old one */ 3746 bpf_trampoline_put(prog->aux->dst_trampoline); 3747 3748 prog->aux->dst_prog = NULL; 3749 prog->aux->dst_trampoline = NULL; 3750 mutex_unlock(&prog->aux->dst_mutex); 3751 3752 return bpf_link_settle(&link_primer); 3753 out_unlock: 3754 if (tr && tr != prog->aux->dst_trampoline) 3755 bpf_trampoline_put(tr); 3756 mutex_unlock(&prog->aux->dst_mutex); 3757 kfree(link); 3758 out_put_prog: 3759 if (tgt_prog_fd && tgt_prog) 3760 bpf_prog_put(tgt_prog); 3761 return err; 3762 } 3763 3764 static void bpf_raw_tp_link_release(struct bpf_link *link) 3765 { 3766 struct bpf_raw_tp_link *raw_tp = 3767 container_of(link, struct bpf_raw_tp_link, link); 3768 3769 bpf_probe_unregister(raw_tp->btp, raw_tp); 3770 bpf_put_raw_tracepoint(raw_tp->btp); 3771 } 3772 3773 static void bpf_raw_tp_link_dealloc(struct bpf_link *link) 3774 { 3775 struct bpf_raw_tp_link *raw_tp = 3776 container_of(link, struct bpf_raw_tp_link, link); 3777 3778 kfree(raw_tp); 3779 } 3780 3781 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, 3782 struct seq_file *seq) 3783 { 3784 struct bpf_raw_tp_link *raw_tp_link = 3785 container_of(link, struct bpf_raw_tp_link, link); 3786 3787 seq_printf(seq, 3788 "tp_name:\t%s\n" 3789 "cookie:\t%llu\n", 3790 raw_tp_link->btp->tp->name, 3791 raw_tp_link->cookie); 3792 } 3793 3794 static int bpf_copy_to_user(char __user *ubuf, const char *buf, u32 ulen, 3795 u32 len) 3796 { 3797 if (ulen >= len + 1) { 3798 if (copy_to_user(ubuf, buf, len + 1)) 3799 return -EFAULT; 3800 } else { 3801 char zero = '\0'; 3802 3803 if (copy_to_user(ubuf, buf, ulen - 1)) 3804 return -EFAULT; 3805 if (put_user(zero, ubuf + ulen - 1)) 3806 return -EFAULT; 3807 return -ENOSPC; 3808 } 3809 3810 return 0; 3811 } 3812 3813 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, 3814 struct bpf_link_info *info) 3815 { 3816 struct bpf_raw_tp_link *raw_tp_link = 3817 container_of(link, struct bpf_raw_tp_link, link); 3818 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); 3819 const char *tp_name = raw_tp_link->btp->tp->name; 3820 u32 ulen = info->raw_tracepoint.tp_name_len; 3821 size_t tp_len = strlen(tp_name); 3822 3823 if (!ulen ^ !ubuf) 3824 return -EINVAL; 3825 3826 info->raw_tracepoint.tp_name_len = tp_len + 1; 3827 info->raw_tracepoint.cookie = raw_tp_link->cookie; 3828 3829 if (!ubuf) 3830 return 0; 3831 3832 return bpf_copy_to_user(ubuf, tp_name, ulen, tp_len); 3833 } 3834 3835 static const struct bpf_link_ops bpf_raw_tp_link_lops = { 3836 .release = bpf_raw_tp_link_release, 3837 .dealloc_deferred = bpf_raw_tp_link_dealloc, 3838 .show_fdinfo = bpf_raw_tp_link_show_fdinfo, 3839 .fill_link_info = bpf_raw_tp_link_fill_link_info, 3840 }; 3841 3842 #ifdef CONFIG_PERF_EVENTS 3843 struct bpf_perf_link { 3844 struct bpf_link link; 3845 struct file *perf_file; 3846 }; 3847 3848 static void bpf_perf_link_release(struct bpf_link *link) 3849 { 3850 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3851 struct perf_event *event = perf_link->perf_file->private_data; 3852 3853 perf_event_free_bpf_prog(event); 3854 fput(perf_link->perf_file); 3855 } 3856 3857 static void bpf_perf_link_dealloc(struct bpf_link *link) 3858 { 
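	/* ->release() has already detached the program from the perf event
	 * and dropped the perf_file reference; only the container is freed
	 * here.
	 */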
3859 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3860 3861 kfree(perf_link); 3862 } 3863 3864 static int bpf_perf_link_fill_common(const struct perf_event *event, 3865 char __user *uname, u32 *ulenp, 3866 u64 *probe_offset, u64 *probe_addr, 3867 u32 *fd_type, unsigned long *missed) 3868 { 3869 const char *buf; 3870 u32 prog_id, ulen; 3871 size_t len; 3872 int err; 3873 3874 ulen = *ulenp; 3875 if (!ulen ^ !uname) 3876 return -EINVAL; 3877 3878 err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, 3879 probe_offset, probe_addr, missed); 3880 if (err) 3881 return err; 3882 3883 if (buf) { 3884 len = strlen(buf); 3885 *ulenp = len + 1; 3886 } else { 3887 *ulenp = 1; 3888 } 3889 if (!uname) 3890 return 0; 3891 3892 if (buf) { 3893 err = bpf_copy_to_user(uname, buf, ulen, len); 3894 if (err) 3895 return err; 3896 } else { 3897 char zero = '\0'; 3898 3899 if (put_user(zero, uname)) 3900 return -EFAULT; 3901 } 3902 return 0; 3903 } 3904 3905 #ifdef CONFIG_KPROBE_EVENTS 3906 static int bpf_perf_link_fill_kprobe(const struct perf_event *event, 3907 struct bpf_link_info *info) 3908 { 3909 unsigned long missed; 3910 char __user *uname; 3911 u64 addr, offset; 3912 u32 ulen, type; 3913 int err; 3914 3915 uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); 3916 ulen = info->perf_event.kprobe.name_len; 3917 err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr, 3918 &type, &missed); 3919 if (err) 3920 return err; 3921 if (type == BPF_FD_TYPE_KRETPROBE) 3922 info->perf_event.type = BPF_PERF_EVENT_KRETPROBE; 3923 else 3924 info->perf_event.type = BPF_PERF_EVENT_KPROBE; 3925 info->perf_event.kprobe.name_len = ulen; 3926 info->perf_event.kprobe.offset = offset; 3927 info->perf_event.kprobe.missed = missed; 3928 if (!kallsyms_show_value(current_cred())) 3929 addr = 0; 3930 info->perf_event.kprobe.addr = addr; 3931 info->perf_event.kprobe.cookie = event->bpf_cookie; 3932 return 0; 3933 } 3934 3935 static void bpf_perf_link_fdinfo_kprobe(const struct perf_event *event, 3936 struct seq_file *seq) 3937 { 3938 const char *name; 3939 int err; 3940 u32 prog_id, type; 3941 u64 offset, addr; 3942 unsigned long missed; 3943 3944 err = bpf_get_perf_event_info(event, &prog_id, &type, &name, 3945 &offset, &addr, &missed); 3946 if (err) 3947 return; 3948 3949 seq_printf(seq, 3950 "name:\t%s\n" 3951 "offset:\t%#llx\n" 3952 "missed:\t%lu\n" 3953 "addr:\t%#llx\n" 3954 "event_type:\t%s\n" 3955 "cookie:\t%llu\n", 3956 name, offset, missed, addr, 3957 type == BPF_FD_TYPE_KRETPROBE ? 
"kretprobe" : "kprobe", 3958 event->bpf_cookie); 3959 } 3960 #endif 3961 3962 #ifdef CONFIG_UPROBE_EVENTS 3963 static int bpf_perf_link_fill_uprobe(const struct perf_event *event, 3964 struct bpf_link_info *info) 3965 { 3966 u64 ref_ctr_offset, offset; 3967 char __user *uname; 3968 u32 ulen, type; 3969 int err; 3970 3971 uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); 3972 ulen = info->perf_event.uprobe.name_len; 3973 err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &ref_ctr_offset, 3974 &type, NULL); 3975 if (err) 3976 return err; 3977 3978 if (type == BPF_FD_TYPE_URETPROBE) 3979 info->perf_event.type = BPF_PERF_EVENT_URETPROBE; 3980 else 3981 info->perf_event.type = BPF_PERF_EVENT_UPROBE; 3982 info->perf_event.uprobe.name_len = ulen; 3983 info->perf_event.uprobe.offset = offset; 3984 info->perf_event.uprobe.cookie = event->bpf_cookie; 3985 info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset; 3986 return 0; 3987 } 3988 3989 static void bpf_perf_link_fdinfo_uprobe(const struct perf_event *event, 3990 struct seq_file *seq) 3991 { 3992 const char *name; 3993 int err; 3994 u32 prog_id, type; 3995 u64 offset, ref_ctr_offset; 3996 unsigned long missed; 3997 3998 err = bpf_get_perf_event_info(event, &prog_id, &type, &name, 3999 &offset, &ref_ctr_offset, &missed); 4000 if (err) 4001 return; 4002 4003 seq_printf(seq, 4004 "name:\t%s\n" 4005 "offset:\t%#llx\n" 4006 "ref_ctr_offset:\t%#llx\n" 4007 "event_type:\t%s\n" 4008 "cookie:\t%llu\n", 4009 name, offset, ref_ctr_offset, 4010 type == BPF_FD_TYPE_URETPROBE ? "uretprobe" : "uprobe", 4011 event->bpf_cookie); 4012 } 4013 #endif 4014 4015 static int bpf_perf_link_fill_probe(const struct perf_event *event, 4016 struct bpf_link_info *info) 4017 { 4018 #ifdef CONFIG_KPROBE_EVENTS 4019 if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) 4020 return bpf_perf_link_fill_kprobe(event, info); 4021 #endif 4022 #ifdef CONFIG_UPROBE_EVENTS 4023 if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE) 4024 return bpf_perf_link_fill_uprobe(event, info); 4025 #endif 4026 return -EOPNOTSUPP; 4027 } 4028 4029 static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, 4030 struct bpf_link_info *info) 4031 { 4032 char __user *uname; 4033 u32 ulen; 4034 int err; 4035 4036 uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); 4037 ulen = info->perf_event.tracepoint.name_len; 4038 err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, NULL); 4039 if (err) 4040 return err; 4041 4042 info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; 4043 info->perf_event.tracepoint.name_len = ulen; 4044 info->perf_event.tracepoint.cookie = event->bpf_cookie; 4045 return 0; 4046 } 4047 4048 static int bpf_perf_link_fill_perf_event(const struct perf_event *event, 4049 struct bpf_link_info *info) 4050 { 4051 info->perf_event.event.type = event->attr.type; 4052 info->perf_event.event.config = event->attr.config; 4053 info->perf_event.event.cookie = event->bpf_cookie; 4054 info->perf_event.type = BPF_PERF_EVENT_EVENT; 4055 return 0; 4056 } 4057 4058 static int bpf_perf_link_fill_link_info(const struct bpf_link *link, 4059 struct bpf_link_info *info) 4060 { 4061 struct bpf_perf_link *perf_link; 4062 const struct perf_event *event; 4063 4064 perf_link = container_of(link, struct bpf_perf_link, link); 4065 event = perf_get_event(perf_link->perf_file); 4066 if (IS_ERR(event)) 4067 return PTR_ERR(event); 4068 4069 switch (event->prog->type) { 4070 case BPF_PROG_TYPE_PERF_EVENT: 4071 return bpf_perf_link_fill_perf_event(event, info); 4072 
case BPF_PROG_TYPE_TRACEPOINT: 4073 return bpf_perf_link_fill_tracepoint(event, info); 4074 case BPF_PROG_TYPE_KPROBE: 4075 return bpf_perf_link_fill_probe(event, info); 4076 default: 4077 return -EOPNOTSUPP; 4078 } 4079 } 4080 4081 static void bpf_perf_event_link_show_fdinfo(const struct perf_event *event, 4082 struct seq_file *seq) 4083 { 4084 seq_printf(seq, 4085 "type:\t%u\n" 4086 "config:\t%llu\n" 4087 "event_type:\t%s\n" 4088 "cookie:\t%llu\n", 4089 event->attr.type, event->attr.config, 4090 "event", event->bpf_cookie); 4091 } 4092 4093 static void bpf_tracepoint_link_show_fdinfo(const struct perf_event *event, 4094 struct seq_file *seq) 4095 { 4096 int err; 4097 const char *name; 4098 u32 prog_id; 4099 4100 err = bpf_get_perf_event_info(event, &prog_id, NULL, &name, NULL, 4101 NULL, NULL); 4102 if (err) 4103 return; 4104 4105 seq_printf(seq, 4106 "tp_name:\t%s\n" 4107 "event_type:\t%s\n" 4108 "cookie:\t%llu\n", 4109 name, "tracepoint", event->bpf_cookie); 4110 } 4111 4112 static void bpf_probe_link_show_fdinfo(const struct perf_event *event, 4113 struct seq_file *seq) 4114 { 4115 #ifdef CONFIG_KPROBE_EVENTS 4116 if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) 4117 return bpf_perf_link_fdinfo_kprobe(event, seq); 4118 #endif 4119 4120 #ifdef CONFIG_UPROBE_EVENTS 4121 if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE) 4122 return bpf_perf_link_fdinfo_uprobe(event, seq); 4123 #endif 4124 } 4125 4126 static void bpf_perf_link_show_fdinfo(const struct bpf_link *link, 4127 struct seq_file *seq) 4128 { 4129 struct bpf_perf_link *perf_link; 4130 const struct perf_event *event; 4131 4132 perf_link = container_of(link, struct bpf_perf_link, link); 4133 event = perf_get_event(perf_link->perf_file); 4134 if (IS_ERR(event)) 4135 return; 4136 4137 switch (event->prog->type) { 4138 case BPF_PROG_TYPE_PERF_EVENT: 4139 return bpf_perf_event_link_show_fdinfo(event, seq); 4140 case BPF_PROG_TYPE_TRACEPOINT: 4141 return bpf_tracepoint_link_show_fdinfo(event, seq); 4142 case BPF_PROG_TYPE_KPROBE: 4143 return bpf_probe_link_show_fdinfo(event, seq); 4144 default: 4145 return; 4146 } 4147 } 4148 4149 static const struct bpf_link_ops bpf_perf_link_lops = { 4150 .release = bpf_perf_link_release, 4151 .dealloc = bpf_perf_link_dealloc, 4152 .fill_link_info = bpf_perf_link_fill_link_info, 4153 .show_fdinfo = bpf_perf_link_show_fdinfo, 4154 }; 4155 4156 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 4157 { 4158 struct bpf_link_primer link_primer; 4159 struct bpf_perf_link *link; 4160 struct perf_event *event; 4161 struct file *perf_file; 4162 int err; 4163 4164 if (attr->link_create.flags) 4165 return -EINVAL; 4166 4167 perf_file = perf_event_get(attr->link_create.target_fd); 4168 if (IS_ERR(perf_file)) 4169 return PTR_ERR(perf_file); 4170 4171 link = kzalloc(sizeof(*link), GFP_USER); 4172 if (!link) { 4173 err = -ENOMEM; 4174 goto out_put_file; 4175 } 4176 bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog, 4177 attr->link_create.attach_type); 4178 link->perf_file = perf_file; 4179 4180 err = bpf_link_prime(&link->link, &link_primer); 4181 if (err) { 4182 kfree(link); 4183 goto out_put_file; 4184 } 4185 4186 event = perf_file->private_data; 4187 err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie); 4188 if (err) { 4189 bpf_link_cleanup(&link_primer); 4190 goto out_put_file; 4191 } 4192 /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */ 4193 bpf_prog_inc(prog); 4194 4195 return 
bpf_link_settle(&link_primer); 4196 4197 out_put_file: 4198 fput(perf_file); 4199 return err; 4200 } 4201 #else 4202 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 4203 { 4204 return -EOPNOTSUPP; 4205 } 4206 #endif /* CONFIG_PERF_EVENTS */ 4207 4208 static int bpf_raw_tp_link_attach(struct bpf_prog *prog, 4209 const char __user *user_tp_name, u64 cookie, 4210 enum bpf_attach_type attach_type) 4211 { 4212 struct bpf_link_primer link_primer; 4213 struct bpf_raw_tp_link *link; 4214 struct bpf_raw_event_map *btp; 4215 const char *tp_name; 4216 char buf[128]; 4217 int err; 4218 4219 switch (prog->type) { 4220 case BPF_PROG_TYPE_TRACING: 4221 case BPF_PROG_TYPE_EXT: 4222 case BPF_PROG_TYPE_LSM: 4223 if (user_tp_name) 4224 /* The attach point for this category of programs 4225 * should be specified via btf_id during program load. 4226 */ 4227 return -EINVAL; 4228 if (prog->type == BPF_PROG_TYPE_TRACING && 4229 prog->expected_attach_type == BPF_TRACE_RAW_TP) { 4230 tp_name = prog->aux->attach_func_name; 4231 break; 4232 } 4233 return bpf_tracing_prog_attach(prog, 0, 0, 0, attach_type); 4234 case BPF_PROG_TYPE_RAW_TRACEPOINT: 4235 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 4236 if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) 4237 return -EFAULT; 4238 buf[sizeof(buf) - 1] = 0; 4239 tp_name = buf; 4240 break; 4241 default: 4242 return -EINVAL; 4243 } 4244 4245 btp = bpf_get_raw_tracepoint(tp_name); 4246 if (!btp) 4247 return -ENOENT; 4248 4249 link = kzalloc(sizeof(*link), GFP_USER); 4250 if (!link) { 4251 err = -ENOMEM; 4252 goto out_put_btp; 4253 } 4254 bpf_link_init_sleepable(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, 4255 &bpf_raw_tp_link_lops, prog, attach_type, 4256 tracepoint_is_faultable(btp->tp)); 4257 link->btp = btp; 4258 link->cookie = cookie; 4259 4260 err = bpf_link_prime(&link->link, &link_primer); 4261 if (err) { 4262 kfree(link); 4263 goto out_put_btp; 4264 } 4265 4266 err = bpf_probe_register(link->btp, link); 4267 if (err) { 4268 bpf_link_cleanup(&link_primer); 4269 goto out_put_btp; 4270 } 4271 4272 return bpf_link_settle(&link_primer); 4273 4274 out_put_btp: 4275 bpf_put_raw_tracepoint(btp); 4276 return err; 4277 } 4278 4279 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.cookie 4280 4281 static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 4282 { 4283 struct bpf_prog *prog; 4284 void __user *tp_name; 4285 __u64 cookie; 4286 int fd; 4287 4288 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) 4289 return -EINVAL; 4290 4291 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 4292 if (IS_ERR(prog)) 4293 return PTR_ERR(prog); 4294 4295 tp_name = u64_to_user_ptr(attr->raw_tracepoint.name); 4296 cookie = attr->raw_tracepoint.cookie; 4297 fd = bpf_raw_tp_link_attach(prog, tp_name, cookie, prog->expected_attach_type); 4298 if (fd < 0) 4299 bpf_prog_put(prog); 4300 return fd; 4301 } 4302 4303 static enum bpf_prog_type 4304 attach_type_to_prog_type(enum bpf_attach_type attach_type) 4305 { 4306 switch (attach_type) { 4307 case BPF_CGROUP_INET_INGRESS: 4308 case BPF_CGROUP_INET_EGRESS: 4309 return BPF_PROG_TYPE_CGROUP_SKB; 4310 case BPF_CGROUP_INET_SOCK_CREATE: 4311 case BPF_CGROUP_INET_SOCK_RELEASE: 4312 case BPF_CGROUP_INET4_POST_BIND: 4313 case BPF_CGROUP_INET6_POST_BIND: 4314 return BPF_PROG_TYPE_CGROUP_SOCK; 4315 case BPF_CGROUP_INET4_BIND: 4316 case BPF_CGROUP_INET6_BIND: 4317 case BPF_CGROUP_INET4_CONNECT: 4318 case BPF_CGROUP_INET6_CONNECT: 4319 case BPF_CGROUP_UNIX_CONNECT: 4320 case BPF_CGROUP_INET4_GETPEERNAME: 4321 
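	/* these and all remaining sockaddr-style hooks (get{peer,sock}name,
	 * sendmsg/recvmsg) share BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	 */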
case BPF_CGROUP_INET6_GETPEERNAME: 4322 case BPF_CGROUP_UNIX_GETPEERNAME: 4323 case BPF_CGROUP_INET4_GETSOCKNAME: 4324 case BPF_CGROUP_INET6_GETSOCKNAME: 4325 case BPF_CGROUP_UNIX_GETSOCKNAME: 4326 case BPF_CGROUP_UDP4_SENDMSG: 4327 case BPF_CGROUP_UDP6_SENDMSG: 4328 case BPF_CGROUP_UNIX_SENDMSG: 4329 case BPF_CGROUP_UDP4_RECVMSG: 4330 case BPF_CGROUP_UDP6_RECVMSG: 4331 case BPF_CGROUP_UNIX_RECVMSG: 4332 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 4333 case BPF_CGROUP_SOCK_OPS: 4334 return BPF_PROG_TYPE_SOCK_OPS; 4335 case BPF_CGROUP_DEVICE: 4336 return BPF_PROG_TYPE_CGROUP_DEVICE; 4337 case BPF_SK_MSG_VERDICT: 4338 return BPF_PROG_TYPE_SK_MSG; 4339 case BPF_SK_SKB_STREAM_PARSER: 4340 case BPF_SK_SKB_STREAM_VERDICT: 4341 case BPF_SK_SKB_VERDICT: 4342 return BPF_PROG_TYPE_SK_SKB; 4343 case BPF_LIRC_MODE2: 4344 return BPF_PROG_TYPE_LIRC_MODE2; 4345 case BPF_FLOW_DISSECTOR: 4346 return BPF_PROG_TYPE_FLOW_DISSECTOR; 4347 case BPF_CGROUP_SYSCTL: 4348 return BPF_PROG_TYPE_CGROUP_SYSCTL; 4349 case BPF_CGROUP_GETSOCKOPT: 4350 case BPF_CGROUP_SETSOCKOPT: 4351 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 4352 case BPF_TRACE_ITER: 4353 case BPF_TRACE_RAW_TP: 4354 case BPF_TRACE_FENTRY: 4355 case BPF_TRACE_FEXIT: 4356 case BPF_MODIFY_RETURN: 4357 return BPF_PROG_TYPE_TRACING; 4358 case BPF_LSM_MAC: 4359 return BPF_PROG_TYPE_LSM; 4360 case BPF_SK_LOOKUP: 4361 return BPF_PROG_TYPE_SK_LOOKUP; 4362 case BPF_XDP: 4363 return BPF_PROG_TYPE_XDP; 4364 case BPF_LSM_CGROUP: 4365 return BPF_PROG_TYPE_LSM; 4366 case BPF_TCX_INGRESS: 4367 case BPF_TCX_EGRESS: 4368 case BPF_NETKIT_PRIMARY: 4369 case BPF_NETKIT_PEER: 4370 return BPF_PROG_TYPE_SCHED_CLS; 4371 default: 4372 return BPF_PROG_TYPE_UNSPEC; 4373 } 4374 } 4375 4376 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 4377 enum bpf_attach_type attach_type) 4378 { 4379 enum bpf_prog_type ptype; 4380 4381 switch (prog->type) { 4382 case BPF_PROG_TYPE_CGROUP_SOCK: 4383 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4384 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4385 case BPF_PROG_TYPE_SK_LOOKUP: 4386 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 4387 case BPF_PROG_TYPE_CGROUP_SKB: 4388 if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN)) 4389 /* cg-skb progs can be loaded by unpriv user. 4390 * check permissions at attach time. 4391 */ 4392 return -EPERM; 4393 4394 ptype = attach_type_to_prog_type(attach_type); 4395 if (prog->type != ptype) 4396 return -EINVAL; 4397 4398 return prog->enforce_expected_attach_type && 4399 prog->expected_attach_type != attach_type ? 
4400 -EINVAL : 0; 4401 case BPF_PROG_TYPE_EXT: 4402 return 0; 4403 case BPF_PROG_TYPE_NETFILTER: 4404 if (attach_type != BPF_NETFILTER) 4405 return -EINVAL; 4406 return 0; 4407 case BPF_PROG_TYPE_PERF_EVENT: 4408 case BPF_PROG_TYPE_TRACEPOINT: 4409 if (attach_type != BPF_PERF_EVENT) 4410 return -EINVAL; 4411 return 0; 4412 case BPF_PROG_TYPE_KPROBE: 4413 if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && 4414 attach_type != BPF_TRACE_KPROBE_MULTI) 4415 return -EINVAL; 4416 if (prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION && 4417 attach_type != BPF_TRACE_KPROBE_SESSION) 4418 return -EINVAL; 4419 if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI && 4420 attach_type != BPF_TRACE_UPROBE_MULTI) 4421 return -EINVAL; 4422 if (prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION && 4423 attach_type != BPF_TRACE_UPROBE_SESSION) 4424 return -EINVAL; 4425 if (attach_type != BPF_PERF_EVENT && 4426 attach_type != BPF_TRACE_KPROBE_MULTI && 4427 attach_type != BPF_TRACE_KPROBE_SESSION && 4428 attach_type != BPF_TRACE_UPROBE_MULTI && 4429 attach_type != BPF_TRACE_UPROBE_SESSION) 4430 return -EINVAL; 4431 return 0; 4432 case BPF_PROG_TYPE_SCHED_CLS: 4433 if (attach_type != BPF_TCX_INGRESS && 4434 attach_type != BPF_TCX_EGRESS && 4435 attach_type != BPF_NETKIT_PRIMARY && 4436 attach_type != BPF_NETKIT_PEER) 4437 return -EINVAL; 4438 return 0; 4439 default: 4440 ptype = attach_type_to_prog_type(attach_type); 4441 if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) 4442 return -EINVAL; 4443 return 0; 4444 } 4445 } 4446 4447 static bool is_cgroup_prog_type(enum bpf_prog_type ptype, enum bpf_attach_type atype, 4448 bool check_atype) 4449 { 4450 switch (ptype) { 4451 case BPF_PROG_TYPE_CGROUP_DEVICE: 4452 case BPF_PROG_TYPE_CGROUP_SKB: 4453 case BPF_PROG_TYPE_CGROUP_SOCK: 4454 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4455 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4456 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4457 case BPF_PROG_TYPE_SOCK_OPS: 4458 return true; 4459 case BPF_PROG_TYPE_LSM: 4460 return check_atype ? 
atype == BPF_LSM_CGROUP : true; 4461 default: 4462 return false; 4463 } 4464 } 4465 4466 #define BPF_PROG_ATTACH_LAST_FIELD expected_revision 4467 4468 #define BPF_F_ATTACH_MASK_BASE \ 4469 (BPF_F_ALLOW_OVERRIDE | \ 4470 BPF_F_ALLOW_MULTI | \ 4471 BPF_F_REPLACE | \ 4472 BPF_F_PREORDER) 4473 4474 #define BPF_F_ATTACH_MASK_MPROG \ 4475 (BPF_F_REPLACE | \ 4476 BPF_F_BEFORE | \ 4477 BPF_F_AFTER | \ 4478 BPF_F_ID | \ 4479 BPF_F_LINK) 4480 4481 static int bpf_prog_attach(const union bpf_attr *attr) 4482 { 4483 enum bpf_prog_type ptype; 4484 struct bpf_prog *prog; 4485 int ret; 4486 4487 if (CHECK_ATTR(BPF_PROG_ATTACH)) 4488 return -EINVAL; 4489 4490 ptype = attach_type_to_prog_type(attr->attach_type); 4491 if (ptype == BPF_PROG_TYPE_UNSPEC) 4492 return -EINVAL; 4493 if (bpf_mprog_supported(ptype)) { 4494 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) 4495 return -EINVAL; 4496 } else if (is_cgroup_prog_type(ptype, 0, false)) { 4497 if (attr->attach_flags & ~(BPF_F_ATTACH_MASK_BASE | BPF_F_ATTACH_MASK_MPROG)) 4498 return -EINVAL; 4499 } else { 4500 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE) 4501 return -EINVAL; 4502 if (attr->relative_fd || 4503 attr->expected_revision) 4504 return -EINVAL; 4505 } 4506 4507 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 4508 if (IS_ERR(prog)) 4509 return PTR_ERR(prog); 4510 4511 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) { 4512 bpf_prog_put(prog); 4513 return -EINVAL; 4514 } 4515 4516 if (is_cgroup_prog_type(ptype, prog->expected_attach_type, true)) { 4517 ret = cgroup_bpf_prog_attach(attr, ptype, prog); 4518 goto out; 4519 } 4520 4521 switch (ptype) { 4522 case BPF_PROG_TYPE_SK_SKB: 4523 case BPF_PROG_TYPE_SK_MSG: 4524 ret = sock_map_get_from_fd(attr, prog); 4525 break; 4526 case BPF_PROG_TYPE_LIRC_MODE2: 4527 ret = lirc_prog_attach(attr, prog); 4528 break; 4529 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4530 ret = netns_bpf_prog_attach(attr, prog); 4531 break; 4532 case BPF_PROG_TYPE_SCHED_CLS: 4533 if (attr->attach_type == BPF_TCX_INGRESS || 4534 attr->attach_type == BPF_TCX_EGRESS) 4535 ret = tcx_prog_attach(attr, prog); 4536 else 4537 ret = netkit_prog_attach(attr, prog); 4538 break; 4539 default: 4540 ret = -EINVAL; 4541 } 4542 out: 4543 if (ret) 4544 bpf_prog_put(prog); 4545 return ret; 4546 } 4547 4548 #define BPF_PROG_DETACH_LAST_FIELD expected_revision 4549 4550 static int bpf_prog_detach(const union bpf_attr *attr) 4551 { 4552 struct bpf_prog *prog = NULL; 4553 enum bpf_prog_type ptype; 4554 int ret; 4555 4556 if (CHECK_ATTR(BPF_PROG_DETACH)) 4557 return -EINVAL; 4558 4559 ptype = attach_type_to_prog_type(attr->attach_type); 4560 if (bpf_mprog_supported(ptype)) { 4561 if (ptype == BPF_PROG_TYPE_UNSPEC) 4562 return -EINVAL; 4563 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) 4564 return -EINVAL; 4565 if (attr->attach_bpf_fd) { 4566 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 4567 if (IS_ERR(prog)) 4568 return PTR_ERR(prog); 4569 } 4570 } else if (is_cgroup_prog_type(ptype, 0, false)) { 4571 if (attr->attach_flags || attr->relative_fd) 4572 return -EINVAL; 4573 } else if (attr->attach_flags || 4574 attr->relative_fd || 4575 attr->expected_revision) { 4576 return -EINVAL; 4577 } 4578 4579 switch (ptype) { 4580 case BPF_PROG_TYPE_SK_MSG: 4581 case BPF_PROG_TYPE_SK_SKB: 4582 ret = sock_map_prog_detach(attr, ptype); 4583 break; 4584 case BPF_PROG_TYPE_LIRC_MODE2: 4585 ret = lirc_prog_detach(attr); 4586 break; 4587 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4588 ret = netns_bpf_prog_detach(attr, ptype); 4589 break; 4590 
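	/* All cgroup-attached program types (including BPF_LSM_CGROUP
	 * flavoured LSM programs) are detached through the cgroup
	 * infrastructure below.
	 */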
case BPF_PROG_TYPE_CGROUP_DEVICE: 4591 case BPF_PROG_TYPE_CGROUP_SKB: 4592 case BPF_PROG_TYPE_CGROUP_SOCK: 4593 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4594 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4595 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4596 case BPF_PROG_TYPE_SOCK_OPS: 4597 case BPF_PROG_TYPE_LSM: 4598 ret = cgroup_bpf_prog_detach(attr, ptype); 4599 break; 4600 case BPF_PROG_TYPE_SCHED_CLS: 4601 if (attr->attach_type == BPF_TCX_INGRESS || 4602 attr->attach_type == BPF_TCX_EGRESS) 4603 ret = tcx_prog_detach(attr, prog); 4604 else 4605 ret = netkit_prog_detach(attr, prog); 4606 break; 4607 default: 4608 ret = -EINVAL; 4609 } 4610 4611 if (prog) 4612 bpf_prog_put(prog); 4613 return ret; 4614 } 4615 4616 #define BPF_PROG_QUERY_LAST_FIELD query.revision 4617 4618 static int bpf_prog_query(const union bpf_attr *attr, 4619 union bpf_attr __user *uattr) 4620 { 4621 if (!bpf_net_capable()) 4622 return -EPERM; 4623 if (CHECK_ATTR(BPF_PROG_QUERY)) 4624 return -EINVAL; 4625 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) 4626 return -EINVAL; 4627 4628 switch (attr->query.attach_type) { 4629 case BPF_CGROUP_INET_INGRESS: 4630 case BPF_CGROUP_INET_EGRESS: 4631 case BPF_CGROUP_INET_SOCK_CREATE: 4632 case BPF_CGROUP_INET_SOCK_RELEASE: 4633 case BPF_CGROUP_INET4_BIND: 4634 case BPF_CGROUP_INET6_BIND: 4635 case BPF_CGROUP_INET4_POST_BIND: 4636 case BPF_CGROUP_INET6_POST_BIND: 4637 case BPF_CGROUP_INET4_CONNECT: 4638 case BPF_CGROUP_INET6_CONNECT: 4639 case BPF_CGROUP_UNIX_CONNECT: 4640 case BPF_CGROUP_INET4_GETPEERNAME: 4641 case BPF_CGROUP_INET6_GETPEERNAME: 4642 case BPF_CGROUP_UNIX_GETPEERNAME: 4643 case BPF_CGROUP_INET4_GETSOCKNAME: 4644 case BPF_CGROUP_INET6_GETSOCKNAME: 4645 case BPF_CGROUP_UNIX_GETSOCKNAME: 4646 case BPF_CGROUP_UDP4_SENDMSG: 4647 case BPF_CGROUP_UDP6_SENDMSG: 4648 case BPF_CGROUP_UNIX_SENDMSG: 4649 case BPF_CGROUP_UDP4_RECVMSG: 4650 case BPF_CGROUP_UDP6_RECVMSG: 4651 case BPF_CGROUP_UNIX_RECVMSG: 4652 case BPF_CGROUP_SOCK_OPS: 4653 case BPF_CGROUP_DEVICE: 4654 case BPF_CGROUP_SYSCTL: 4655 case BPF_CGROUP_GETSOCKOPT: 4656 case BPF_CGROUP_SETSOCKOPT: 4657 case BPF_LSM_CGROUP: 4658 return cgroup_bpf_prog_query(attr, uattr); 4659 case BPF_LIRC_MODE2: 4660 return lirc_prog_query(attr, uattr); 4661 case BPF_FLOW_DISSECTOR: 4662 case BPF_SK_LOOKUP: 4663 return netns_bpf_prog_query(attr, uattr); 4664 case BPF_SK_SKB_STREAM_PARSER: 4665 case BPF_SK_SKB_STREAM_VERDICT: 4666 case BPF_SK_MSG_VERDICT: 4667 case BPF_SK_SKB_VERDICT: 4668 return sock_map_bpf_prog_query(attr, uattr); 4669 case BPF_TCX_INGRESS: 4670 case BPF_TCX_EGRESS: 4671 return tcx_prog_query(attr, uattr); 4672 case BPF_NETKIT_PRIMARY: 4673 case BPF_NETKIT_PEER: 4674 return netkit_prog_query(attr, uattr); 4675 default: 4676 return -EINVAL; 4677 } 4678 } 4679 4680 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size 4681 4682 static int bpf_prog_test_run(const union bpf_attr *attr, 4683 union bpf_attr __user *uattr) 4684 { 4685 struct bpf_prog *prog; 4686 int ret = -ENOTSUPP; 4687 4688 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 4689 return -EINVAL; 4690 4691 if ((attr->test.ctx_size_in && !attr->test.ctx_in) || 4692 (!attr->test.ctx_size_in && attr->test.ctx_in)) 4693 return -EINVAL; 4694 4695 if ((attr->test.ctx_size_out && !attr->test.ctx_out) || 4696 (!attr->test.ctx_size_out && attr->test.ctx_out)) 4697 return -EINVAL; 4698 4699 prog = bpf_prog_get(attr->test.prog_fd); 4700 if (IS_ERR(prog)) 4701 return PTR_ERR(prog); 4702 4703 if (prog->aux->ops->test_run) 4704 ret = prog->aux->ops->test_run(prog, attr, uattr); 4705 4706 
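	/* ret stays -ENOTSUPP when the program type provides no ->test_run
	 * callback; the reference taken by bpf_prog_get() above is dropped
	 * either way.
	 *
	 * A minimal user-space sketch of driving this command (illustrative
	 * only, field names from uapi/linux/bpf.h):
	 *
	 *	union bpf_attr tattr = {};
	 *
	 *	tattr.test.prog_fd = prog_fd;
	 *	tattr.test.data_in = (__u64)(unsigned long)pkt;
	 *	tattr.test.data_size_in = sizeof(pkt);
	 *	tattr.test.repeat = 1;
	 *	err = syscall(__NR_bpf, BPF_PROG_TEST_RUN, &tattr, sizeof(tattr));
	 */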
bpf_prog_put(prog); 4707 return ret; 4708 } 4709 4710 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 4711 4712 static int bpf_obj_get_next_id(const union bpf_attr *attr, 4713 union bpf_attr __user *uattr, 4714 struct idr *idr, 4715 spinlock_t *lock) 4716 { 4717 u32 next_id = attr->start_id; 4718 int err = 0; 4719 4720 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 4721 return -EINVAL; 4722 4723 if (!capable(CAP_SYS_ADMIN)) 4724 return -EPERM; 4725 4726 next_id++; 4727 spin_lock_bh(lock); 4728 if (!idr_get_next(idr, &next_id)) 4729 err = -ENOENT; 4730 spin_unlock_bh(lock); 4731 4732 if (!err) 4733 err = put_user(next_id, &uattr->next_id); 4734 4735 return err; 4736 } 4737 4738 struct bpf_map *bpf_map_get_curr_or_next(u32 *id) 4739 { 4740 struct bpf_map *map; 4741 4742 spin_lock_bh(&map_idr_lock); 4743 again: 4744 map = idr_get_next(&map_idr, id); 4745 if (map) { 4746 map = __bpf_map_inc_not_zero(map, false); 4747 if (IS_ERR(map)) { 4748 (*id)++; 4749 goto again; 4750 } 4751 } 4752 spin_unlock_bh(&map_idr_lock); 4753 4754 return map; 4755 } 4756 4757 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) 4758 { 4759 struct bpf_prog *prog; 4760 4761 spin_lock_bh(&prog_idr_lock); 4762 again: 4763 prog = idr_get_next(&prog_idr, id); 4764 if (prog) { 4765 prog = bpf_prog_inc_not_zero(prog); 4766 if (IS_ERR(prog)) { 4767 (*id)++; 4768 goto again; 4769 } 4770 } 4771 spin_unlock_bh(&prog_idr_lock); 4772 4773 return prog; 4774 } 4775 4776 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 4777 4778 struct bpf_prog *bpf_prog_by_id(u32 id) 4779 { 4780 struct bpf_prog *prog; 4781 4782 if (!id) 4783 return ERR_PTR(-ENOENT); 4784 4785 spin_lock_bh(&prog_idr_lock); 4786 prog = idr_find(&prog_idr, id); 4787 if (prog) 4788 prog = bpf_prog_inc_not_zero(prog); 4789 else 4790 prog = ERR_PTR(-ENOENT); 4791 spin_unlock_bh(&prog_idr_lock); 4792 return prog; 4793 } 4794 4795 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 4796 { 4797 struct bpf_prog *prog; 4798 u32 id = attr->prog_id; 4799 int fd; 4800 4801 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 4802 return -EINVAL; 4803 4804 if (!capable(CAP_SYS_ADMIN)) 4805 return -EPERM; 4806 4807 prog = bpf_prog_by_id(id); 4808 if (IS_ERR(prog)) 4809 return PTR_ERR(prog); 4810 4811 fd = bpf_prog_new_fd(prog); 4812 if (fd < 0) 4813 bpf_prog_put(prog); 4814 4815 return fd; 4816 } 4817 4818 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 4819 4820 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 4821 { 4822 struct bpf_map *map; 4823 u32 id = attr->map_id; 4824 int f_flags; 4825 int fd; 4826 4827 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || 4828 attr->open_flags & ~BPF_OBJ_FLAG_MASK) 4829 return -EINVAL; 4830 4831 if (!capable(CAP_SYS_ADMIN)) 4832 return -EPERM; 4833 4834 f_flags = bpf_get_file_flag(attr->open_flags); 4835 if (f_flags < 0) 4836 return f_flags; 4837 4838 spin_lock_bh(&map_idr_lock); 4839 map = idr_find(&map_idr, id); 4840 if (map) 4841 map = __bpf_map_inc_not_zero(map, true); 4842 else 4843 map = ERR_PTR(-ENOENT); 4844 spin_unlock_bh(&map_idr_lock); 4845 4846 if (IS_ERR(map)) 4847 return PTR_ERR(map); 4848 4849 fd = bpf_map_new_fd(map, f_flags); 4850 if (fd < 0) 4851 bpf_map_put_with_uref(map); 4852 4853 return fd; 4854 } 4855 4856 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 4857 unsigned long addr, u32 *off, 4858 u32 *type) 4859 { 4860 const struct bpf_map *map; 4861 int i; 4862 4863 mutex_lock(&prog->aux->used_maps_mutex); 4864 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { 4865 map = 
prog->aux->used_maps[i]; 4866 if (map == (void *)addr) { 4867 *type = BPF_PSEUDO_MAP_FD; 4868 goto out; 4869 } 4870 if (!map->ops->map_direct_value_meta) 4871 continue; 4872 if (!map->ops->map_direct_value_meta(map, addr, off)) { 4873 *type = BPF_PSEUDO_MAP_VALUE; 4874 goto out; 4875 } 4876 } 4877 map = NULL; 4878 4879 out: 4880 mutex_unlock(&prog->aux->used_maps_mutex); 4881 return map; 4882 } 4883 4884 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, 4885 const struct cred *f_cred) 4886 { 4887 const struct bpf_map *map; 4888 struct bpf_insn *insns; 4889 u32 off, type; 4890 u64 imm; 4891 u8 code; 4892 int i; 4893 4894 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), 4895 GFP_USER); 4896 if (!insns) 4897 return insns; 4898 4899 for (i = 0; i < prog->len; i++) { 4900 code = insns[i].code; 4901 4902 if (code == (BPF_JMP | BPF_TAIL_CALL)) { 4903 insns[i].code = BPF_JMP | BPF_CALL; 4904 insns[i].imm = BPF_FUNC_tail_call; 4905 /* fall-through */ 4906 } 4907 if (code == (BPF_JMP | BPF_CALL) || 4908 code == (BPF_JMP | BPF_CALL_ARGS)) { 4909 if (code == (BPF_JMP | BPF_CALL_ARGS)) 4910 insns[i].code = BPF_JMP | BPF_CALL; 4911 if (!bpf_dump_raw_ok(f_cred)) 4912 insns[i].imm = 0; 4913 continue; 4914 } 4915 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { 4916 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; 4917 continue; 4918 } 4919 4920 if ((BPF_CLASS(code) == BPF_LDX || BPF_CLASS(code) == BPF_STX || 4921 BPF_CLASS(code) == BPF_ST) && BPF_MODE(code) == BPF_PROBE_MEM32) { 4922 insns[i].code = BPF_CLASS(code) | BPF_SIZE(code) | BPF_MEM; 4923 continue; 4924 } 4925 4926 if (code != (BPF_LD | BPF_IMM | BPF_DW)) 4927 continue; 4928 4929 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 4930 map = bpf_map_from_imm(prog, imm, &off, &type); 4931 if (map) { 4932 insns[i].src_reg = type; 4933 insns[i].imm = map->id; 4934 insns[i + 1].imm = off; 4935 continue; 4936 } 4937 } 4938 4939 return insns; 4940 } 4941 4942 static int set_info_rec_size(struct bpf_prog_info *info) 4943 { 4944 /* 4945 * Ensure info.*_rec_size is the same as kernel expected size 4946 * 4947 * or 4948 * 4949 * Only allow zero *_rec_size if both _rec_size and _cnt are 4950 * zero. In this case, the kernel will set the expected 4951 * _rec_size back to the info. 
4952 */ 4953 4954 if ((info->nr_func_info || info->func_info_rec_size) && 4955 info->func_info_rec_size != sizeof(struct bpf_func_info)) 4956 return -EINVAL; 4957 4958 if ((info->nr_line_info || info->line_info_rec_size) && 4959 info->line_info_rec_size != sizeof(struct bpf_line_info)) 4960 return -EINVAL; 4961 4962 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && 4963 info->jited_line_info_rec_size != sizeof(__u64)) 4964 return -EINVAL; 4965 4966 info->func_info_rec_size = sizeof(struct bpf_func_info); 4967 info->line_info_rec_size = sizeof(struct bpf_line_info); 4968 info->jited_line_info_rec_size = sizeof(__u64); 4969 4970 return 0; 4971 } 4972 4973 static int bpf_prog_get_info_by_fd(struct file *file, 4974 struct bpf_prog *prog, 4975 const union bpf_attr *attr, 4976 union bpf_attr __user *uattr) 4977 { 4978 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 4979 struct btf *attach_btf = bpf_prog_get_target_btf(prog); 4980 struct bpf_prog_info info; 4981 u32 info_len = attr->info.info_len; 4982 struct bpf_prog_kstats stats; 4983 char __user *uinsns; 4984 u32 ulen; 4985 int err; 4986 4987 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 4988 if (err) 4989 return err; 4990 info_len = min_t(u32, sizeof(info), info_len); 4991 4992 memset(&info, 0, sizeof(info)); 4993 if (copy_from_user(&info, uinfo, info_len)) 4994 return -EFAULT; 4995 4996 info.type = prog->type; 4997 info.id = prog->aux->id; 4998 info.load_time = prog->aux->load_time; 4999 info.created_by_uid = from_kuid_munged(current_user_ns(), 5000 prog->aux->user->uid); 5001 info.gpl_compatible = prog->gpl_compatible; 5002 5003 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 5004 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 5005 5006 mutex_lock(&prog->aux->used_maps_mutex); 5007 ulen = info.nr_map_ids; 5008 info.nr_map_ids = prog->aux->used_map_cnt; 5009 ulen = min_t(u32, info.nr_map_ids, ulen); 5010 if (ulen) { 5011 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 5012 u32 i; 5013 5014 for (i = 0; i < ulen; i++) 5015 if (put_user(prog->aux->used_maps[i]->id, 5016 &user_map_ids[i])) { 5017 mutex_unlock(&prog->aux->used_maps_mutex); 5018 return -EFAULT; 5019 } 5020 } 5021 mutex_unlock(&prog->aux->used_maps_mutex); 5022 5023 err = set_info_rec_size(&info); 5024 if (err) 5025 return err; 5026 5027 bpf_prog_get_stats(prog, &stats); 5028 info.run_time_ns = stats.nsecs; 5029 info.run_cnt = stats.cnt; 5030 info.recursion_misses = stats.misses; 5031 5032 info.verified_insns = prog->aux->verified_insns; 5033 if (prog->aux->btf) 5034 info.btf_id = btf_obj_id(prog->aux->btf); 5035 5036 if (!bpf_capable()) { 5037 info.jited_prog_len = 0; 5038 info.xlated_prog_len = 0; 5039 info.nr_jited_ksyms = 0; 5040 info.nr_jited_func_lens = 0; 5041 info.nr_func_info = 0; 5042 info.nr_line_info = 0; 5043 info.nr_jited_line_info = 0; 5044 goto done; 5045 } 5046 5047 ulen = info.xlated_prog_len; 5048 info.xlated_prog_len = bpf_prog_insn_size(prog); 5049 if (info.xlated_prog_len && ulen) { 5050 struct bpf_insn *insns_sanitized; 5051 bool fault; 5052 5053 if (!prog->blinded || bpf_dump_raw_ok(file->f_cred)) { 5054 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); 5055 if (!insns_sanitized) 5056 return -ENOMEM; 5057 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 5058 ulen = min_t(u32, info.xlated_prog_len, ulen); 5059 fault = copy_to_user(uinsns, insns_sanitized, ulen); 5060 kfree(insns_sanitized); 5061 if (fault) 5062 return -EFAULT; 5063 } else { 5064 
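			/* Constant-blinded image and the opener may not see
			 * raw pointers/values: report no translated insns.
			 */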
info.xlated_prog_insns = 0; 5065 } 5066 } 5067 5068 if (bpf_prog_is_offloaded(prog->aux)) { 5069 err = bpf_prog_offload_info_fill(&info, prog); 5070 if (err) 5071 return err; 5072 goto done; 5073 } 5074 5075 /* NOTE: the following code is supposed to be skipped for offload. 5076 * bpf_prog_offload_info_fill() is the place to fill similar fields 5077 * for offload. 5078 */ 5079 ulen = info.jited_prog_len; 5080 if (prog->aux->func_cnt) { 5081 u32 i; 5082 5083 info.jited_prog_len = 0; 5084 for (i = 0; i < prog->aux->func_cnt; i++) 5085 info.jited_prog_len += prog->aux->func[i]->jited_len; 5086 } else { 5087 info.jited_prog_len = prog->jited_len; 5088 } 5089 5090 if (info.jited_prog_len && ulen) { 5091 if (bpf_dump_raw_ok(file->f_cred)) { 5092 uinsns = u64_to_user_ptr(info.jited_prog_insns); 5093 ulen = min_t(u32, info.jited_prog_len, ulen); 5094 5095 /* for multi-function programs, copy the JITed 5096 * instructions for all the functions 5097 */ 5098 if (prog->aux->func_cnt) { 5099 u32 len, free, i; 5100 u8 *img; 5101 5102 free = ulen; 5103 for (i = 0; i < prog->aux->func_cnt; i++) { 5104 len = prog->aux->func[i]->jited_len; 5105 len = min_t(u32, len, free); 5106 img = (u8 *) prog->aux->func[i]->bpf_func; 5107 if (copy_to_user(uinsns, img, len)) 5108 return -EFAULT; 5109 uinsns += len; 5110 free -= len; 5111 if (!free) 5112 break; 5113 } 5114 } else { 5115 if (copy_to_user(uinsns, prog->bpf_func, ulen)) 5116 return -EFAULT; 5117 } 5118 } else { 5119 info.jited_prog_insns = 0; 5120 } 5121 } 5122 5123 ulen = info.nr_jited_ksyms; 5124 info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; 5125 if (ulen) { 5126 if (bpf_dump_raw_ok(file->f_cred)) { 5127 unsigned long ksym_addr; 5128 u64 __user *user_ksyms; 5129 u32 i; 5130 5131 /* copy the address of the kernel symbol 5132 * corresponding to each function 5133 */ 5134 ulen = min_t(u32, info.nr_jited_ksyms, ulen); 5135 user_ksyms = u64_to_user_ptr(info.jited_ksyms); 5136 if (prog->aux->func_cnt) { 5137 for (i = 0; i < ulen; i++) { 5138 ksym_addr = (unsigned long) 5139 prog->aux->func[i]->bpf_func; 5140 if (put_user((u64) ksym_addr, 5141 &user_ksyms[i])) 5142 return -EFAULT; 5143 } 5144 } else { 5145 ksym_addr = (unsigned long) prog->bpf_func; 5146 if (put_user((u64) ksym_addr, &user_ksyms[0])) 5147 return -EFAULT; 5148 } 5149 } else { 5150 info.jited_ksyms = 0; 5151 } 5152 } 5153 5154 ulen = info.nr_jited_func_lens; 5155 info.nr_jited_func_lens = prog->aux->func_cnt ? 
: 1; 5156 if (ulen) { 5157 if (bpf_dump_raw_ok(file->f_cred)) { 5158 u32 __user *user_lens; 5159 u32 func_len, i; 5160 5161 /* copy the JITed image lengths for each function */ 5162 ulen = min_t(u32, info.nr_jited_func_lens, ulen); 5163 user_lens = u64_to_user_ptr(info.jited_func_lens); 5164 if (prog->aux->func_cnt) { 5165 for (i = 0; i < ulen; i++) { 5166 func_len = 5167 prog->aux->func[i]->jited_len; 5168 if (put_user(func_len, &user_lens[i])) 5169 return -EFAULT; 5170 } 5171 } else { 5172 func_len = prog->jited_len; 5173 if (put_user(func_len, &user_lens[0])) 5174 return -EFAULT; 5175 } 5176 } else { 5177 info.jited_func_lens = 0; 5178 } 5179 } 5180 5181 info.attach_btf_id = prog->aux->attach_btf_id; 5182 if (attach_btf) 5183 info.attach_btf_obj_id = btf_obj_id(attach_btf); 5184 5185 ulen = info.nr_func_info; 5186 info.nr_func_info = prog->aux->func_info_cnt; 5187 if (info.nr_func_info && ulen) { 5188 char __user *user_finfo; 5189 5190 user_finfo = u64_to_user_ptr(info.func_info); 5191 ulen = min_t(u32, info.nr_func_info, ulen); 5192 if (copy_to_user(user_finfo, prog->aux->func_info, 5193 info.func_info_rec_size * ulen)) 5194 return -EFAULT; 5195 } 5196 5197 ulen = info.nr_line_info; 5198 info.nr_line_info = prog->aux->nr_linfo; 5199 if (info.nr_line_info && ulen) { 5200 __u8 __user *user_linfo; 5201 5202 user_linfo = u64_to_user_ptr(info.line_info); 5203 ulen = min_t(u32, info.nr_line_info, ulen); 5204 if (copy_to_user(user_linfo, prog->aux->linfo, 5205 info.line_info_rec_size * ulen)) 5206 return -EFAULT; 5207 } 5208 5209 ulen = info.nr_jited_line_info; 5210 if (prog->aux->jited_linfo) 5211 info.nr_jited_line_info = prog->aux->nr_linfo; 5212 else 5213 info.nr_jited_line_info = 0; 5214 if (info.nr_jited_line_info && ulen) { 5215 if (bpf_dump_raw_ok(file->f_cred)) { 5216 unsigned long line_addr; 5217 __u64 __user *user_linfo; 5218 u32 i; 5219 5220 user_linfo = u64_to_user_ptr(info.jited_line_info); 5221 ulen = min_t(u32, info.nr_jited_line_info, ulen); 5222 for (i = 0; i < ulen; i++) { 5223 line_addr = (unsigned long)prog->aux->jited_linfo[i]; 5224 if (put_user((__u64)line_addr, &user_linfo[i])) 5225 return -EFAULT; 5226 } 5227 } else { 5228 info.jited_line_info = 0; 5229 } 5230 } 5231 5232 ulen = info.nr_prog_tags; 5233 info.nr_prog_tags = prog->aux->func_cnt ? 
: 1; 5234 if (ulen) { 5235 __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; 5236 u32 i; 5237 5238 user_prog_tags = u64_to_user_ptr(info.prog_tags); 5239 ulen = min_t(u32, info.nr_prog_tags, ulen); 5240 if (prog->aux->func_cnt) { 5241 for (i = 0; i < ulen; i++) { 5242 if (copy_to_user(user_prog_tags[i], 5243 prog->aux->func[i]->tag, 5244 BPF_TAG_SIZE)) 5245 return -EFAULT; 5246 } 5247 } else { 5248 if (copy_to_user(user_prog_tags[0], 5249 prog->tag, BPF_TAG_SIZE)) 5250 return -EFAULT; 5251 } 5252 } 5253 5254 done: 5255 if (copy_to_user(uinfo, &info, info_len) || 5256 put_user(info_len, &uattr->info.info_len)) 5257 return -EFAULT; 5258 5259 return 0; 5260 } 5261 5262 static int bpf_map_get_info_by_fd(struct file *file, 5263 struct bpf_map *map, 5264 const union bpf_attr *attr, 5265 union bpf_attr __user *uattr) 5266 { 5267 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5268 struct bpf_map_info info; 5269 u32 info_len = attr->info.info_len; 5270 int err; 5271 5272 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 5273 if (err) 5274 return err; 5275 info_len = min_t(u32, sizeof(info), info_len); 5276 5277 memset(&info, 0, sizeof(info)); 5278 if (copy_from_user(&info, uinfo, info_len)) 5279 return -EFAULT; 5280 5281 info.type = map->map_type; 5282 info.id = map->id; 5283 info.key_size = map->key_size; 5284 info.value_size = map->value_size; 5285 info.max_entries = map->max_entries; 5286 info.map_flags = map->map_flags; 5287 info.map_extra = map->map_extra; 5288 memcpy(info.name, map->name, sizeof(map->name)); 5289 5290 if (map->btf) { 5291 info.btf_id = btf_obj_id(map->btf); 5292 info.btf_key_type_id = map->btf_key_type_id; 5293 info.btf_value_type_id = map->btf_value_type_id; 5294 } 5295 info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 5296 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) 5297 bpf_map_struct_ops_info_fill(&info, map); 5298 5299 if (bpf_map_is_offloaded(map)) { 5300 err = bpf_map_offload_info_fill(&info, map); 5301 if (err) 5302 return err; 5303 } 5304 5305 if (info.hash) { 5306 char __user *uhash = u64_to_user_ptr(info.hash); 5307 5308 if (!map->ops->map_get_hash) 5309 return -EINVAL; 5310 5311 if (info.hash_size != SHA256_DIGEST_SIZE) 5312 return -EINVAL; 5313 5314 err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha); 5315 if (err != 0) 5316 return err; 5317 5318 if (copy_to_user(uhash, map->sha, SHA256_DIGEST_SIZE) != 0) 5319 return -EFAULT; 5320 } else if (info.hash_size) { 5321 return -EINVAL; 5322 } 5323 5324 if (copy_to_user(uinfo, &info, info_len) || 5325 put_user(info_len, &uattr->info.info_len)) 5326 return -EFAULT; 5327 5328 return 0; 5329 } 5330 5331 static int bpf_btf_get_info_by_fd(struct file *file, 5332 struct btf *btf, 5333 const union bpf_attr *attr, 5334 union bpf_attr __user *uattr) 5335 { 5336 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5337 u32 info_len = attr->info.info_len; 5338 int err; 5339 5340 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5341 if (err) 5342 return err; 5343 5344 return btf_get_info_by_fd(btf, attr, uattr); 5345 } 5346 5347 static int bpf_link_get_info_by_fd(struct file *file, 5348 struct bpf_link *link, 5349 const union bpf_attr *attr, 5350 union bpf_attr __user *uattr) 5351 { 5352 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5353 struct bpf_link_info info; 5354 u32 info_len = attr->info.info_len; 5355 int err; 5356 5357 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), 
sizeof(info), info_len); 5358 if (err) 5359 return err; 5360 info_len = min_t(u32, sizeof(info), info_len); 5361 5362 memset(&info, 0, sizeof(info)); 5363 if (copy_from_user(&info, uinfo, info_len)) 5364 return -EFAULT; 5365 5366 info.type = link->type; 5367 info.id = link->id; 5368 if (link->prog) 5369 info.prog_id = link->prog->aux->id; 5370 5371 if (link->ops->fill_link_info) { 5372 err = link->ops->fill_link_info(link, &info); 5373 if (err) 5374 return err; 5375 } 5376 5377 if (copy_to_user(uinfo, &info, info_len) || 5378 put_user(info_len, &uattr->info.info_len)) 5379 return -EFAULT; 5380 5381 return 0; 5382 } 5383 5384 5385 static int token_get_info_by_fd(struct file *file, 5386 struct bpf_token *token, 5387 const union bpf_attr *attr, 5388 union bpf_attr __user *uattr) 5389 { 5390 struct bpf_token_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5391 u32 info_len = attr->info.info_len; 5392 int err; 5393 5394 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5395 if (err) 5396 return err; 5397 return bpf_token_get_info_by_fd(token, attr, uattr); 5398 } 5399 5400 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info 5401 5402 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, 5403 union bpf_attr __user *uattr) 5404 { 5405 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) 5406 return -EINVAL; 5407 5408 CLASS(fd, f)(attr->info.bpf_fd); 5409 if (fd_empty(f)) 5410 return -EBADFD; 5411 5412 if (fd_file(f)->f_op == &bpf_prog_fops) 5413 return bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5414 uattr); 5415 else if (fd_file(f)->f_op == &bpf_map_fops) 5416 return bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5417 uattr); 5418 else if (fd_file(f)->f_op == &btf_fops) 5419 return bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); 5420 else if (fd_file(f)->f_op == &bpf_link_fops || fd_file(f)->f_op == &bpf_link_fops_poll) 5421 return bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5422 attr, uattr); 5423 else if (fd_file(f)->f_op == &bpf_token_fops) 5424 return token_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5425 attr, uattr); 5426 return -EINVAL; 5427 } 5428 5429 #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd 5430 5431 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) 5432 { 5433 struct bpf_token *token = NULL; 5434 5435 if (CHECK_ATTR(BPF_BTF_LOAD)) 5436 return -EINVAL; 5437 5438 if (attr->btf_flags & ~BPF_F_TOKEN_FD) 5439 return -EINVAL; 5440 5441 if (attr->btf_flags & BPF_F_TOKEN_FD) { 5442 token = bpf_token_get_from_fd(attr->btf_token_fd); 5443 if (IS_ERR(token)) 5444 return PTR_ERR(token); 5445 if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { 5446 bpf_token_put(token); 5447 token = NULL; 5448 } 5449 } 5450 5451 if (!bpf_token_capable(token, CAP_BPF)) { 5452 bpf_token_put(token); 5453 return -EPERM; 5454 } 5455 5456 bpf_token_put(token); 5457 5458 return btf_new_fd(attr, uattr, uattr_size); 5459 } 5460 5461 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd 5462 5463 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) 5464 { 5465 struct bpf_token *token = NULL; 5466 5467 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) 5468 return -EINVAL; 5469 5470 if (attr->open_flags & ~BPF_F_TOKEN_FD) 5471 return -EINVAL; 5472 5473 if (attr->open_flags & BPF_F_TOKEN_FD) { 5474 token = bpf_token_get_from_fd(attr->fd_by_id_token_fd); 5475 if (IS_ERR(token)) 5476 return PTR_ERR(token); 5477 if (!bpf_token_allow_cmd(token, BPF_BTF_GET_FD_BY_ID)) { 
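			/* The token does not cover BPF_BTF_GET_FD_BY_ID; drop it
			 * and fall back to the plain capability check below.
			 */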
5478 bpf_token_put(token); 5479 token = NULL; 5480 } 5481 } 5482 5483 if (!bpf_token_capable(token, CAP_SYS_ADMIN)) { 5484 bpf_token_put(token); 5485 return -EPERM; 5486 } 5487 5488 bpf_token_put(token); 5489 5490 return btf_get_fd_by_id(attr->btf_id); 5491 } 5492 5493 static int bpf_task_fd_query_copy(const union bpf_attr *attr, 5494 union bpf_attr __user *uattr, 5495 u32 prog_id, u32 fd_type, 5496 const char *buf, u64 probe_offset, 5497 u64 probe_addr) 5498 { 5499 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf); 5500 u32 len = buf ? strlen(buf) : 0, input_len; 5501 int err = 0; 5502 5503 if (put_user(len, &uattr->task_fd_query.buf_len)) 5504 return -EFAULT; 5505 input_len = attr->task_fd_query.buf_len; 5506 if (input_len && ubuf) { 5507 if (!len) { 5508 /* nothing to copy, just make ubuf NULL terminated */ 5509 char zero = '\0'; 5510 5511 if (put_user(zero, ubuf)) 5512 return -EFAULT; 5513 } else { 5514 err = bpf_copy_to_user(ubuf, buf, input_len, len); 5515 if (err == -EFAULT) 5516 return err; 5517 } 5518 } 5519 5520 if (put_user(prog_id, &uattr->task_fd_query.prog_id) || 5521 put_user(fd_type, &uattr->task_fd_query.fd_type) || 5522 put_user(probe_offset, &uattr->task_fd_query.probe_offset) || 5523 put_user(probe_addr, &uattr->task_fd_query.probe_addr)) 5524 return -EFAULT; 5525 5526 return err; 5527 } 5528 5529 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr 5530 5531 static int bpf_task_fd_query(const union bpf_attr *attr, 5532 union bpf_attr __user *uattr) 5533 { 5534 pid_t pid = attr->task_fd_query.pid; 5535 u32 fd = attr->task_fd_query.fd; 5536 const struct perf_event *event; 5537 struct task_struct *task; 5538 struct file *file; 5539 int err; 5540 5541 if (CHECK_ATTR(BPF_TASK_FD_QUERY)) 5542 return -EINVAL; 5543 5544 if (!capable(CAP_SYS_ADMIN)) 5545 return -EPERM; 5546 5547 if (attr->task_fd_query.flags != 0) 5548 return -EINVAL; 5549 5550 rcu_read_lock(); 5551 task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 5552 rcu_read_unlock(); 5553 if (!task) 5554 return -ENOENT; 5555 5556 err = 0; 5557 file = fget_task(task, fd); 5558 put_task_struct(task); 5559 if (!file) 5560 return -EBADF; 5561 5562 if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) { 5563 struct bpf_link *link = file->private_data; 5564 5565 if (link->ops == &bpf_raw_tp_link_lops) { 5566 struct bpf_raw_tp_link *raw_tp = 5567 container_of(link, struct bpf_raw_tp_link, link); 5568 struct bpf_raw_event_map *btp = raw_tp->btp; 5569 5570 err = bpf_task_fd_query_copy(attr, uattr, 5571 raw_tp->link.prog->aux->id, 5572 BPF_FD_TYPE_RAW_TRACEPOINT, 5573 btp->tp->name, 0, 0); 5574 goto put_file; 5575 } 5576 goto out_not_supp; 5577 } 5578 5579 event = perf_get_event(file); 5580 if (!IS_ERR(event)) { 5581 u64 probe_offset, probe_addr; 5582 u32 prog_id, fd_type; 5583 const char *buf; 5584 5585 err = bpf_get_perf_event_info(event, &prog_id, &fd_type, 5586 &buf, &probe_offset, 5587 &probe_addr, NULL); 5588 if (!err) 5589 err = bpf_task_fd_query_copy(attr, uattr, prog_id, 5590 fd_type, buf, 5591 probe_offset, 5592 probe_addr); 5593 goto put_file; 5594 } 5595 5596 out_not_supp: 5597 err = -ENOTSUPP; 5598 put_file: 5599 fput(file); 5600 return err; 5601 } 5602 5603 #define BPF_MAP_BATCH_LAST_FIELD batch.flags 5604 5605 #define BPF_DO_BATCH(fn, ...) 
\ 5606 do { \ 5607 if (!fn) { \ 5608 err = -ENOTSUPP; \ 5609 goto err_put; \ 5610 } \ 5611 err = fn(__VA_ARGS__); \ 5612 } while (0) 5613 5614 static int bpf_map_do_batch(const union bpf_attr *attr, 5615 union bpf_attr __user *uattr, 5616 int cmd) 5617 { 5618 bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || 5619 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; 5620 bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; 5621 struct bpf_map *map; 5622 int err; 5623 5624 if (CHECK_ATTR(BPF_MAP_BATCH)) 5625 return -EINVAL; 5626 5627 CLASS(fd, f)(attr->batch.map_fd); 5628 5629 map = __bpf_map_get(f); 5630 if (IS_ERR(map)) 5631 return PTR_ERR(map); 5632 if (has_write) 5633 bpf_map_write_active_inc(map); 5634 if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 5635 err = -EPERM; 5636 goto err_put; 5637 } 5638 if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 5639 err = -EPERM; 5640 goto err_put; 5641 } 5642 5643 if (cmd == BPF_MAP_LOOKUP_BATCH) 5644 BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr); 5645 else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) 5646 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr); 5647 else if (cmd == BPF_MAP_UPDATE_BATCH) 5648 BPF_DO_BATCH(map->ops->map_update_batch, map, fd_file(f), attr, uattr); 5649 else 5650 BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr); 5651 err_put: 5652 if (has_write) { 5653 maybe_wait_bpf_programs(map); 5654 bpf_map_write_active_dec(map); 5655 } 5656 return err; 5657 } 5658 5659 #define BPF_LINK_CREATE_LAST_FIELD link_create.uprobe_multi.pid 5660 static int link_create(union bpf_attr *attr, bpfptr_t uattr) 5661 { 5662 struct bpf_prog *prog; 5663 int ret; 5664 5665 if (CHECK_ATTR(BPF_LINK_CREATE)) 5666 return -EINVAL; 5667 5668 if (attr->link_create.attach_type == BPF_STRUCT_OPS) 5669 return bpf_struct_ops_link_create(attr); 5670 5671 prog = bpf_prog_get(attr->link_create.prog_fd); 5672 if (IS_ERR(prog)) 5673 return PTR_ERR(prog); 5674 5675 ret = bpf_prog_attach_check_attach_type(prog, 5676 attr->link_create.attach_type); 5677 if (ret) 5678 goto out; 5679 5680 switch (prog->type) { 5681 case BPF_PROG_TYPE_CGROUP_SKB: 5682 case BPF_PROG_TYPE_CGROUP_SOCK: 5683 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 5684 case BPF_PROG_TYPE_SOCK_OPS: 5685 case BPF_PROG_TYPE_CGROUP_DEVICE: 5686 case BPF_PROG_TYPE_CGROUP_SYSCTL: 5687 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 5688 ret = cgroup_bpf_link_attach(attr, prog); 5689 break; 5690 case BPF_PROG_TYPE_EXT: 5691 ret = bpf_tracing_prog_attach(prog, 5692 attr->link_create.target_fd, 5693 attr->link_create.target_btf_id, 5694 attr->link_create.tracing.cookie, 5695 attr->link_create.attach_type); 5696 break; 5697 case BPF_PROG_TYPE_LSM: 5698 case BPF_PROG_TYPE_TRACING: 5699 if (attr->link_create.attach_type != prog->expected_attach_type) { 5700 ret = -EINVAL; 5701 goto out; 5702 } 5703 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) 5704 ret = bpf_raw_tp_link_attach(prog, NULL, attr->link_create.tracing.cookie, 5705 attr->link_create.attach_type); 5706 else if (prog->expected_attach_type == BPF_TRACE_ITER) 5707 ret = bpf_iter_link_attach(attr, uattr, prog); 5708 else if (prog->expected_attach_type == BPF_LSM_CGROUP) 5709 ret = cgroup_bpf_link_attach(attr, prog); 5710 else 5711 ret = bpf_tracing_prog_attach(prog, 5712 attr->link_create.target_fd, 5713 attr->link_create.target_btf_id, 5714 attr->link_create.tracing.cookie, 5715 attr->link_create.attach_type); 5716 break; 5717 case BPF_PROG_TYPE_FLOW_DISSECTOR: 5718 case BPF_PROG_TYPE_SK_LOOKUP: 5719 ret = 
netns_bpf_link_create(attr, prog); 5720 break; 5721 case BPF_PROG_TYPE_SK_MSG: 5722 case BPF_PROG_TYPE_SK_SKB: 5723 ret = sock_map_link_create(attr, prog); 5724 break; 5725 #ifdef CONFIG_NET 5726 case BPF_PROG_TYPE_XDP: 5727 ret = bpf_xdp_link_attach(attr, prog); 5728 break; 5729 case BPF_PROG_TYPE_SCHED_CLS: 5730 if (attr->link_create.attach_type == BPF_TCX_INGRESS || 5731 attr->link_create.attach_type == BPF_TCX_EGRESS) 5732 ret = tcx_link_attach(attr, prog); 5733 else 5734 ret = netkit_link_attach(attr, prog); 5735 break; 5736 case BPF_PROG_TYPE_NETFILTER: 5737 ret = bpf_nf_link_attach(attr, prog); 5738 break; 5739 #endif 5740 case BPF_PROG_TYPE_PERF_EVENT: 5741 case BPF_PROG_TYPE_TRACEPOINT: 5742 ret = bpf_perf_link_attach(attr, prog); 5743 break; 5744 case BPF_PROG_TYPE_KPROBE: 5745 if (attr->link_create.attach_type == BPF_PERF_EVENT) 5746 ret = bpf_perf_link_attach(attr, prog); 5747 else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI || 5748 attr->link_create.attach_type == BPF_TRACE_KPROBE_SESSION) 5749 ret = bpf_kprobe_multi_link_attach(attr, prog); 5750 else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI || 5751 attr->link_create.attach_type == BPF_TRACE_UPROBE_SESSION) 5752 ret = bpf_uprobe_multi_link_attach(attr, prog); 5753 break; 5754 default: 5755 ret = -EINVAL; 5756 } 5757 5758 out: 5759 if (ret < 0) 5760 bpf_prog_put(prog); 5761 return ret; 5762 } 5763 5764 static int link_update_map(struct bpf_link *link, union bpf_attr *attr) 5765 { 5766 struct bpf_map *new_map, *old_map = NULL; 5767 int ret; 5768 5769 new_map = bpf_map_get(attr->link_update.new_map_fd); 5770 if (IS_ERR(new_map)) 5771 return PTR_ERR(new_map); 5772 5773 if (attr->link_update.flags & BPF_F_REPLACE) { 5774 old_map = bpf_map_get(attr->link_update.old_map_fd); 5775 if (IS_ERR(old_map)) { 5776 ret = PTR_ERR(old_map); 5777 goto out_put; 5778 } 5779 } else if (attr->link_update.old_map_fd) { 5780 ret = -EINVAL; 5781 goto out_put; 5782 } 5783 5784 ret = link->ops->update_map(link, new_map, old_map); 5785 5786 if (old_map) 5787 bpf_map_put(old_map); 5788 out_put: 5789 bpf_map_put(new_map); 5790 return ret; 5791 } 5792 5793 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd 5794 5795 static int link_update(union bpf_attr *attr) 5796 { 5797 struct bpf_prog *old_prog = NULL, *new_prog; 5798 struct bpf_link *link; 5799 u32 flags; 5800 int ret; 5801 5802 if (CHECK_ATTR(BPF_LINK_UPDATE)) 5803 return -EINVAL; 5804 5805 flags = attr->link_update.flags; 5806 if (flags & ~BPF_F_REPLACE) 5807 return -EINVAL; 5808 5809 link = bpf_link_get_from_fd(attr->link_update.link_fd); 5810 if (IS_ERR(link)) 5811 return PTR_ERR(link); 5812 5813 if (link->ops->update_map) { 5814 ret = link_update_map(link, attr); 5815 goto out_put_link; 5816 } 5817 5818 new_prog = bpf_prog_get(attr->link_update.new_prog_fd); 5819 if (IS_ERR(new_prog)) { 5820 ret = PTR_ERR(new_prog); 5821 goto out_put_link; 5822 } 5823 5824 if (flags & BPF_F_REPLACE) { 5825 old_prog = bpf_prog_get(attr->link_update.old_prog_fd); 5826 if (IS_ERR(old_prog)) { 5827 ret = PTR_ERR(old_prog); 5828 old_prog = NULL; 5829 goto out_put_progs; 5830 } 5831 } else if (attr->link_update.old_prog_fd) { 5832 ret = -EINVAL; 5833 goto out_put_progs; 5834 } 5835 5836 if (link->ops->update_prog) 5837 ret = link->ops->update_prog(link, new_prog, old_prog); 5838 else 5839 ret = -EINVAL; 5840 5841 out_put_progs: 5842 if (old_prog) 5843 bpf_prog_put(old_prog); 5844 if (ret) 5845 bpf_prog_put(new_prog); 5846 out_put_link: 5847 bpf_link_put_direct(link); 5848 return 
ret; 5849 } 5850 5851 #define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd 5852 5853 static int link_detach(union bpf_attr *attr) 5854 { 5855 struct bpf_link *link; 5856 int ret; 5857 5858 if (CHECK_ATTR(BPF_LINK_DETACH)) 5859 return -EINVAL; 5860 5861 link = bpf_link_get_from_fd(attr->link_detach.link_fd); 5862 if (IS_ERR(link)) 5863 return PTR_ERR(link); 5864 5865 if (link->ops->detach) 5866 ret = link->ops->detach(link); 5867 else 5868 ret = -EOPNOTSUPP; 5869 5870 bpf_link_put_direct(link); 5871 return ret; 5872 } 5873 5874 struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) 5875 { 5876 return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT); 5877 } 5878 EXPORT_SYMBOL(bpf_link_inc_not_zero); 5879 5880 struct bpf_link *bpf_link_by_id(u32 id) 5881 { 5882 struct bpf_link *link; 5883 5884 if (!id) 5885 return ERR_PTR(-ENOENT); 5886 5887 spin_lock_bh(&link_idr_lock); 5888 /* before link is "settled", ID is 0, pretend it doesn't exist yet */ 5889 link = idr_find(&link_idr, id); 5890 if (link) { 5891 if (link->id) 5892 link = bpf_link_inc_not_zero(link); 5893 else 5894 link = ERR_PTR(-EAGAIN); 5895 } else { 5896 link = ERR_PTR(-ENOENT); 5897 } 5898 spin_unlock_bh(&link_idr_lock); 5899 return link; 5900 } 5901 5902 struct bpf_link *bpf_link_get_curr_or_next(u32 *id) 5903 { 5904 struct bpf_link *link; 5905 5906 spin_lock_bh(&link_idr_lock); 5907 again: 5908 link = idr_get_next(&link_idr, id); 5909 if (link) { 5910 link = bpf_link_inc_not_zero(link); 5911 if (IS_ERR(link)) { 5912 (*id)++; 5913 goto again; 5914 } 5915 } 5916 spin_unlock_bh(&link_idr_lock); 5917 5918 return link; 5919 } 5920 5921 #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id 5922 5923 static int bpf_link_get_fd_by_id(const union bpf_attr *attr) 5924 { 5925 struct bpf_link *link; 5926 u32 id = attr->link_id; 5927 int fd; 5928 5929 if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) 5930 return -EINVAL; 5931 5932 if (!capable(CAP_SYS_ADMIN)) 5933 return -EPERM; 5934 5935 link = bpf_link_by_id(id); 5936 if (IS_ERR(link)) 5937 return PTR_ERR(link); 5938 5939 fd = bpf_link_new_fd(link); 5940 if (fd < 0) 5941 bpf_link_put_direct(link); 5942 5943 return fd; 5944 } 5945 5946 DEFINE_MUTEX(bpf_stats_enabled_mutex); 5947 5948 static int bpf_stats_release(struct inode *inode, struct file *file) 5949 { 5950 mutex_lock(&bpf_stats_enabled_mutex); 5951 static_key_slow_dec(&bpf_stats_enabled_key.key); 5952 mutex_unlock(&bpf_stats_enabled_mutex); 5953 return 0; 5954 } 5955 5956 static const struct file_operations bpf_stats_fops = { 5957 .release = bpf_stats_release, 5958 }; 5959 5960 static int bpf_enable_runtime_stats(void) 5961 { 5962 int fd; 5963 5964 mutex_lock(&bpf_stats_enabled_mutex); 5965 5966 /* Set a very high limit to avoid overflow */ 5967 if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) { 5968 mutex_unlock(&bpf_stats_enabled_mutex); 5969 return -EBUSY; 5970 } 5971 5972 fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); 5973 if (fd >= 0) 5974 static_key_slow_inc(&bpf_stats_enabled_key.key); 5975 5976 mutex_unlock(&bpf_stats_enabled_mutex); 5977 return fd; 5978 } 5979 5980 #define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type 5981 5982 static int bpf_enable_stats(union bpf_attr *attr) 5983 { 5984 5985 if (CHECK_ATTR(BPF_ENABLE_STATS)) 5986 return -EINVAL; 5987 5988 if (!capable(CAP_SYS_ADMIN)) 5989 return -EPERM; 5990 5991 switch (attr->enable_stats.type) { 5992 case BPF_STATS_RUN_TIME: 5993 return bpf_enable_runtime_stats(); 5994 default: 5995 break; 5996 } 5997 return -EINVAL; 
5998 } 5999 6000 #define BPF_ITER_CREATE_LAST_FIELD iter_create.flags 6001 6002 static int bpf_iter_create(union bpf_attr *attr) 6003 { 6004 struct bpf_link *link; 6005 int err; 6006 6007 if (CHECK_ATTR(BPF_ITER_CREATE)) 6008 return -EINVAL; 6009 6010 if (attr->iter_create.flags) 6011 return -EINVAL; 6012 6013 link = bpf_link_get_from_fd(attr->iter_create.link_fd); 6014 if (IS_ERR(link)) 6015 return PTR_ERR(link); 6016 6017 err = bpf_iter_new_fd(link); 6018 bpf_link_put_direct(link); 6019 6020 return err; 6021 } 6022 6023 #define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags 6024 6025 static int bpf_prog_bind_map(union bpf_attr *attr) 6026 { 6027 struct bpf_prog *prog; 6028 struct bpf_map *map; 6029 struct bpf_map **used_maps_old, **used_maps_new; 6030 int i, ret = 0; 6031 6032 if (CHECK_ATTR(BPF_PROG_BIND_MAP)) 6033 return -EINVAL; 6034 6035 if (attr->prog_bind_map.flags) 6036 return -EINVAL; 6037 6038 prog = bpf_prog_get(attr->prog_bind_map.prog_fd); 6039 if (IS_ERR(prog)) 6040 return PTR_ERR(prog); 6041 6042 map = bpf_map_get(attr->prog_bind_map.map_fd); 6043 if (IS_ERR(map)) { 6044 ret = PTR_ERR(map); 6045 goto out_prog_put; 6046 } 6047 6048 mutex_lock(&prog->aux->used_maps_mutex); 6049 6050 used_maps_old = prog->aux->used_maps; 6051 6052 for (i = 0; i < prog->aux->used_map_cnt; i++) 6053 if (used_maps_old[i] == map) { 6054 bpf_map_put(map); 6055 goto out_unlock; 6056 } 6057 6058 used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, 6059 sizeof(used_maps_new[0]), 6060 GFP_KERNEL); 6061 if (!used_maps_new) { 6062 ret = -ENOMEM; 6063 goto out_unlock; 6064 } 6065 6066 /* The bpf program will not access the bpf map, but for the sake of 6067 * simplicity, increase sleepable_refcnt for sleepable program as well. 6068 */ 6069 if (prog->sleepable) 6070 atomic64_inc(&map->sleepable_refcnt); 6071 memcpy(used_maps_new, used_maps_old, 6072 sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); 6073 used_maps_new[prog->aux->used_map_cnt] = map; 6074 6075 prog->aux->used_map_cnt++; 6076 prog->aux->used_maps = used_maps_new; 6077 6078 kfree(used_maps_old); 6079 6080 out_unlock: 6081 mutex_unlock(&prog->aux->used_maps_mutex); 6082 6083 if (ret) 6084 bpf_map_put(map); 6085 out_prog_put: 6086 bpf_prog_put(prog); 6087 return ret; 6088 } 6089 6090 #define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd 6091 6092 static int token_create(union bpf_attr *attr) 6093 { 6094 if (CHECK_ATTR(BPF_TOKEN_CREATE)) 6095 return -EINVAL; 6096 6097 /* no flags are supported yet */ 6098 if (attr->token_create.flags) 6099 return -EINVAL; 6100 6101 return bpf_token_create(attr); 6102 } 6103 6104 #define BPF_PROG_STREAM_READ_BY_FD_LAST_FIELD prog_stream_read.prog_fd 6105 6106 static int prog_stream_read(union bpf_attr *attr) 6107 { 6108 char __user *buf = u64_to_user_ptr(attr->prog_stream_read.stream_buf); 6109 u32 len = attr->prog_stream_read.stream_buf_len; 6110 struct bpf_prog *prog; 6111 int ret; 6112 6113 if (CHECK_ATTR(BPF_PROG_STREAM_READ_BY_FD)) 6114 return -EINVAL; 6115 6116 prog = bpf_prog_get(attr->prog_stream_read.prog_fd); 6117 if (IS_ERR(prog)) 6118 return PTR_ERR(prog); 6119 6120 ret = bpf_prog_stream_read(prog, attr->prog_stream_read.stream_id, buf, len); 6121 bpf_prog_put(prog); 6122 6123 return ret; 6124 } 6125 6126 static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) 6127 { 6128 union bpf_attr attr; 6129 int err; 6130 6131 err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); 6132 if (err) 6133 return err; 6134 size = min_t(u32, size, sizeof(attr)); 6135 6136 /* copy 
	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	memset(&attr, 0, sizeof(attr));
	if (copy_from_bpfptr(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size, uattr.is_kernel);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr, uattr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr, uattr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr, uattr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_MAP_FREEZE:
		err = map_freeze(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr, uattr, size);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr.user);
		break;
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr.user);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_BTF_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &btf_idr, &btf_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr.user);
		break;
	case BPF_RAW_TRACEPOINT_OPEN:
		err = bpf_raw_tracepoint_open(&attr);
		break;
	case BPF_BTF_LOAD:
		err = bpf_btf_load(&attr, uattr, size);
		break;
	case BPF_BTF_GET_FD_BY_ID:
		err = bpf_btf_get_fd_by_id(&attr);
		break;
	case BPF_TASK_FD_QUERY:
		err = bpf_task_fd_query(&attr, uattr.user);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
		err = map_lookup_and_delete_elem(&attr);
		break;
	case BPF_MAP_LOOKUP_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user,
				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
		break;
	case BPF_MAP_UPDATE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
		break;
	case BPF_MAP_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
		break;
	case BPF_LINK_CREATE:
		err = link_create(&attr, uattr);
		break;
	case BPF_LINK_UPDATE:
		err = link_update(&attr);
		break;
	case BPF_LINK_GET_FD_BY_ID:
		err = bpf_link_get_fd_by_id(&attr);
		break;
	case BPF_LINK_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &link_idr, &link_idr_lock);
		break;
	case BPF_ENABLE_STATS:
		err = bpf_enable_stats(&attr);
		break;
	case BPF_ITER_CREATE:
		err = bpf_iter_create(&attr);
		break;
	case BPF_LINK_DETACH:
		err = link_detach(&attr);
		break;
	case BPF_PROG_BIND_MAP:
		err = bpf_prog_bind_map(&attr);
		break;
	case BPF_TOKEN_CREATE:
		err = token_create(&attr);
		break;
	case BPF_PROG_STREAM_READ_BY_FD:
		err = prog_stream_read(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
}

static bool syscall_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= U16_MAX)
		return false;
	if (off % size != 0)
		return false;
	return true;
}

BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
{
	/* Only a limited allowlist of commands can be issued from a
	 * BPF_PROG_TYPE_SYSCALL program; anything else is rejected.
	 */
	switch (cmd) {
	case BPF_MAP_CREATE:
	case BPF_MAP_DELETE_ELEM:
	case BPF_MAP_UPDATE_ELEM:
	case BPF_MAP_FREEZE:
	case BPF_MAP_GET_FD_BY_ID:
	case BPF_PROG_LOAD:
	case BPF_BTF_LOAD:
	case BPF_LINK_CREATE:
	case BPF_RAW_TRACEPOINT_OPEN:
		break;
	default:
		return -EINVAL;
	}
	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
}

/* To shut up -Wmissing-prototypes.
 * This function is used by the kernel light skeleton
 * to load bpf programs when modules are loaded or during kernel boot.
 * See tools/lib/bpf/skel_internal.h
 */
int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);

int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	struct bpf_prog * __maybe_unused prog;
	struct bpf_tramp_run_ctx __maybe_unused run_ctx;

	switch (cmd) {
#ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
	case BPF_PROG_TEST_RUN:
		if (attr->test.data_in || attr->test.data_out ||
		    attr->test.ctx_out || attr->test.duration ||
		    attr->test.repeat || attr->test.flags)
			return -EINVAL;

		prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
		if (IS_ERR(prog))
			return PTR_ERR(prog);

		if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
		    attr->test.ctx_size_in > U16_MAX) {
			bpf_prog_put(prog);
			return -EINVAL;
		}

		run_ctx.bpf_cookie = 0;
		if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
			/* recursion detected */
			__bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
			bpf_prog_put(prog);
			return -EBUSY;
		}
		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
		__bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
						&run_ctx);
		bpf_prog_put(prog);
		return 0;
#endif
	default:
		return ____bpf_sys_bpf(cmd, attr, size);
	}
}
EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL");

static const struct bpf_func_proto bpf_sys_bpf_proto = {
	.func = bpf_sys_bpf,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
	.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type = ARG_CONST_SIZE,
};

const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return bpf_base_func_proto(func_id, prog);
}

BPF_CALL_1(bpf_sys_close, u32, fd)
{
	/* When bpf program calls this helper there should not be
	 * an fdget() without matching completed fdput().
	 * This helper is allowed in the following callchain only:
	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
	 */
	return close_fd(fd);
}

static const struct bpf_func_proto bpf_sys_close_proto = {
	.func = bpf_sys_close,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
};

BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
	*res = 0;
	if (flags)
		return -EINVAL;

	if (name_sz <= 1 || name[name_sz - 1])
		return -EINVAL;

	if (!bpf_dump_raw_ok(current_cred()))
		return -EPERM;

	*res = kallsyms_lookup_name(name);
	return *res ? 0 : -ENOENT;
}

static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
	.func = bpf_kallsyms_lookup_name,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_MEM,
	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
	.arg3_type = ARG_ANYTHING,
	.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
	.arg4_size = sizeof(u64),
};

static const struct bpf_func_proto *
syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_sys_bpf:
		return !bpf_token_capable(prog->aux->token, CAP_PERFMON)
		       ? NULL : &bpf_sys_bpf_proto;
	case BPF_FUNC_btf_find_by_name_kind:
		return &bpf_btf_find_by_name_kind_proto;
	case BPF_FUNC_sys_close:
		return &bpf_sys_close_proto;
	case BPF_FUNC_kallsyms_lookup_name:
		return &bpf_kallsyms_lookup_name_proto;
	default:
		return tracing_prog_func_proto(func_id, prog);
	}
}

const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
	.get_func_proto = syscall_prog_func_proto,
	.is_valid_access = syscall_prog_is_valid_access,
};

const struct bpf_prog_ops bpf_syscall_prog_ops = {
	.test_run = bpf_prog_test_run_syscall,
};

#ifdef CONFIG_SYSCTL
static int bpf_stats_handler(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct static_key *key = (struct static_key *)table->data;
	static int saved_val;
	int val, ret;
	struct ctl_table tmp = {
		.data = &val,
		.maxlen = sizeof(val),
		.mode = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	val = saved_val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret && val != saved_val) {
		if (val)
			static_key_slow_inc(key);
		else
			static_key_slow_dec(key);
		saved_val = val;
	}
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}

void __weak unpriv_ebpf_notify(int new_state)
{
}

static int bpf_unpriv_handler(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, unpriv_enable = *(int *)table->data;
	bool locked_state = unpriv_enable == 1;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &unpriv_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (locked_state && unpriv_enable != 1)
			return -EPERM;
		*(int *)table->data = unpriv_enable;
	}

	if (write)
		unpriv_ebpf_notify(unpriv_enable);

	return ret;
}

static const struct ctl_table bpf_syscall_table[] = {
	{
		.procname = "unprivileged_bpf_disabled",
		.data = &sysctl_unprivileged_bpf_disabled,
		.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
		.mode = 0644,
		.proc_handler = bpf_unpriv_handler,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
	},
	{
		.procname = "bpf_stats_enabled",
		.data = &bpf_stats_enabled_key.key,
		.mode = 0644,
		.proc_handler = bpf_stats_handler,
	},
};

static int __init bpf_syscall_sysctl_init(void)
{
	register_sysctl_init("kernel", bpf_syscall_table);
	return 0;
}
late_initcall(bpf_syscall_sysctl_init);
#endif /* CONFIG_SYSCTL */
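
/*
 * A minimal userspace sketch (not kernel code, hence guarded out with "#if 0")
 * of driving the BPF_ENABLE_STATS command implemented above: the returned fd
 * keeps runtime stats enabled, and closing it drops the static key again via
 * bpf_stats_release(). Assumes the UAPI <linux/bpf.h> header and the raw
 * syscall(2) wrapper; the helper name enable_bpf_runtime_stats() is
 * illustrative only.
 */
#if 0
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int enable_bpf_runtime_stats(void)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.enable_stats.type = BPF_STATS_RUN_TIME;

	/* Needs CAP_SYS_ADMIN; stats stay enabled until the returned fd is closed. */
	return syscall(__NR_bpf, BPF_ENABLE_STATS, &attr, sizeof(attr));
}
#endif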