// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <crypto/sha2.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
#include <linux/bsearch.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
#include <linux/pgtable.h>
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/sort.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
#include <linux/tracepoint.h>
#include <linux/overflow.h>
#include <linux/cookie.h>
#include <linux/verification.h>

#include <net/netfilter/nf_bpf_link.h>
#include <net/netkit.h>
#include <net/tcx.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
			IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
DEFINE_COOKIE(bpf_map_cookie);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);
static DEFINE_IDR(link_idr);
static DEFINE_SPINLOCK(link_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly =
	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
			     size_t expected_size,
			     size_t actual_size)
{
	int res;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (actual_size <= expected_size)
		return 0;

	if (uaddr.is_kernel)
		res = memchr_inv(uaddr.kernel + expected_size, 0,
				 actual_size - expected_size) == NULL;
	else
		res = check_zeroed_user(uaddr.user + expected_size,
					actual_size - expected_size);
	if (res < 0)
		return res;
	return res ? 0 : -E2BIG;
}
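
/*
 * Illustrative sketch (not part of this file's logic): syscall handlers that
 * accept a user-sized struct typically pair bpf_check_uarg_tail_zero() with a
 * truncating copy, roughly like
 *
 *	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
 *	if (err)
 *		return err;
 *	info_len = min_t(u32, sizeof(info), info_len);
 *	memset(&info, 0, sizeof(info));
 *	if (copy_from_user(&info, uinfo, info_len))
 *		return -EFAULT;
 *
 * so a newer, larger user struct is accepted as long as every byte beyond
 * what this kernel understands is zero.
 */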

const struct bpf_map_ops bpf_map_offload_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = bpf_map_offload_map_alloc,
	.map_free = bpf_map_offload_map_free,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = bpf_map_offload_map_mem_usage,
};

static void bpf_map_write_active_inc(struct bpf_map *map)
{
	atomic64_inc(&map->writecnt);
}

static void bpf_map_write_active_dec(struct bpf_map *map)
{
	atomic64_dec(&map->writecnt);
}

bool bpf_map_write_active(const struct bpf_map *map)
{
	return atomic64_read(&map->writecnt) != 0;
}

static u32 bpf_map_value_size(const struct bpf_map *map)
{
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		return round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		return sizeof(u32);
	else
		return map->value_size;
}

static void maybe_wait_bpf_programs(struct bpf_map *map)
{
	/* Wait for any running non-sleepable BPF programs to complete so that
	 * userspace, when we return to it, knows that all non-sleepable
	 * programs that could be running use the new map value. For sleepable
	 * BPF programs, synchronize_rcu_tasks_trace() should be used to wait
	 * for the completion of these programs, but the waiting time can be
	 * very long and userspace may think it is hanging forever, so don't
	 * handle sleepable BPF programs now.
	 */
	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
		synchronize_rcu_expedited();
}

static void unpin_uptr_kaddr(void *kaddr)
{
	if (kaddr)
		unpin_user_page(virt_to_page(kaddr));
}

static void __bpf_obj_unpin_uptrs(struct btf_record *rec, u32 cnt, void *obj)
{
	const struct btf_field *field;
	void **uptr_addr;
	int i;

	for (i = 0, field = rec->fields; i < cnt; i++, field++) {
		if (field->type != BPF_UPTR)
			continue;

		uptr_addr = obj + field->offset;
		unpin_uptr_kaddr(*uptr_addr);
	}
}

static void bpf_obj_unpin_uptrs(struct btf_record *rec, void *obj)
{
	if (!btf_record_has_field(rec, BPF_UPTR))
		return;

	__bpf_obj_unpin_uptrs(rec, rec->cnt, obj);
}

static int bpf_obj_pin_uptrs(struct btf_record *rec, void *obj)
{
	const struct btf_field *field;
	const struct btf_type *t;
	unsigned long start, end;
	struct page *page;
	void **uptr_addr;
	int i, err;

	if (!btf_record_has_field(rec, BPF_UPTR))
		return 0;

	for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) {
		if (field->type != BPF_UPTR)
			continue;

		uptr_addr = obj + field->offset;
		start = *(unsigned long *)uptr_addr;
		if (!start)
			continue;

		t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
		/* t->size was checked for zero before */
		if (check_add_overflow(start, t->size - 1, &end)) {
			err = -EFAULT;
			goto unpin_all;
		}

		/* The uptr's struct cannot span across two pages */
		if ((start & PAGE_MASK) != (end & PAGE_MASK)) {
			err = -EOPNOTSUPP;
			goto unpin_all;
		}

		err = pin_user_pages_fast(start, 1, FOLL_LONGTERM | FOLL_WRITE, &page);
		if (err != 1)
			goto unpin_all;

		if (PageHighMem(page)) {
			err = -EOPNOTSUPP;
			unpin_user_page(page);
			goto unpin_all;
		}

		*uptr_addr = page_address(page) + offset_in_page(start);
	}

	return 0;

unpin_all:
	__bpf_obj_unpin_uptrs(rec, i, obj);
	return err;
}
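
/*
 * Summary of the uptr pinning contract implemented above: for every non-NULL
 * BPF_UPTR field of a map value coming from the syscall path,
 * bpf_obj_pin_uptrs() pins the single user page backing the pointed-to struct
 * (FOLL_LONGTERM | FOLL_WRITE) and rewrites the field to that page's kernel
 * address. Structs that would straddle a page boundary, or whose page lives
 * in highmem, are rejected. bpf_obj_unpin_uptrs() is the inverse and must be
 * called once the value can no longer be referenced.
 */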

static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
				void *key, void *value, __u64 flags)
{
	int err;

	/* Need to create a kthread, thus must support schedule */
	if (bpf_map_is_offloaded(map)) {
		return bpf_map_offload_update_elem(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
		   map->map_type == BPF_MAP_TYPE_ARENA ||
		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		return map->ops->map_update_elem(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
		   map->map_type == BPF_MAP_TYPE_SOCKMAP) {
		return sock_map_update_elem_sys(map, key, value, flags);
	} else if (IS_FD_PROG_ARRAY(map)) {
		return bpf_fd_array_map_update_elem(map, map_file, key, value,
						    flags);
	}

	bpf_disable_instrumentation();
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
		err = bpf_percpu_cgroup_storage_update(map, key, value,
						       flags);
	} else if (IS_FD_ARRAY(map)) {
		err = bpf_fd_array_map_update_elem(map, map_file, key, value,
						   flags);
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		err = bpf_fd_htab_map_update_elem(map, map_file, key, value,
						  flags);
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		/* rcu_read_lock() is not needed */
		err = bpf_fd_reuseport_array_update_elem(map, key, value,
							 flags);
	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
		   map->map_type == BPF_MAP_TYPE_STACK ||
		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
		err = map->ops->map_push_elem(map, value, flags);
	} else {
		err = bpf_obj_pin_uptrs(map->record, value);
		if (!err) {
			rcu_read_lock();
			err = map->ops->map_update_elem(map, key, value, flags);
			rcu_read_unlock();
			if (err)
				bpf_obj_unpin_uptrs(map->record, value);
		}
	}
	bpf_enable_instrumentation();

	return err;
}

static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
			      __u64 flags)
{
	void *ptr;
	int err;

	if (bpf_map_is_offloaded(map))
		return bpf_map_offload_lookup_elem(map, key, value);

	bpf_disable_instrumentation();
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
		err = bpf_percpu_cgroup_storage_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_extract(map, key, value, false);
	} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
		   map->map_type == BPF_MAP_TYPE_STACK ||
		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
		err = map->ops->map_peek_elem(map, value);
	} else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		/* struct_ops map requires directly updating "value" */
		err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		if (map->ops->map_lookup_elem_sys_only)
			ptr = map->ops->map_lookup_elem_sys_only(map, key);
		else
			ptr = map->ops->map_lookup_elem(map, key);
		if (IS_ERR(ptr)) {
			err = PTR_ERR(ptr);
		} else if (!ptr) {
			err = -ENOENT;
		} else {
			err = 0;
			if (flags & BPF_F_LOCK)
				/* lock 'ptr' and copy everything but lock */
				copy_map_value_locked(map, value, ptr, true);
			else
				copy_map_value(map, value, ptr);
			/* mask lock and timer, since value wasn't zero inited */
			check_and_init_map_value(map, value);
		}
		rcu_read_unlock();
	}

	bpf_enable_instrumentation();

	return err;
}

/* Please, do not use this function outside of the map creation path
 * (e.g. in map update path) without taking care of setting the active
 * memory cgroup (see at bpf_map_kmalloc_node() for example).
 */
static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
{
	/* We really just want to fail instead of triggering OOM killer
	 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
	 * which is used for lower order allocation requests.
	 *
	 * It has been observed that higher order allocation requests done by
	 * vmalloc with __GFP_NORETRY being set might fail due to not trying
	 * to reclaim memory from the page cache, thus we set
	 * __GFP_RETRY_MAYFAIL to avoid such situations.
	 */

	gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO);
	unsigned int flags = 0;
	unsigned long align = 1;
	void *area;

	if (size >= SIZE_MAX)
		return NULL;

	/* kmalloc()'ed memory can't be mmap()'ed */
	if (mmapable) {
		BUG_ON(!PAGE_ALIGNED(size));
		align = SHMLBA;
		flags = VM_USERMAP;
	} else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
				    numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
			gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
			flags, numa_node, __builtin_return_address(0));
}

void *bpf_map_area_alloc(u64 size, int numa_node)
{
	return __bpf_map_area_alloc(size, numa_node, false);
}

void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
{
	return __bpf_map_area_alloc(size, numa_node, true);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

static u32 bpf_map_flags_retain_permanent(u32 flags)
{
	/* Some map creation flags are not tied to the map object but
	 * rather to the map fd instead, so they have no meaning upon
	 * map object inspection since multiple file descriptors with
	 * different (access) properties can exist here. Thus, given
	 * this has zero meaning for the map itself, let's clear these
	 * from here.
	 */
	return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
}
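
/*
 * Note: BPF_F_RDONLY / BPF_F_WRONLY describe a particular file descriptor,
 * not the map object, which is why they are masked out above. They are
 * distinct from BPF_F_RDONLY_PROG / BPF_F_WRONLY_PROG, which restrict what
 * BPF programs may do with the map and therefore do stay in map->map_flags.
 */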

void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
{
	map->map_type = attr->map_type;
	map->key_size = attr->key_size;
	map->value_size = attr->value_size;
	map->max_entries = attr->max_entries;
	map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
	map->numa_node = bpf_map_attr_numa_node(attr);
	map->map_extra = attr->map_extra;
}

static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);
	idr_preload_end();

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

void bpf_map_free_id(struct bpf_map *map)
{
	unsigned long flags;

	/* Offloaded maps are removed from the IDR store when their device
	 * disappears - even if someone holds an fd to them they are unusable,
	 * the memory is gone, all ops will fail; they are simply waiting for
	 * refcnt to drop to be freed.
	 */
	if (!map->id)
		return;

	spin_lock_irqsave(&map_idr_lock, flags);

	idr_remove(&map_idr, map->id);
	map->id = 0;

	spin_unlock_irqrestore(&map_idr_lock, flags);
}

#ifdef CONFIG_MEMCG
static void bpf_map_save_memcg(struct bpf_map *map)
{
	/* Currently if a map is created by a process belonging to the root
	 * memory cgroup, get_obj_cgroup_from_current() will return NULL.
	 * So we have to check map->objcg for being NULL each time it's
	 * being used.
	 */
	if (memcg_bpf_enabled())
		map->objcg = get_obj_cgroup_from_current();
}

static void bpf_map_release_memcg(struct bpf_map *map)
{
	if (map->objcg)
		obj_cgroup_put(map->objcg);
}

static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
{
	if (map->objcg)
		return get_mem_cgroup_from_objcg(map->objcg);

	return root_mem_cgroup;
}

void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
			   int node)
{
	struct mem_cgroup *memcg, *old_memcg;
	void *ptr;

	memcg = bpf_map_get_memcg(map);
	old_memcg = set_active_memcg(memcg);
	ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
	set_active_memcg(old_memcg);
	mem_cgroup_put(memcg);

	return ptr;
}

void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
			     int node)
{
	struct mem_cgroup *memcg, *old_memcg;
	void *ptr;

	memcg = bpf_map_get_memcg(map);
	old_memcg = set_active_memcg(memcg);
	ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node);
	set_active_memcg(old_memcg);
	mem_cgroup_put(memcg);

	return ptr;
}

void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
{
	struct mem_cgroup *memcg, *old_memcg;
	void *ptr;

	memcg = bpf_map_get_memcg(map);
	old_memcg = set_active_memcg(memcg);
	ptr = kzalloc(size, flags | __GFP_ACCOUNT);
	set_active_memcg(old_memcg);
	mem_cgroup_put(memcg);

	return ptr;
}

void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
		       gfp_t flags)
{
	struct mem_cgroup *memcg, *old_memcg;
	void *ptr;

	memcg = bpf_map_get_memcg(map);
	old_memcg = set_active_memcg(memcg);
	ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
	set_active_memcg(old_memcg);
	mem_cgroup_put(memcg);

	return ptr;
}
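
/*
 * The bpf_map_*alloc* wrappers here all follow the same pattern: temporarily
 * make the map's originating cgroup the active memcg and allocate with
 * __GFP_ACCOUNT, so the memory is charged to the cgroup that created the map
 * rather than to whichever task happens to trigger the allocation (e.g. via
 * a map update).
 */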

void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
				    size_t align, gfp_t flags)
{
	struct mem_cgroup *memcg, *old_memcg;
	void __percpu *ptr;

	memcg = bpf_map_get_memcg(map);
	old_memcg = set_active_memcg(memcg);
	ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
	set_active_memcg(old_memcg);
	mem_cgroup_put(memcg);

	return ptr;
}

#else
static void bpf_map_save_memcg(struct bpf_map *map)
{
}

static void bpf_map_release_memcg(struct bpf_map *map)
{
}
#endif

static bool can_alloc_pages(void)
{
	return preempt_count() == 0 && !irqs_disabled() &&
		!IS_ENABLED(CONFIG_PREEMPT_RT);
}

static struct page *__bpf_alloc_page(int nid)
{
	if (!can_alloc_pages())
		return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0);

	return alloc_pages_node(nid,
				GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT
				| __GFP_NOWARN,
				0);
}

int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
			unsigned long nr_pages, struct page **pages)
{
	unsigned long i, j;
	struct page *pg;
	int ret = 0;
#ifdef CONFIG_MEMCG
	struct mem_cgroup *memcg, *old_memcg;

	memcg = bpf_map_get_memcg(map);
	old_memcg = set_active_memcg(memcg);
#endif
	for (i = 0; i < nr_pages; i++) {
		pg = __bpf_alloc_page(nid);

		if (pg) {
			pages[i] = pg;
			continue;
		}
		for (j = 0; j < i; j++)
			free_pages_nolock(pages[j], 0);
		ret = -ENOMEM;
		break;
	}

#ifdef CONFIG_MEMCG
	set_active_memcg(old_memcg);
	mem_cgroup_put(memcg);
#endif
	return ret;
}

static int btf_field_cmp(const void *a, const void *b)
{
	const struct btf_field *f1 = a, *f2 = b;

	if (f1->offset < f2->offset)
		return -1;
	else if (f1->offset > f2->offset)
		return 1;
	return 0;
}

struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset,
				  u32 field_mask)
{
	struct btf_field *field;

	if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & field_mask))
		return NULL;
	field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp);
	if (!field || !(field->type & field_mask))
		return NULL;
	return field;
}

void btf_record_free(struct btf_record *rec)
{
	int i;

	if (IS_ERR_OR_NULL(rec))
		return;
	for (i = 0; i < rec->cnt; i++) {
		switch (rec->fields[i].type) {
		case BPF_KPTR_UNREF:
		case BPF_KPTR_REF:
		case BPF_KPTR_PERCPU:
		case BPF_UPTR:
			if (rec->fields[i].kptr.module)
				module_put(rec->fields[i].kptr.module);
			if (btf_is_kernel(rec->fields[i].kptr.btf))
				btf_put(rec->fields[i].kptr.btf);
			break;
		case BPF_LIST_HEAD:
		case BPF_LIST_NODE:
		case BPF_RB_ROOT:
		case BPF_RB_NODE:
		case BPF_SPIN_LOCK:
		case BPF_RES_SPIN_LOCK:
		case BPF_TIMER:
		case BPF_REFCOUNT:
		case BPF_WORKQUEUE:
		case BPF_TASK_WORK:
			/* Nothing to release */
			break;
		default:
			WARN_ON_ONCE(1);
			continue;
		}
	}
	kfree(rec);
}

void bpf_map_free_record(struct bpf_map *map)
{
	btf_record_free(map->record);
	map->record = NULL;
}
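
/*
 * btf_record_find() above relies on rec->fields being sorted by offset
 * (btf_parse_fields() is expected to keep them that way), since it looks
 * fields up with bsearch() and btf_field_cmp(). btf_record_free() drops the
 * module and kernel-BTF references that kptr/uptr fields may hold;
 * btf_record_dup() below acquires matching references for the copy.
 */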

struct btf_record *btf_record_dup(const struct btf_record *rec)
{
	const struct btf_field *fields;
	struct btf_record *new_rec;
	int ret, size, i;

	if (IS_ERR_OR_NULL(rec))
		return NULL;
	size = struct_size(rec, fields, rec->cnt);
	new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN);
	if (!new_rec)
		return ERR_PTR(-ENOMEM);
	/* Do a deep copy of the btf_record */
	fields = rec->fields;
	new_rec->cnt = 0;
	for (i = 0; i < rec->cnt; i++) {
		switch (fields[i].type) {
		case BPF_KPTR_UNREF:
		case BPF_KPTR_REF:
		case BPF_KPTR_PERCPU:
		case BPF_UPTR:
			if (btf_is_kernel(fields[i].kptr.btf))
				btf_get(fields[i].kptr.btf);
			if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
				ret = -ENXIO;
				goto free;
			}
			break;
		case BPF_LIST_HEAD:
		case BPF_LIST_NODE:
		case BPF_RB_ROOT:
		case BPF_RB_NODE:
		case BPF_SPIN_LOCK:
		case BPF_RES_SPIN_LOCK:
		case BPF_TIMER:
		case BPF_REFCOUNT:
		case BPF_WORKQUEUE:
		case BPF_TASK_WORK:
			/* Nothing to acquire */
			break;
		default:
			ret = -EFAULT;
			WARN_ON_ONCE(1);
			goto free;
		}
		new_rec->cnt++;
	}
	return new_rec;
free:
	btf_record_free(new_rec);
	return ERR_PTR(ret);
}

bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b)
{
	bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b);
	int size;

	if (!a_has_fields && !b_has_fields)
		return true;
	if (a_has_fields != b_has_fields)
		return false;
	if (rec_a->cnt != rec_b->cnt)
		return false;
	size = struct_size(rec_a, fields, rec_a->cnt);
	/* btf_parse_fields uses kzalloc to allocate a btf_record, so unused
	 * members are zeroed out. So memcmp is safe to do without worrying
	 * about padding/unused fields.
	 *
	 * While spin_lock, timer, and kptr have no relation to map BTF,
	 * list_head metadata is specific to map BTF, the btf and value_rec
	 * members in particular. btf is the map BTF, while value_rec points to
	 * btf_record in that map BTF.
	 *
	 * So while by default, we don't rely on the map BTF (which the records
	 * were parsed from) matching for both records, which is not backwards
	 * compatible, in case list_head is part of it, we implicitly rely on
	 * that by way of depending on memcmp succeeding for it.
	 */
	return !memcmp(rec_a, rec_b, size);
}

void bpf_obj_free_timer(const struct btf_record *rec, void *obj)
{
	if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER)))
		return;
	bpf_timer_cancel_and_free(obj + rec->timer_off);
}

void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj)
{
	if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_WORKQUEUE)))
		return;
	bpf_wq_cancel_and_free(obj + rec->wq_off);
}

void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
{
	if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK)))
		return;
	bpf_task_work_cancel_and_free(obj + rec->task_work_off);
}

void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
{
	const struct btf_field *fields;
	int i;

	if (IS_ERR_OR_NULL(rec))
		return;
	fields = rec->fields;
	for (i = 0; i < rec->cnt; i++) {
		struct btf_struct_meta *pointee_struct_meta;
		const struct btf_field *field = &fields[i];
		void *field_ptr = obj + field->offset;
		void *xchgd_field;

		switch (fields[i].type) {
		case BPF_SPIN_LOCK:
		case BPF_RES_SPIN_LOCK:
			break;
		case BPF_TIMER:
			bpf_timer_cancel_and_free(field_ptr);
			break;
		case BPF_WORKQUEUE:
			bpf_wq_cancel_and_free(field_ptr);
			break;
		case BPF_TASK_WORK:
			bpf_task_work_cancel_and_free(field_ptr);
			break;
		case BPF_KPTR_UNREF:
			WRITE_ONCE(*(u64 *)field_ptr, 0);
			break;
		case BPF_KPTR_REF:
		case BPF_KPTR_PERCPU:
			xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
			if (!xchgd_field)
				break;

			if (!btf_is_kernel(field->kptr.btf)) {
				pointee_struct_meta = btf_find_struct_meta(field->kptr.btf,
									   field->kptr.btf_id);
				__bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ?
								 pointee_struct_meta->record : NULL,
								 fields[i].type == BPF_KPTR_PERCPU);
			} else {
				field->kptr.dtor(xchgd_field);
			}
			break;
		case BPF_UPTR:
			/* The caller ensured that no one is using the uptr */
			unpin_uptr_kaddr(*(void **)field_ptr);
			break;
		case BPF_LIST_HEAD:
			if (WARN_ON_ONCE(rec->spin_lock_off < 0))
				continue;
			bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off);
			break;
		case BPF_RB_ROOT:
			if (WARN_ON_ONCE(rec->spin_lock_off < 0))
				continue;
			bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
			break;
		case BPF_LIST_NODE:
		case BPF_RB_NODE:
		case BPF_REFCOUNT:
			break;
		default:
			WARN_ON_ONCE(1);
			continue;
		}
	}
}

static void bpf_map_free(struct bpf_map *map)
{
	struct btf_record *rec = map->record;
	struct btf *btf = map->btf;

	/* implementation dependent freeing. Disabling migration to simplify
	 * the free of values or special fields allocated from bpf memory
	 * allocator.
	 */
	kfree(map->excl_prog_sha);
	migrate_disable();
	map->ops->map_free(map);
	migrate_enable();

	/* Delay freeing of btf_record for maps, as map_free
	 * callback usually needs access to them. It is better to do it here
	 * than require each callback to do the free itself manually.
	 *
	 * Note that the btf_record stashed in map->inner_map_meta->record was
	 * already freed using the map_free callback for map in map case which
	 * eventually calls bpf_map_free_meta, since inner_map_meta is only a
	 * template bpf_map struct used during verification.
	 */
	btf_record_free(rec);
	/* Delay freeing of btf for maps, as map_free callback may need
	 * struct_meta info which will be freed with btf_put().
	 */
	btf_put(btf);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	security_bpf_map_free(map);
	bpf_map_release_memcg(map);
	bpf_map_owner_free(map);
	bpf_map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic64_dec_and_test(&map->usercnt)) {
		if (map->ops->map_release_uref)
			map->ops->map_release_uref(map);
	}
}

static void bpf_map_free_in_work(struct bpf_map *map)
{
	INIT_WORK(&map->work, bpf_map_free_deferred);
	/* Avoid spawning kworkers, since they all might contend
	 * for the same mutex like slab_mutex.
	 */
	queue_work(system_dfl_wq, &map->work);
}

static void bpf_map_free_rcu_gp(struct rcu_head *rcu)
{
	bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu));
}

static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu)
{
	if (rcu_trace_implies_rcu_gp())
		bpf_map_free_rcu_gp(rcu);
	else
		call_rcu(rcu, bpf_map_free_rcu_gp);
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic64_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map);

		WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt));
		if (READ_ONCE(map->free_after_mult_rcu_gp))
			call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp);
		else if (READ_ONCE(map->free_after_rcu_gp))
			call_rcu(&map->rcu, bpf_map_free_rcu_gp);
		else
			bpf_map_free_in_work(map);
	}
}
EXPORT_SYMBOL_GPL(bpf_map_put);

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
{
	fmode_t mode = fd_file(f)->f_mode;

	/* Our file permissions may have been overridden by global
	 * map permissions facing syscall side.
	 */
	if (READ_ONCE(map->frozen))
		mode &= ~FMODE_CAN_WRITE;
	return mode;
}
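
/*
 * map_get_sys_perms() is what makes BPF_MAP_FREEZE stick on the syscall
 * side: once map->frozen is set, FMODE_CAN_WRITE is masked out here, so
 * write commands such as BPF_MAP_UPDATE_ELEM and BPF_MAP_DELETE_ELEM fail
 * with -EPERM. Sketch using libbpf's thin syscall wrappers:
 *
 *	bpf_map_freeze(map_fd);
 *	bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);	// fails with EPERM once frozen
 */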

#ifdef CONFIG_PROC_FS
/* Show the memory usage of a bpf map */
static u64 bpf_map_memory_usage(const struct bpf_map *map)
{
	return map->ops->map_mem_usage(map);
}

static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	struct bpf_map *map = filp->private_data;
	u32 type = 0, jited = 0;

	spin_lock(&map->owner_lock);
	if (map->owner) {
		type = map->owner->type;
		jited = map->owner->jited;
	}
	spin_unlock(&map->owner_lock);

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "map_extra:\t%#llx\n"
		   "memlock:\t%llu\n"
		   "map_id:\t%u\n"
		   "frozen:\t%u\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   (unsigned long long)map->map_extra,
		   bpf_map_memory_usage(map),
		   map->id,
		   READ_ONCE(map->frozen));
	if (type) {
		seq_printf(m, "owner_prog_type:\t%u\n", type);
		seq_printf(m, "owner_jited:\t%u\n", jited);
	}
}
#endif

static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}

/* called for any extra memory-mapped regions (except initial) */
static void bpf_map_mmap_open(struct vm_area_struct *vma)
{
	struct bpf_map *map = vma->vm_file->private_data;

	if (vma->vm_flags & VM_MAYWRITE)
		bpf_map_write_active_inc(map);
}

/* called for all unmapped memory region (including initial) */
static void bpf_map_mmap_close(struct vm_area_struct *vma)
{
	struct bpf_map *map = vma->vm_file->private_data;

	if (vma->vm_flags & VM_MAYWRITE)
		bpf_map_write_active_dec(map);
}

static const struct vm_operations_struct bpf_map_default_vmops = {
	.open = bpf_map_mmap_open,
	.close = bpf_map_mmap_close,
};

static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct bpf_map *map = filp->private_data;
	int err = 0;

	if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record))
		return -ENOTSUPP;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	mutex_lock(&map->freeze_mutex);

	if (vma->vm_flags & VM_WRITE) {
		if (map->frozen) {
			err = -EPERM;
			goto out;
		}
		/* map is meant to be read-only, so do not allow mapping as
		 * writable, because it's possible to leak a writable page
		 * reference which would allow user-space to still modify it
		 * after freezing, while the verifier assumes contents do not
		 * change
		 */
		if (map->map_flags & BPF_F_RDONLY_PROG) {
			err = -EACCES;
			goto out;
		}
		bpf_map_write_active_inc(map);
	}
out:
	mutex_unlock(&map->freeze_mutex);
	if (err)
		return err;

	/* set default open/close callbacks */
	vma->vm_ops = &bpf_map_default_vmops;
	vma->vm_private_data = map;
	vm_flags_clear(vma, VM_MAYEXEC);
	/* If mapping is read-only, then disallow potentially re-mapping with
	 * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing
	 * means that as far as BPF map's memory-mapped VMAs are concerned,
	 * VM_WRITE and VM_MAYWRITE are equivalent; if one of them is set,
	 * both should be set, so we can forget about VM_MAYWRITE and always
	 * check just VM_WRITE
	 */
	if (!(vma->vm_flags & VM_WRITE))
		vm_flags_clear(vma, VM_MAYWRITE);

	err = map->ops->map_mmap(map, vma);
	if (err) {
		if (vma->vm_flags & VM_WRITE)
			bpf_map_write_active_dec(map);
	}

	return err;
}
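
/*
 * Write-active accounting around mmap: a writable mapping bumps
 * map->writecnt here and in bpf_map_mmap_open(), and drops it in
 * bpf_map_mmap_close(), so map_freeze() below can refuse to freeze (-EBUSY)
 * while user space still has a writable view of the map.
 */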

static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_poll)
		return map->ops->map_poll(map, filp, pts);

	return EPOLLERR;
}

static unsigned long bpf_get_unmapped_area(struct file *filp, unsigned long addr,
					   unsigned long len, unsigned long pgoff,
					   unsigned long flags)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_get_unmapped_area)
		return map->ops->map_get_unmapped_area(filp, addr, len, pgoff, flags);
#ifdef CONFIG_MMU
	return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
#else
	return addr;
#endif
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
	.read = bpf_dummy_read,
	.write = bpf_dummy_write,
	.mmap = bpf_map_mmap,
	.poll = bpf_map_poll,
	.get_unmapped_area = bpf_get_unmapped_area,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

/* dst and src must have at least "size" number of bytes.
 * Return strlen on success and < 0 on error.
 */
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
{
	const char *end = src + size;
	const char *orig_src = src;

	memset(dst, 0, size);
	/* Copy all isalnum(), '_' and '.' chars. */
	while (src < end && *src) {
		if (!isalnum(*src) &&
		    *src != '_' && *src != '.')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in "size" number of bytes */
	if (src == end)
		return -EINVAL;

	return src - orig_src;
}
EXPORT_SYMBOL_GPL(bpf_obj_name_cpy);
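
/*
 * Examples of what bpf_obj_name_cpy() accepts: "my_prog", "map.v2" and
 * "Counter_1" are fine; a name containing a space or '-' is rejected with
 * -EINVAL, as is a name that fills all "size" bytes without a terminating
 * NUL (object names must leave room for the trailing '\0').
 */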

int map_check_no_btf(const struct bpf_map *map,
		     const struct btf *btf,
		     const struct btf_type *key_type,
		     const struct btf_type *value_type)
{
	return -ENOTSUPP;
}

static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
			 const struct btf *btf, u32 btf_key_id, u32 btf_value_id)
{
	const struct btf_type *key_type, *value_type;
	u32 key_size, value_size;
	int ret = 0;

	/* Some maps allow key to be unspecified. */
	if (btf_key_id) {
		key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
		if (!key_type || key_size != map->key_size)
			return -EINVAL;
	} else {
		key_type = btf_type_by_id(btf, 0);
		if (!map->ops->map_check_btf)
			return -EINVAL;
	}

	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
	if (!value_type || value_size != map->value_size)
		return -EINVAL;

	map->record = btf_parse_fields(btf, value_type,
				       BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
				       BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR |
				       BPF_TASK_WORK,
				       map->value_size);
	if (!IS_ERR_OR_NULL(map->record)) {
		int i;

		if (!bpf_token_capable(token, CAP_BPF)) {
			ret = -EPERM;
			goto free_map_tab;
		}
		if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) {
			ret = -EACCES;
			goto free_map_tab;
		}
		for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) {
			switch (map->record->field_mask & (1 << i)) {
			case 0:
				continue;
			case BPF_SPIN_LOCK:
			case BPF_RES_SPIN_LOCK:
				if (map->map_type != BPF_MAP_TYPE_HASH &&
				    map->map_type != BPF_MAP_TYPE_ARRAY &&
				    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
				    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
				    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
				    map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
				    map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
					ret = -EOPNOTSUPP;
					goto free_map_tab;
				}
				break;
			case BPF_TIMER:
			case BPF_WORKQUEUE:
			case BPF_TASK_WORK:
				if (map->map_type != BPF_MAP_TYPE_HASH &&
				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
				    map->map_type != BPF_MAP_TYPE_ARRAY) {
					ret = -EOPNOTSUPP;
					goto free_map_tab;
				}
				break;
			case BPF_KPTR_UNREF:
			case BPF_KPTR_REF:
			case BPF_KPTR_PERCPU:
			case BPF_REFCOUNT:
				if (map->map_type != BPF_MAP_TYPE_HASH &&
				    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
				    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH &&
				    map->map_type != BPF_MAP_TYPE_ARRAY &&
				    map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
				    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
				    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
				    map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
				    map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
					ret = -EOPNOTSUPP;
					goto free_map_tab;
				}
				break;
			case BPF_UPTR:
				if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) {
					ret = -EOPNOTSUPP;
					goto free_map_tab;
				}
				break;
			case BPF_LIST_HEAD:
			case BPF_RB_ROOT:
				if (map->map_type != BPF_MAP_TYPE_HASH &&
				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
				    map->map_type != BPF_MAP_TYPE_ARRAY) {
					ret = -EOPNOTSUPP;
					goto free_map_tab;
				}
				break;
			default:
				/* Fail if map_type checks are missing for a field type */
				ret = -EOPNOTSUPP;
				goto free_map_tab;
			}
		}
	}

	ret = btf_check_and_fixup_fields(btf, map->record);
	if (ret < 0)
		goto free_map_tab;

	if (map->ops->map_check_btf) {
		ret = map->ops->map_check_btf(map, btf, key_type, value_type);
		if (ret < 0)
			goto free_map_tab;
	}

	return ret;
free_map_tab:
	bpf_map_free_record(map);
	return ret;
}

static bool bpf_net_capable(void)
{
	return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
}

#define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size
/* called via syscall */
static int map_create(union bpf_attr *attr, bpfptr_t uattr)
{
	const struct bpf_map_ops *ops;
	struct bpf_token *token = NULL;
	int numa_node = bpf_map_attr_numa_node(attr);
	u32 map_type = attr->map_type;
	struct bpf_map *map;
	bool token_flag;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it
	 * to avoid per-map type checks tripping on unknown flag
	 */
	token_flag = attr->map_flags & BPF_F_TOKEN_FD;
	attr->map_flags &= ~BPF_F_TOKEN_FD;

	if (attr->btf_vmlinux_value_type_id) {
		if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
		    attr->btf_key_type_id || attr->btf_value_type_id)
			return -EINVAL;
	} else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
		return -EINVAL;
	}

	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
	    attr->map_type != BPF_MAP_TYPE_ARENA &&
	    attr->map_extra != 0)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map_type = attr->map_type;
	if (map_type >= ARRAY_SIZE(bpf_map_types))
		return -EINVAL;
	map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types));
	ops = bpf_map_types[map_type];
	if (!ops)
		return -EINVAL;

	if (ops->map_alloc_check) {
		err = ops->map_alloc_check(attr);
		if (err)
			return err;
	}
	if (attr->map_ifindex)
		ops = &bpf_map_offload_ops;
	if (!ops->map_mem_usage)
		return -EINVAL;

	if (token_flag) {
		token = bpf_token_get_from_fd(attr->map_token_fd);
		if (IS_ERR(token))
			return PTR_ERR(token);

		/* if current token doesn't grant map creation permissions,
		 * then we can't use this token, so ignore it and rely on
		 * system-wide capabilities checks
		 */
		if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) ||
		    !bpf_token_allow_map_type(token, attr->map_type)) {
			bpf_token_put(token);
			token = NULL;
		}
	}

	err = -EPERM;

	/* Intent here is for unprivileged_bpf_disabled to block BPF map
	 * creation for unprivileged users; other actions depend
	 * on fd availability and access to bpffs, so are dependent on
	 * object creation success. Even with unprivileged BPF disabled,
	 * capability checks are still carried out.
	 */
	if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF))
		goto put_token;

	/* check privileged map type permissions */
	switch (map_type) {
	case BPF_MAP_TYPE_ARRAY:
	case BPF_MAP_TYPE_PERCPU_ARRAY:
	case BPF_MAP_TYPE_PROG_ARRAY:
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
	case BPF_MAP_TYPE_CGROUP_ARRAY:
	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
	case BPF_MAP_TYPE_HASH:
	case BPF_MAP_TYPE_PERCPU_HASH:
	case BPF_MAP_TYPE_HASH_OF_MAPS:
	case BPF_MAP_TYPE_RINGBUF:
	case BPF_MAP_TYPE_USER_RINGBUF:
	case BPF_MAP_TYPE_CGROUP_STORAGE:
	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
		/* unprivileged */
		break;
	case BPF_MAP_TYPE_SK_STORAGE:
	case BPF_MAP_TYPE_INODE_STORAGE:
	case BPF_MAP_TYPE_TASK_STORAGE:
	case BPF_MAP_TYPE_CGRP_STORAGE:
	case BPF_MAP_TYPE_BLOOM_FILTER:
	case BPF_MAP_TYPE_LPM_TRIE:
	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
	case BPF_MAP_TYPE_STACK_TRACE:
	case BPF_MAP_TYPE_QUEUE:
	case BPF_MAP_TYPE_STACK:
	case BPF_MAP_TYPE_LRU_HASH:
	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
	case BPF_MAP_TYPE_STRUCT_OPS:
	case BPF_MAP_TYPE_CPUMAP:
	case BPF_MAP_TYPE_ARENA:
	case BPF_MAP_TYPE_INSN_ARRAY:
		if (!bpf_token_capable(token, CAP_BPF))
			goto put_token;
		break;
	case BPF_MAP_TYPE_SOCKMAP:
	case BPF_MAP_TYPE_SOCKHASH:
	case BPF_MAP_TYPE_DEVMAP:
	case BPF_MAP_TYPE_DEVMAP_HASH:
	case BPF_MAP_TYPE_XSKMAP:
		if (!bpf_token_capable(token, CAP_NET_ADMIN))
			goto put_token;
		break;
	default:
		WARN(1, "unsupported map type %d", map_type);
		goto put_token;
	}

	map = ops->map_alloc(attr);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto put_token;
	}
	map->ops = ops;
	map->map_type = map_type;

	err = bpf_obj_name_cpy(map->name, attr->map_name,
			       sizeof(attr->map_name));
	if (err < 0)
		goto free_map;

	preempt_disable();
	map->cookie = gen_cookie_next(&bpf_map_cookie);
	preempt_enable();

	atomic64_set(&map->refcnt, 1);
	atomic64_set(&map->usercnt, 1);
	mutex_init(&map->freeze_mutex);
	spin_lock_init(&map->owner_lock);

	if (attr->btf_key_type_id || attr->btf_value_type_id ||
	    /* Even if the map's value is a kernel struct,
	     * the bpf_prog.o must have BTF to begin with
	     * to figure out the corresponding kernel's
	     * counter part. Thus, attr->btf_fd has
	     * to be valid also.
	     */
	    attr->btf_vmlinux_value_type_id) {
		struct btf *btf;

		btf = btf_get_by_fd(attr->btf_fd);
		if (IS_ERR(btf)) {
			err = PTR_ERR(btf);
			goto free_map;
		}
		if (btf_is_kernel(btf)) {
			btf_put(btf);
			err = -EACCES;
			goto free_map;
		}
		map->btf = btf;

		if (attr->btf_value_type_id) {
			err = map_check_btf(map, token, btf, attr->btf_key_type_id,
					    attr->btf_value_type_id);
			if (err)
				goto free_map;
		}

		map->btf_key_type_id = attr->btf_key_type_id;
		map->btf_value_type_id = attr->btf_value_type_id;
		map->btf_vmlinux_value_type_id =
			attr->btf_vmlinux_value_type_id;
	}

	if (attr->excl_prog_hash) {
		bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel);

		if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) {
			err = -EINVAL;
			goto free_map;
		}

		map->excl_prog_sha = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
		if (!map->excl_prog_sha) {
			err = -ENOMEM;
			goto free_map;
		}

		if (copy_from_bpfptr(map->excl_prog_sha, uprog_hash, SHA256_DIGEST_SIZE)) {
			err = -EFAULT;
			goto free_map;
		}
	} else if (attr->excl_prog_hash_size) {
		err = -EINVAL;
		goto free_map;
	}

	err = security_bpf_map_create(map, attr, token, uattr.is_kernel);
	if (err)
		goto free_map_sec;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map_sec;

	bpf_map_save_memcg(map);
	bpf_token_put(token);

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put_with_uref() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put_with_uref(map);
		return err;
	}

	return err;

free_map_sec:
	security_bpf_map_free(map);
free_map:
	bpf_map_free(map);
put_token:
	bpf_token_put(token);
	return err;
}

void bpf_map_inc(struct bpf_map *map)
{
	atomic64_inc(&map->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_map_inc);

void bpf_map_inc_with_uref(struct bpf_map *map)
{
	atomic64_inc(&map->refcnt);
	atomic64_inc(&map->usercnt);
}
EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);

struct bpf_map *bpf_map_get(u32 ufd)
{
	CLASS(fd, f)(ufd);
	struct bpf_map *map = __bpf_map_get(f);

	if (!IS_ERR(map))
		bpf_map_inc(map);

	return map;
}
EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL");

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	CLASS(fd, f)(ufd);
	struct bpf_map *map = __bpf_map_get(f);

	if (!IS_ERR(map))
		bpf_map_inc_with_uref(map);

	return map;
}
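
/*
 * Note on the two counters used above: map->refcnt keeps the bpf_map object
 * itself alive, while map->usercnt tracks "user" references (the fd created
 * at map creation time and callers of the *_with_uref variants). When
 * usercnt drops to zero, bpf_map_put_uref() invokes ->map_release_uref(),
 * which lets map types that hold other objects (e.g. prog_array) release
 * them even though the map structure may outlive that point.
 */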

/* map_idr_lock should have been held or the map should have been
 * protected by rcu read lock.
 */
struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
{
	int refold;

	refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
	if (!refold)
		return ERR_PTR(-ENOENT);
	if (uref)
		atomic64_inc(&map->usercnt);

	return map;
}

struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
{
	lockdep_assert(rcu_read_lock_held());
	return __bpf_map_inc_not_zero(map, false);
}
EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);

int __weak bpf_stackmap_extract(struct bpf_map *map, void *key, void *value,
				bool delete)
{
	return -ENOTSUPP;
}

static void *__bpf_copy_key(void __user *ukey, u64 key_size)
{
	if (key_size)
		return vmemdup_user(ukey, key_size);

	if (ukey)
		return ERR_PTR(-EINVAL);

	return NULL;
}

static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
{
	if (key_size)
		return kvmemdup_bpfptr(ukey, key_size);

	if (!bpfptr_is_null(ukey))
		return ERR_PTR(-EINVAL);

	return NULL;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	CLASS(fd, f)(attr->map_fd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
		return -EPERM;

	err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK);
	if (err)
		return err;

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key))
		return PTR_ERR(key);

	value_size = bpf_map_value_size(map);

	err = -ENOMEM;
	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
		if (copy_from_user(value, uvalue, value_size))
			err = -EFAULT;
		else
			err = bpf_map_copy_value(map, key, value, attr->flags);
		goto free_value;
	}

	err = bpf_map_copy_value(map, key, value, attr->flags);
	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kvfree(value);
free_key:
	kvfree(key);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
{
	bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
	bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel);
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	CLASS(fd, f)(attr->map_fd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	err = bpf_map_check_op_flags(map, attr->flags, ~0);
	if (err)
		goto err_put;

	key = ___bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map);
	value = kvmemdup_bpfptr(uvalue, value_size);
	if (IS_ERR(value)) {
		err = PTR_ERR(value);
		goto free_key;
	}

	err = bpf_map_update_value(map, fd_file(f), key, value, attr->flags);
	if (!err)
		maybe_wait_bpf_programs(map);

	kvfree(value);
free_key:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
{
	bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
	struct bpf_map *map;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	CLASS(fd, f)(attr->map_fd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = ___bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (bpf_map_is_offloaded(map)) {
		err = bpf_map_offload_delete_elem(map, key);
		goto out;
	} else if (IS_FD_PROG_ARRAY(map) ||
		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		/* These maps require sleepable context */
		err = map->ops->map_delete_elem(map, key);
		goto out;
	}

	bpf_disable_instrumentation();
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	bpf_enable_instrumentation();
	if (!err)
		maybe_wait_bpf_programs(map);
out:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	struct bpf_map *map;
	void *key, *next_key;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	CLASS(fd, f)(attr->map_fd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
		return -EPERM;

	if (ukey) {
		key = __bpf_copy_key(ukey, map->key_size);
		if (IS_ERR(key))
			return PTR_ERR(key);
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kvmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	if (bpf_map_is_offloaded(map)) {
		err = bpf_map_offload_get_next_key(map, key, next_key);
		goto out;
	}

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
out:
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kvfree(next_key);
free_key:
	kvfree(key);
	return err;
}
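
/*
 * The generic_map_*_batch() helpers below process at most batch.count
 * elements, one element at a time, and write the number of fully processed
 * elements back to uattr->batch.count before returning (a copy fault may
 * leave it at the 0 written up front). User space should therefore consult
 * that count even when an error is returned.
 */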

int generic_map_delete_batch(struct bpf_map *map,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	u32 cp, max_count;
	int err = 0;
	void *key;

	if (attr->batch.elem_flags & ~BPF_F_LOCK)
		return -EINVAL;

	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
		return -EINVAL;
	}

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	if (put_user(0, &uattr->batch.count))
		return -EFAULT;

	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!key)
		return -ENOMEM;

	for (cp = 0; cp < max_count; cp++) {
		err = -EFAULT;
		if (copy_from_user(key, keys + cp * map->key_size,
				   map->key_size))
			break;

		if (bpf_map_is_offloaded(map)) {
			err = bpf_map_offload_delete_elem(map, key);
			break;
		}

		bpf_disable_instrumentation();
		rcu_read_lock();
		err = map->ops->map_delete_elem(map, key);
		rcu_read_unlock();
		bpf_enable_instrumentation();
		if (err)
			break;
		cond_resched();
	}
	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
		err = -EFAULT;

	kvfree(key);

	return err;
}

int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *values = u64_to_user_ptr(attr->batch.values);
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	u32 value_size, cp, max_count;
	void *key, *value;
	int err = 0;

	err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK);
	if (err)
		return err;

	value_size = bpf_map_value_size(map);

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	if (put_user(0, &uattr->batch.count))
		return -EFAULT;

	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!key)
		return -ENOMEM;

	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value) {
		kvfree(key);
		return -ENOMEM;
	}

	for (cp = 0; cp < max_count; cp++) {
		err = -EFAULT;
		if (copy_from_user(key, keys + cp * map->key_size,
				   map->key_size) ||
		    copy_from_user(value, values + cp * value_size, value_size))
			break;

		err = bpf_map_update_value(map, map_file, key, value,
					   attr->batch.elem_flags);

		if (err)
			break;
		cond_resched();
	}

	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
		err = -EFAULT;

	kvfree(value);
	kvfree(key);

	return err;
}
= -EFAULT; 2091 prev_key = NULL; 2092 if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) 2093 goto free_buf; 2094 key = buf; 2095 value = key + map->key_size; 2096 if (ubatch) 2097 prev_key = buf_prevkey; 2098 2099 for (cp = 0; cp < max_count;) { 2100 rcu_read_lock(); 2101 err = map->ops->map_get_next_key(map, prev_key, key); 2102 rcu_read_unlock(); 2103 if (err) 2104 break; 2105 err = bpf_map_copy_value(map, key, value, 2106 attr->batch.elem_flags); 2107 2108 if (err == -ENOENT) 2109 goto next_key; 2110 2111 if (err) 2112 goto free_buf; 2113 2114 if (copy_to_user(keys + cp * map->key_size, key, 2115 map->key_size)) { 2116 err = -EFAULT; 2117 goto free_buf; 2118 } 2119 if (copy_to_user(values + cp * value_size, value, value_size)) { 2120 err = -EFAULT; 2121 goto free_buf; 2122 } 2123 2124 cp++; 2125 next_key: 2126 if (!prev_key) 2127 prev_key = buf_prevkey; 2128 2129 swap(prev_key, key); 2130 cond_resched(); 2131 } 2132 2133 if (err == -EFAULT) 2134 goto free_buf; 2135 2136 if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || 2137 (cp && copy_to_user(uobatch, prev_key, map->key_size)))) 2138 err = -EFAULT; 2139 2140 free_buf: 2141 kvfree(buf_prevkey); 2142 kvfree(buf); 2143 return err; 2144 } 2145 2146 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags 2147 2148 static int map_lookup_and_delete_elem(union bpf_attr *attr) 2149 { 2150 void __user *ukey = u64_to_user_ptr(attr->key); 2151 void __user *uvalue = u64_to_user_ptr(attr->value); 2152 struct bpf_map *map; 2153 void *key, *value; 2154 u32 value_size; 2155 int err; 2156 2157 if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) 2158 return -EINVAL; 2159 2160 if (attr->flags & ~BPF_F_LOCK) 2161 return -EINVAL; 2162 2163 CLASS(fd, f)(attr->map_fd); 2164 map = __bpf_map_get(f); 2165 if (IS_ERR(map)) 2166 return PTR_ERR(map); 2167 bpf_map_write_active_inc(map); 2168 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || 2169 !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 2170 err = -EPERM; 2171 goto err_put; 2172 } 2173 2174 if (attr->flags && 2175 (map->map_type == BPF_MAP_TYPE_QUEUE || 2176 map->map_type == BPF_MAP_TYPE_STACK)) { 2177 err = -EINVAL; 2178 goto err_put; 2179 } 2180 2181 if ((attr->flags & BPF_F_LOCK) && 2182 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 2183 err = -EINVAL; 2184 goto err_put; 2185 } 2186 2187 key = __bpf_copy_key(ukey, map->key_size); 2188 if (IS_ERR(key)) { 2189 err = PTR_ERR(key); 2190 goto err_put; 2191 } 2192 2193 value_size = bpf_map_value_size(map); 2194 2195 err = -ENOMEM; 2196 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 2197 if (!value) 2198 goto free_key; 2199 2200 err = -ENOTSUPP; 2201 if (map->map_type == BPF_MAP_TYPE_QUEUE || 2202 map->map_type == BPF_MAP_TYPE_STACK) { 2203 err = map->ops->map_pop_elem(map, value); 2204 } else if (map->map_type == BPF_MAP_TYPE_HASH || 2205 map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 2206 map->map_type == BPF_MAP_TYPE_LRU_HASH || 2207 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 2208 map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 2209 if (!bpf_map_is_offloaded(map)) { 2210 bpf_disable_instrumentation(); 2211 rcu_read_lock(); 2212 err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags); 2213 rcu_read_unlock(); 2214 bpf_enable_instrumentation(); 2215 } 2216 } 2217 2218 if (err) 2219 goto free_value; 2220 2221 if (copy_to_user(uvalue, value, value_size) != 0) { 2222 err = -EFAULT; 2223 goto free_value; 2224 } 2225 2226 err = 0; 2227 2228 free_value: 2229 kvfree(value); 2230 free_key: 2231 kvfree(key); 2232 
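/* Common exit: drop the write-active count taken right after __bpf_map_get(). */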
err_put: 2233 bpf_map_write_active_dec(map); 2234 return err; 2235 } 2236 2237 #define BPF_MAP_FREEZE_LAST_FIELD map_fd 2238 2239 static int map_freeze(const union bpf_attr *attr) 2240 { 2241 int err = 0; 2242 struct bpf_map *map; 2243 2244 if (CHECK_ATTR(BPF_MAP_FREEZE)) 2245 return -EINVAL; 2246 2247 CLASS(fd, f)(attr->map_fd); 2248 map = __bpf_map_get(f); 2249 if (IS_ERR(map)) 2250 return PTR_ERR(map); 2251 2252 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) 2253 return -ENOTSUPP; 2254 2255 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) 2256 return -EPERM; 2257 2258 mutex_lock(&map->freeze_mutex); 2259 if (bpf_map_write_active(map)) { 2260 err = -EBUSY; 2261 goto err_put; 2262 } 2263 if (READ_ONCE(map->frozen)) { 2264 err = -EBUSY; 2265 goto err_put; 2266 } 2267 2268 WRITE_ONCE(map->frozen, true); 2269 err_put: 2270 mutex_unlock(&map->freeze_mutex); 2271 return err; 2272 } 2273 2274 static const struct bpf_prog_ops * const bpf_prog_types[] = { 2275 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 2276 [_id] = & _name ## _prog_ops, 2277 #define BPF_MAP_TYPE(_id, _ops) 2278 #define BPF_LINK_TYPE(_id, _name) 2279 #include <linux/bpf_types.h> 2280 #undef BPF_PROG_TYPE 2281 #undef BPF_MAP_TYPE 2282 #undef BPF_LINK_TYPE 2283 }; 2284 2285 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 2286 { 2287 const struct bpf_prog_ops *ops; 2288 2289 if (type >= ARRAY_SIZE(bpf_prog_types)) 2290 return -EINVAL; 2291 type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types)); 2292 ops = bpf_prog_types[type]; 2293 if (!ops) 2294 return -EINVAL; 2295 2296 if (!bpf_prog_is_offloaded(prog->aux)) 2297 prog->aux->ops = ops; 2298 else 2299 prog->aux->ops = &bpf_offload_prog_ops; 2300 prog->type = type; 2301 return 0; 2302 } 2303 2304 enum bpf_audit { 2305 BPF_AUDIT_LOAD, 2306 BPF_AUDIT_UNLOAD, 2307 BPF_AUDIT_MAX, 2308 }; 2309 2310 static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { 2311 [BPF_AUDIT_LOAD] = "LOAD", 2312 [BPF_AUDIT_UNLOAD] = "UNLOAD", 2313 }; 2314 2315 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) 2316 { 2317 struct audit_context *ctx = NULL; 2318 struct audit_buffer *ab; 2319 2320 if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) 2321 return; 2322 if (audit_enabled == AUDIT_OFF) 2323 return; 2324 if (!in_hardirq() && !irqs_disabled()) 2325 ctx = audit_context(); 2326 ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); 2327 if (unlikely(!ab)) 2328 return; 2329 audit_log_format(ab, "prog-id=%u op=%s", 2330 prog->aux->id, bpf_audit_str[op]); 2331 audit_log_end(ab); 2332 } 2333 2334 static int bpf_prog_alloc_id(struct bpf_prog *prog) 2335 { 2336 int id; 2337 2338 idr_preload(GFP_KERNEL); 2339 spin_lock_bh(&prog_idr_lock); 2340 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 2341 if (id > 0) 2342 prog->aux->id = id; 2343 spin_unlock_bh(&prog_idr_lock); 2344 idr_preload_end(); 2345 2346 /* id is in [1, INT_MAX) */ 2347 if (WARN_ON_ONCE(!id)) 2348 return -ENOSPC; 2349 2350 return id > 0 ? 0 : id; 2351 } 2352 2353 void bpf_prog_free_id(struct bpf_prog *prog) 2354 { 2355 unsigned long flags; 2356 2357 /* cBPF to eBPF migrations are currently not in the idr store. 2358 * Offloaded programs are removed from the store when their device 2359 * disappears - even if someone grabs an fd to them they are unusable, 2360 * simply waiting for refcnt to drop to be freed. 
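* An id of zero means the program was never inserted into the idr or has already been removed from it, so there is nothing to do here.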
2361 */ 2362 if (!prog->aux->id) 2363 return; 2364 2365 spin_lock_irqsave(&prog_idr_lock, flags); 2366 idr_remove(&prog_idr, prog->aux->id); 2367 prog->aux->id = 0; 2368 spin_unlock_irqrestore(&prog_idr_lock, flags); 2369 } 2370 2371 static void __bpf_prog_put_rcu(struct rcu_head *rcu) 2372 { 2373 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 2374 2375 kvfree(aux->func_info); 2376 kfree(aux->func_info_aux); 2377 free_uid(aux->user); 2378 security_bpf_prog_free(aux->prog); 2379 bpf_prog_free(aux->prog); 2380 } 2381 2382 static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) 2383 { 2384 bpf_prog_kallsyms_del_all(prog); 2385 btf_put(prog->aux->btf); 2386 module_put(prog->aux->mod); 2387 kvfree(prog->aux->jited_linfo); 2388 kvfree(prog->aux->linfo); 2389 kfree(prog->aux->kfunc_tab); 2390 kfree(prog->aux->ctx_arg_info); 2391 if (prog->aux->attach_btf) 2392 btf_put(prog->aux->attach_btf); 2393 2394 if (deferred) { 2395 if (prog->sleepable) 2396 call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); 2397 else 2398 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 2399 } else { 2400 __bpf_prog_put_rcu(&prog->aux->rcu); 2401 } 2402 } 2403 2404 static void bpf_prog_put_deferred(struct work_struct *work) 2405 { 2406 struct bpf_prog_aux *aux; 2407 struct bpf_prog *prog; 2408 2409 aux = container_of(work, struct bpf_prog_aux, work); 2410 prog = aux->prog; 2411 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 2412 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); 2413 bpf_prog_free_id(prog); 2414 __bpf_prog_put_noref(prog, true); 2415 } 2416 2417 static void __bpf_prog_put(struct bpf_prog *prog) 2418 { 2419 struct bpf_prog_aux *aux = prog->aux; 2420 2421 if (atomic64_dec_and_test(&aux->refcnt)) { 2422 if (in_hardirq() || irqs_disabled()) { 2423 INIT_WORK(&aux->work, bpf_prog_put_deferred); 2424 schedule_work(&aux->work); 2425 } else { 2426 bpf_prog_put_deferred(&aux->work); 2427 } 2428 } 2429 } 2430 2431 void bpf_prog_put(struct bpf_prog *prog) 2432 { 2433 __bpf_prog_put(prog); 2434 } 2435 EXPORT_SYMBOL_GPL(bpf_prog_put); 2436 2437 static int bpf_prog_release(struct inode *inode, struct file *filp) 2438 { 2439 struct bpf_prog *prog = filp->private_data; 2440 2441 bpf_prog_put(prog); 2442 return 0; 2443 } 2444 2445 struct bpf_prog_kstats { 2446 u64 nsecs; 2447 u64 cnt; 2448 u64 misses; 2449 }; 2450 2451 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog) 2452 { 2453 struct bpf_prog_stats *stats; 2454 unsigned int flags; 2455 2456 if (unlikely(!prog->stats)) 2457 return; 2458 2459 stats = this_cpu_ptr(prog->stats); 2460 flags = u64_stats_update_begin_irqsave(&stats->syncp); 2461 u64_stats_inc(&stats->misses); 2462 u64_stats_update_end_irqrestore(&stats->syncp, flags); 2463 } 2464 2465 static void bpf_prog_get_stats(const struct bpf_prog *prog, 2466 struct bpf_prog_kstats *stats) 2467 { 2468 u64 nsecs = 0, cnt = 0, misses = 0; 2469 int cpu; 2470 2471 for_each_possible_cpu(cpu) { 2472 const struct bpf_prog_stats *st; 2473 unsigned int start; 2474 u64 tnsecs, tcnt, tmisses; 2475 2476 st = per_cpu_ptr(prog->stats, cpu); 2477 do { 2478 start = u64_stats_fetch_begin(&st->syncp); 2479 tnsecs = u64_stats_read(&st->nsecs); 2480 tcnt = u64_stats_read(&st->cnt); 2481 tmisses = u64_stats_read(&st->misses); 2482 } while (u64_stats_fetch_retry(&st->syncp, start)); 2483 nsecs += tnsecs; 2484 cnt += tcnt; 2485 misses += tmisses; 2486 } 2487 stats->nsecs = nsecs; 2488 stats->cnt = cnt; 2489 stats->misses = misses; 2490 } 2491 2492 #ifdef CONFIG_PROC_FS 2493 static 
void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 2494 { 2495 const struct bpf_prog *prog = filp->private_data; 2496 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2497 struct bpf_prog_kstats stats; 2498 2499 bpf_prog_get_stats(prog, &stats); 2500 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2501 seq_printf(m, 2502 "prog_type:\t%u\n" 2503 "prog_jited:\t%u\n" 2504 "prog_tag:\t%s\n" 2505 "memlock:\t%llu\n" 2506 "prog_id:\t%u\n" 2507 "run_time_ns:\t%llu\n" 2508 "run_cnt:\t%llu\n" 2509 "recursion_misses:\t%llu\n" 2510 "verified_insns:\t%u\n", 2511 prog->type, 2512 prog->jited, 2513 prog_tag, 2514 prog->pages * 1ULL << PAGE_SHIFT, 2515 prog->aux->id, 2516 stats.nsecs, 2517 stats.cnt, 2518 stats.misses, 2519 prog->aux->verified_insns); 2520 } 2521 #endif 2522 2523 const struct file_operations bpf_prog_fops = { 2524 #ifdef CONFIG_PROC_FS 2525 .show_fdinfo = bpf_prog_show_fdinfo, 2526 #endif 2527 .release = bpf_prog_release, 2528 .read = bpf_dummy_read, 2529 .write = bpf_dummy_write, 2530 }; 2531 2532 int bpf_prog_new_fd(struct bpf_prog *prog) 2533 { 2534 int ret; 2535 2536 ret = security_bpf_prog(prog); 2537 if (ret < 0) 2538 return ret; 2539 2540 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 2541 O_RDWR | O_CLOEXEC); 2542 } 2543 2544 void bpf_prog_add(struct bpf_prog *prog, int i) 2545 { 2546 atomic64_add(i, &prog->aux->refcnt); 2547 } 2548 EXPORT_SYMBOL_GPL(bpf_prog_add); 2549 2550 void bpf_prog_sub(struct bpf_prog *prog, int i) 2551 { 2552 /* Only to be used for undoing previous bpf_prog_add() in some 2553 * error path. We still know that another entity in our call 2554 * path holds a reference to the program, thus atomic_sub() can 2555 * be safely used in such cases! 2556 */ 2557 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 2558 } 2559 EXPORT_SYMBOL_GPL(bpf_prog_sub); 2560 2561 void bpf_prog_inc(struct bpf_prog *prog) 2562 { 2563 atomic64_inc(&prog->aux->refcnt); 2564 } 2565 EXPORT_SYMBOL_GPL(bpf_prog_inc); 2566 2567 /* prog_idr_lock should have been held */ 2568 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 2569 { 2570 int refold; 2571 2572 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 2573 2574 if (!refold) 2575 return ERR_PTR(-ENOENT); 2576 2577 return prog; 2578 } 2579 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 2580 2581 bool bpf_prog_get_ok(struct bpf_prog *prog, 2582 enum bpf_prog_type *attach_type, bool attach_drv) 2583 { 2584 /* not an attachment, just a refcount inc, always allow */ 2585 if (!attach_type) 2586 return true; 2587 2588 if (prog->type != *attach_type) 2589 return false; 2590 if (bpf_prog_is_offloaded(prog->aux) && !attach_drv) 2591 return false; 2592 2593 return true; 2594 } 2595 2596 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, 2597 bool attach_drv) 2598 { 2599 CLASS(fd, f)(ufd); 2600 struct bpf_prog *prog; 2601 2602 if (fd_empty(f)) 2603 return ERR_PTR(-EBADF); 2604 if (fd_file(f)->f_op != &bpf_prog_fops) 2605 return ERR_PTR(-EINVAL); 2606 2607 prog = fd_file(f)->private_data; 2608 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) 2609 return ERR_PTR(-EINVAL); 2610 2611 bpf_prog_inc(prog); 2612 return prog; 2613 } 2614 2615 struct bpf_prog *bpf_prog_get(u32 ufd) 2616 { 2617 return __bpf_prog_get(ufd, NULL, false); 2618 } 2619 2620 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, 2621 bool attach_drv) 2622 { 2623 return __bpf_prog_get(ufd, &type, attach_drv); 2624 } 2625 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 2626 2627 /* 
Initially all BPF programs could be loaded w/o specifying 2628 * expected_attach_type. Later for some of them specifying expected_attach_type 2629 * at load time became required so that program could be validated properly. 2630 * Programs of types that are allowed to be loaded both w/ and w/o (for 2631 * backward compatibility) expected_attach_type, should have the default attach 2632 * type assigned to expected_attach_type for the latter case, so that it can be 2633 * validated later at attach time. 2634 * 2635 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if 2636 * prog type requires it but has some attach types that have to be backward 2637 * compatible. 2638 */ 2639 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) 2640 { 2641 switch (attr->prog_type) { 2642 case BPF_PROG_TYPE_CGROUP_SOCK: 2643 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't 2644 * exist so checking for non-zero is the way to go here. 2645 */ 2646 if (!attr->expected_attach_type) 2647 attr->expected_attach_type = 2648 BPF_CGROUP_INET_SOCK_CREATE; 2649 break; 2650 case BPF_PROG_TYPE_SK_REUSEPORT: 2651 if (!attr->expected_attach_type) 2652 attr->expected_attach_type = 2653 BPF_SK_REUSEPORT_SELECT; 2654 break; 2655 } 2656 } 2657 2658 static int 2659 bpf_prog_load_check_attach(enum bpf_prog_type prog_type, 2660 enum bpf_attach_type expected_attach_type, 2661 struct btf *attach_btf, u32 btf_id, 2662 struct bpf_prog *dst_prog) 2663 { 2664 if (btf_id) { 2665 if (btf_id > BTF_MAX_TYPE) 2666 return -EINVAL; 2667 2668 if (!attach_btf && !dst_prog) 2669 return -EINVAL; 2670 2671 switch (prog_type) { 2672 case BPF_PROG_TYPE_TRACING: 2673 case BPF_PROG_TYPE_LSM: 2674 case BPF_PROG_TYPE_STRUCT_OPS: 2675 case BPF_PROG_TYPE_EXT: 2676 break; 2677 default: 2678 return -EINVAL; 2679 } 2680 } 2681 2682 if (attach_btf && (!btf_id || dst_prog)) 2683 return -EINVAL; 2684 2685 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && 2686 prog_type != BPF_PROG_TYPE_EXT) 2687 return -EINVAL; 2688 2689 switch (prog_type) { 2690 case BPF_PROG_TYPE_CGROUP_SOCK: 2691 switch (expected_attach_type) { 2692 case BPF_CGROUP_INET_SOCK_CREATE: 2693 case BPF_CGROUP_INET_SOCK_RELEASE: 2694 case BPF_CGROUP_INET4_POST_BIND: 2695 case BPF_CGROUP_INET6_POST_BIND: 2696 return 0; 2697 default: 2698 return -EINVAL; 2699 } 2700 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2701 switch (expected_attach_type) { 2702 case BPF_CGROUP_INET4_BIND: 2703 case BPF_CGROUP_INET6_BIND: 2704 case BPF_CGROUP_INET4_CONNECT: 2705 case BPF_CGROUP_INET6_CONNECT: 2706 case BPF_CGROUP_UNIX_CONNECT: 2707 case BPF_CGROUP_INET4_GETPEERNAME: 2708 case BPF_CGROUP_INET6_GETPEERNAME: 2709 case BPF_CGROUP_UNIX_GETPEERNAME: 2710 case BPF_CGROUP_INET4_GETSOCKNAME: 2711 case BPF_CGROUP_INET6_GETSOCKNAME: 2712 case BPF_CGROUP_UNIX_GETSOCKNAME: 2713 case BPF_CGROUP_UDP4_SENDMSG: 2714 case BPF_CGROUP_UDP6_SENDMSG: 2715 case BPF_CGROUP_UNIX_SENDMSG: 2716 case BPF_CGROUP_UDP4_RECVMSG: 2717 case BPF_CGROUP_UDP6_RECVMSG: 2718 case BPF_CGROUP_UNIX_RECVMSG: 2719 return 0; 2720 default: 2721 return -EINVAL; 2722 } 2723 case BPF_PROG_TYPE_CGROUP_SKB: 2724 switch (expected_attach_type) { 2725 case BPF_CGROUP_INET_INGRESS: 2726 case BPF_CGROUP_INET_EGRESS: 2727 return 0; 2728 default: 2729 return -EINVAL; 2730 } 2731 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2732 switch (expected_attach_type) { 2733 case BPF_CGROUP_SETSOCKOPT: 2734 case BPF_CGROUP_GETSOCKOPT: 2735 return 0; 2736 default: 2737 return -EINVAL; 2738 } 2739 case BPF_PROG_TYPE_SK_LOOKUP: 2740 if 
(expected_attach_type == BPF_SK_LOOKUP) 2741 return 0; 2742 return -EINVAL; 2743 case BPF_PROG_TYPE_SK_REUSEPORT: 2744 switch (expected_attach_type) { 2745 case BPF_SK_REUSEPORT_SELECT: 2746 case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: 2747 return 0; 2748 default: 2749 return -EINVAL; 2750 } 2751 case BPF_PROG_TYPE_NETFILTER: 2752 if (expected_attach_type == BPF_NETFILTER) 2753 return 0; 2754 return -EINVAL; 2755 case BPF_PROG_TYPE_SYSCALL: 2756 case BPF_PROG_TYPE_EXT: 2757 if (expected_attach_type) 2758 return -EINVAL; 2759 fallthrough; 2760 default: 2761 return 0; 2762 } 2763 } 2764 2765 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) 2766 { 2767 switch (prog_type) { 2768 case BPF_PROG_TYPE_SCHED_CLS: 2769 case BPF_PROG_TYPE_SCHED_ACT: 2770 case BPF_PROG_TYPE_XDP: 2771 case BPF_PROG_TYPE_LWT_IN: 2772 case BPF_PROG_TYPE_LWT_OUT: 2773 case BPF_PROG_TYPE_LWT_XMIT: 2774 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2775 case BPF_PROG_TYPE_SK_SKB: 2776 case BPF_PROG_TYPE_SK_MSG: 2777 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2778 case BPF_PROG_TYPE_CGROUP_DEVICE: 2779 case BPF_PROG_TYPE_CGROUP_SOCK: 2780 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2781 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2782 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2783 case BPF_PROG_TYPE_SOCK_OPS: 2784 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2785 case BPF_PROG_TYPE_NETFILTER: 2786 return true; 2787 case BPF_PROG_TYPE_CGROUP_SKB: 2788 /* always unpriv */ 2789 case BPF_PROG_TYPE_SK_REUSEPORT: 2790 /* equivalent to SOCKET_FILTER. need CAP_BPF only */ 2791 default: 2792 return false; 2793 } 2794 } 2795 2796 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) 2797 { 2798 switch (prog_type) { 2799 case BPF_PROG_TYPE_KPROBE: 2800 case BPF_PROG_TYPE_TRACEPOINT: 2801 case BPF_PROG_TYPE_PERF_EVENT: 2802 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2803 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2804 case BPF_PROG_TYPE_TRACING: 2805 case BPF_PROG_TYPE_LSM: 2806 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ 2807 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2808 return true; 2809 default: 2810 return false; 2811 } 2812 } 2813 2814 static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr, 2815 bool is_kernel) 2816 { 2817 bpfptr_t usig = make_bpfptr(attr->signature, is_kernel); 2818 struct bpf_dynptr_kern sig_ptr, insns_ptr; 2819 struct bpf_key *key = NULL; 2820 void *sig; 2821 int err = 0; 2822 2823 if (system_keyring_id_check(attr->keyring_id) == 0) 2824 key = bpf_lookup_system_key(attr->keyring_id); 2825 else 2826 key = bpf_lookup_user_key(attr->keyring_id, 0); 2827 2828 if (!key) 2829 return -EINVAL; 2830 2831 sig = kvmemdup_bpfptr(usig, attr->signature_size); 2832 if (IS_ERR(sig)) { 2833 bpf_key_put(key); 2834 return -ENOMEM; 2835 } 2836 2837 bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0, 2838 attr->signature_size); 2839 bpf_dynptr_init(&insns_ptr, prog->insnsi, BPF_DYNPTR_TYPE_LOCAL, 0, 2840 prog->len * sizeof(struct bpf_insn)); 2841 2842 err = bpf_verify_pkcs7_signature((struct bpf_dynptr *)&insns_ptr, 2843 (struct bpf_dynptr *)&sig_ptr, key); 2844 2845 bpf_key_put(key); 2846 kvfree(sig); 2847 return err; 2848 } 2849 2850 static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog) 2851 { 2852 int err; 2853 int i; 2854 2855 for (i = 0; i < prog->aux->used_map_cnt; i++) { 2856 if (prog->aux->used_maps[i]->map_type != BPF_MAP_TYPE_INSN_ARRAY) 2857 continue; 2858 2859 err = bpf_insn_array_ready(prog->aux->used_maps[i]); 2860 if (err) 2861 return err; 2862 } 2863 2864 return 0; 2865 } 2866 
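/*
 * A minimal user-space sketch of driving the BPF_PROG_LOAD command handled
 * below through the raw bpf(2) syscall. Illustrative only: field names follow
 * the uapi layout of union bpf_attr, the program is the trivial two-instruction
 * "r0 = 0; exit", and error handling is omitted.
 *
 *	struct bpf_insn insns[] = {
 *		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0 },
 *		{ .code = BPF_JMP | BPF_EXIT },
 *	};
 *	union bpf_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insn_cnt  = sizeof(insns) / sizeof(insns[0]),
 *		.insns     = (__u64)(unsigned long)insns,
 *		.license   = (__u64)(unsigned long)"GPL",
 *	};
 *	int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *
 * On success the returned FD holds one reference on the program; closing it
 * drops that reference (see bpf_prog_release() above).
 */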
2867 /* last field in 'union bpf_attr' used by this command */ 2868 #define BPF_PROG_LOAD_LAST_FIELD keyring_id 2869 2870 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) 2871 { 2872 enum bpf_prog_type type = attr->prog_type; 2873 struct bpf_prog *prog, *dst_prog = NULL; 2874 struct btf *attach_btf = NULL; 2875 struct bpf_token *token = NULL; 2876 bool bpf_cap; 2877 int err; 2878 char license[128]; 2879 2880 if (CHECK_ATTR(BPF_PROG_LOAD)) 2881 return -EINVAL; 2882 2883 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | 2884 BPF_F_ANY_ALIGNMENT | 2885 BPF_F_TEST_STATE_FREQ | 2886 BPF_F_SLEEPABLE | 2887 BPF_F_TEST_RND_HI32 | 2888 BPF_F_XDP_HAS_FRAGS | 2889 BPF_F_XDP_DEV_BOUND_ONLY | 2890 BPF_F_TEST_REG_INVARIANTS | 2891 BPF_F_TOKEN_FD)) 2892 return -EINVAL; 2893 2894 bpf_prog_load_fixup_attach_type(attr); 2895 2896 if (attr->prog_flags & BPF_F_TOKEN_FD) { 2897 token = bpf_token_get_from_fd(attr->prog_token_fd); 2898 if (IS_ERR(token)) 2899 return PTR_ERR(token); 2900 /* if current token doesn't grant prog loading permissions, 2901 * then we can't use this token, so ignore it and rely on 2902 * system-wide capabilities checks 2903 */ 2904 if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || 2905 !bpf_token_allow_prog_type(token, attr->prog_type, 2906 attr->expected_attach_type)) { 2907 bpf_token_put(token); 2908 token = NULL; 2909 } 2910 } 2911 2912 bpf_cap = bpf_token_capable(token, CAP_BPF); 2913 err = -EPERM; 2914 2915 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 2916 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && 2917 !bpf_cap) 2918 goto put_token; 2919 2920 /* Intent here is for unprivileged_bpf_disabled to block BPF program 2921 * creation for unprivileged users; other actions depend 2922 * on fd availability and access to bpffs, so are dependent on 2923 * object creation success. Even with unprivileged BPF disabled, 2924 * capability checks are still carried out for these 2925 * and other operations. 2926 */ 2927 if (sysctl_unprivileged_bpf_disabled && !bpf_cap) 2928 goto put_token; 2929 2930 if (attr->insn_cnt == 0 || 2931 attr->insn_cnt > (bpf_cap ? 
BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { 2932 err = -E2BIG; 2933 goto put_token; 2934 } 2935 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 2936 type != BPF_PROG_TYPE_CGROUP_SKB && 2937 !bpf_cap) 2938 goto put_token; 2939 2940 if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN)) 2941 goto put_token; 2942 if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) 2943 goto put_token; 2944 2945 /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog 2946 * or btf, we need to check which one it is 2947 */ 2948 if (attr->attach_prog_fd) { 2949 dst_prog = bpf_prog_get(attr->attach_prog_fd); 2950 if (IS_ERR(dst_prog)) { 2951 dst_prog = NULL; 2952 attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); 2953 if (IS_ERR(attach_btf)) { 2954 err = -EINVAL; 2955 goto put_token; 2956 } 2957 if (!btf_is_kernel(attach_btf)) { 2958 /* attaching through specifying bpf_prog's BTF 2959 * objects directly might be supported eventually 2960 */ 2961 btf_put(attach_btf); 2962 err = -ENOTSUPP; 2963 goto put_token; 2964 } 2965 } 2966 } else if (attr->attach_btf_id) { 2967 /* fall back to vmlinux BTF, if BTF type ID is specified */ 2968 attach_btf = bpf_get_btf_vmlinux(); 2969 if (IS_ERR(attach_btf)) { 2970 err = PTR_ERR(attach_btf); 2971 goto put_token; 2972 } 2973 if (!attach_btf) { 2974 err = -EINVAL; 2975 goto put_token; 2976 } 2977 btf_get(attach_btf); 2978 } 2979 2980 if (bpf_prog_load_check_attach(type, attr->expected_attach_type, 2981 attach_btf, attr->attach_btf_id, 2982 dst_prog)) { 2983 if (dst_prog) 2984 bpf_prog_put(dst_prog); 2985 if (attach_btf) 2986 btf_put(attach_btf); 2987 err = -EINVAL; 2988 goto put_token; 2989 } 2990 2991 /* plain bpf_prog allocation */ 2992 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 2993 if (!prog) { 2994 if (dst_prog) 2995 bpf_prog_put(dst_prog); 2996 if (attach_btf) 2997 btf_put(attach_btf); 2998 err = -EINVAL; 2999 goto put_token; 3000 } 3001 3002 prog->expected_attach_type = attr->expected_attach_type; 3003 prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE); 3004 prog->aux->attach_btf = attach_btf; 3005 prog->aux->attach_btf_id = attr->attach_btf_id; 3006 prog->aux->dst_prog = dst_prog; 3007 prog->aux->dev_bound = !!attr->prog_ifindex; 3008 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; 3009 3010 /* move token into prog->aux, reuse taken refcnt */ 3011 prog->aux->token = token; 3012 token = NULL; 3013 3014 prog->aux->user = get_current_user(); 3015 prog->len = attr->insn_cnt; 3016 3017 err = -EFAULT; 3018 if (copy_from_bpfptr(prog->insns, 3019 make_bpfptr(attr->insns, uattr.is_kernel), 3020 bpf_prog_insn_size(prog)) != 0) 3021 goto free_prog; 3022 /* copy eBPF program license from user space */ 3023 if (strncpy_from_bpfptr(license, 3024 make_bpfptr(attr->license, uattr.is_kernel), 3025 sizeof(license) - 1) < 0) 3026 goto free_prog; 3027 license[sizeof(license) - 1] = 0; 3028 3029 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 3030 prog->gpl_compatible = license_is_gpl_compatible(license) ? 
1 : 0; 3031 3032 if (attr->signature) { 3033 err = bpf_prog_verify_signature(prog, attr, uattr.is_kernel); 3034 if (err) 3035 goto free_prog; 3036 } 3037 3038 prog->orig_prog = NULL; 3039 prog->jited = 0; 3040 3041 atomic64_set(&prog->aux->refcnt, 1); 3042 3043 if (bpf_prog_is_dev_bound(prog->aux)) { 3044 err = bpf_prog_dev_bound_init(prog, attr); 3045 if (err) 3046 goto free_prog; 3047 } 3048 3049 if (type == BPF_PROG_TYPE_EXT && dst_prog && 3050 bpf_prog_is_dev_bound(dst_prog->aux)) { 3051 err = bpf_prog_dev_bound_inherit(prog, dst_prog); 3052 if (err) 3053 goto free_prog; 3054 } 3055 3056 /* 3057 * Bookkeeping for managing the program attachment chain. 3058 * 3059 * It might be tempting to set attach_tracing_prog flag at the attachment 3060 * time, but this will not prevent from loading bunch of tracing prog 3061 * first, then attach them one to another. 3062 * 3063 * The flag attach_tracing_prog is set for the whole program lifecycle, and 3064 * doesn't have to be cleared in bpf_tracing_link_release, since tracing 3065 * programs cannot change attachment target. 3066 */ 3067 if (type == BPF_PROG_TYPE_TRACING && dst_prog && 3068 dst_prog->type == BPF_PROG_TYPE_TRACING) { 3069 prog->aux->attach_tracing_prog = true; 3070 } 3071 3072 /* find program type: socket_filter vs tracing_filter */ 3073 err = find_prog_type(type, prog); 3074 if (err < 0) 3075 goto free_prog; 3076 3077 prog->aux->load_time = ktime_get_boottime_ns(); 3078 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, 3079 sizeof(attr->prog_name)); 3080 if (err < 0) 3081 goto free_prog; 3082 3083 err = security_bpf_prog_load(prog, attr, token, uattr.is_kernel); 3084 if (err) 3085 goto free_prog_sec; 3086 3087 /* run eBPF verifier */ 3088 err = bpf_check(&prog, attr, uattr, uattr_size); 3089 if (err < 0) 3090 goto free_used_maps; 3091 3092 prog = bpf_prog_select_runtime(prog, &err); 3093 if (err < 0) 3094 goto free_used_maps; 3095 3096 err = bpf_prog_mark_insn_arrays_ready(prog); 3097 if (err < 0) 3098 goto free_used_maps; 3099 3100 err = bpf_prog_alloc_id(prog); 3101 if (err) 3102 goto free_used_maps; 3103 3104 /* Upon success of bpf_prog_alloc_id(), the BPF prog is 3105 * effectively publicly exposed. However, retrieving via 3106 * bpf_prog_get_fd_by_id() will take another reference, 3107 * therefore it cannot be gone underneath us. 3108 * 3109 * Only for the time /after/ successful bpf_prog_new_fd() 3110 * and before returning to userspace, we might just hold 3111 * one reference and any parallel close on that fd could 3112 * rip everything out. Hence, below notifications must 3113 * happen before bpf_prog_new_fd(). 3114 * 3115 * Also, any failure handling from this point onwards must 3116 * be using bpf_prog_put() given the program is exposed. 3117 */ 3118 bpf_prog_kallsyms_add(prog); 3119 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); 3120 bpf_audit_prog(prog, BPF_AUDIT_LOAD); 3121 3122 err = bpf_prog_new_fd(prog); 3123 if (err < 0) 3124 bpf_prog_put(prog); 3125 return err; 3126 3127 free_used_maps: 3128 /* In case we have subprogs, we need to wait for a grace 3129 * period before we can tear down JIT memory since symbols 3130 * are already exposed under kallsyms. 
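* That is why __bpf_prog_put_noref() below is passed prog->aux->real_func_cnt as its 'deferred' argument: the free is routed through an RCU callback whenever subprogs exist and done synchronously otherwise.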
3131 */ 3132 __bpf_prog_put_noref(prog, prog->aux->real_func_cnt); 3133 return err; 3134 3135 free_prog_sec: 3136 security_bpf_prog_free(prog); 3137 free_prog: 3138 free_uid(prog->aux->user); 3139 if (prog->aux->attach_btf) 3140 btf_put(prog->aux->attach_btf); 3141 bpf_prog_free(prog); 3142 put_token: 3143 bpf_token_put(token); 3144 return err; 3145 } 3146 3147 #define BPF_OBJ_LAST_FIELD path_fd 3148 3149 static int bpf_obj_pin(const union bpf_attr *attr) 3150 { 3151 int path_fd; 3152 3153 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD) 3154 return -EINVAL; 3155 3156 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3157 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3158 return -EINVAL; 3159 3160 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 3161 return bpf_obj_pin_user(attr->bpf_fd, path_fd, 3162 u64_to_user_ptr(attr->pathname)); 3163 } 3164 3165 static int bpf_obj_get(const union bpf_attr *attr) 3166 { 3167 int path_fd; 3168 3169 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 3170 attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD)) 3171 return -EINVAL; 3172 3173 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3174 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3175 return -EINVAL; 3176 3177 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 3178 return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname), 3179 attr->file_flags); 3180 } 3181 3182 /* bpf_link_init_sleepable() allows to specify whether BPF link itself has 3183 * "sleepable" semantics, which normally would mean that BPF link's attach 3184 * hook can dereference link or link's underlying program for some time after 3185 * detachment due to RCU Tasks Trace-based lifetime protection scheme. 3186 * BPF program itself can be non-sleepable, yet, because it's transitively 3187 * reachable through BPF link, its freeing has to be delayed until after RCU 3188 * Tasks Trace GP. 3189 */ 3190 void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, 3191 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3192 enum bpf_attach_type attach_type, bool sleepable) 3193 { 3194 WARN_ON(ops->dealloc && ops->dealloc_deferred); 3195 atomic64_set(&link->refcnt, 1); 3196 link->type = type; 3197 link->sleepable = sleepable; 3198 link->id = 0; 3199 link->ops = ops; 3200 link->prog = prog; 3201 link->attach_type = attach_type; 3202 } 3203 3204 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, 3205 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3206 enum bpf_attach_type attach_type) 3207 { 3208 bpf_link_init_sleepable(link, type, ops, prog, attach_type, false); 3209 } 3210 3211 static void bpf_link_free_id(int id) 3212 { 3213 if (!id) 3214 return; 3215 3216 spin_lock_bh(&link_idr_lock); 3217 idr_remove(&link_idr, id); 3218 spin_unlock_bh(&link_idr_lock); 3219 } 3220 3221 /* Clean up bpf_link and corresponding anon_inode file and FD. After 3222 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 3223 * anon_inode's release() call. This helper marks bpf_link as 3224 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 3225 * is not decremented, it's the responsibility of a calling code that failed 3226 * to complete bpf_link initialization. 3227 * This helper eventually calls link's dealloc callback, but does not call 3228 * link's release callback. 
3229 */ 3230 void bpf_link_cleanup(struct bpf_link_primer *primer) 3231 { 3232 primer->link->prog = NULL; 3233 bpf_link_free_id(primer->id); 3234 fput(primer->file); 3235 put_unused_fd(primer->fd); 3236 } 3237 3238 void bpf_link_inc(struct bpf_link *link) 3239 { 3240 atomic64_inc(&link->refcnt); 3241 } 3242 3243 static void bpf_link_dealloc(struct bpf_link *link) 3244 { 3245 /* now that we know that bpf_link itself can't be reached, put underlying BPF program */ 3246 if (link->prog) 3247 bpf_prog_put(link->prog); 3248 3249 /* free bpf_link and its containing memory */ 3250 if (link->ops->dealloc_deferred) 3251 link->ops->dealloc_deferred(link); 3252 else 3253 link->ops->dealloc(link); 3254 } 3255 3256 static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) 3257 { 3258 struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); 3259 3260 bpf_link_dealloc(link); 3261 } 3262 3263 static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) 3264 { 3265 if (rcu_trace_implies_rcu_gp()) 3266 bpf_link_defer_dealloc_rcu_gp(rcu); 3267 else 3268 call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); 3269 } 3270 3271 /* bpf_link_free is guaranteed to be called from process context */ 3272 static void bpf_link_free(struct bpf_link *link) 3273 { 3274 const struct bpf_link_ops *ops = link->ops; 3275 3276 bpf_link_free_id(link->id); 3277 /* detach BPF program, clean up used resources */ 3278 if (link->prog) 3279 ops->release(link); 3280 if (ops->dealloc_deferred) { 3281 /* Schedule BPF link deallocation, which will only then 3282 * trigger putting BPF program refcount. 3283 * If underlying BPF program is sleepable or BPF link's target 3284 * attach hookpoint is sleepable or otherwise requires RCU GPs 3285 * to ensure link and its underlying BPF program is not 3286 * reachable anymore, we need to first wait for RCU tasks 3287 * trace sync, and then go through "classic" RCU grace period 3288 */ 3289 if (link->sleepable || (link->prog && link->prog->sleepable)) 3290 call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); 3291 else 3292 call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); 3293 } else if (ops->dealloc) { 3294 bpf_link_dealloc(link); 3295 } 3296 } 3297 3298 static void bpf_link_put_deferred(struct work_struct *work) 3299 { 3300 struct bpf_link *link = container_of(work, struct bpf_link, work); 3301 3302 bpf_link_free(link); 3303 } 3304 3305 /* bpf_link_put might be called from atomic context. It needs to be called 3306 * from sleepable context in order to acquire sleeping locks during the process. 
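* That is why the final put below hands the actual bpf_link_free() off to the system workqueue instead of freeing the link in place.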
3307 */ 3308 void bpf_link_put(struct bpf_link *link) 3309 { 3310 if (!atomic64_dec_and_test(&link->refcnt)) 3311 return; 3312 3313 INIT_WORK(&link->work, bpf_link_put_deferred); 3314 schedule_work(&link->work); 3315 } 3316 EXPORT_SYMBOL(bpf_link_put); 3317 3318 static void bpf_link_put_direct(struct bpf_link *link) 3319 { 3320 if (!atomic64_dec_and_test(&link->refcnt)) 3321 return; 3322 bpf_link_free(link); 3323 } 3324 3325 static int bpf_link_release(struct inode *inode, struct file *filp) 3326 { 3327 struct bpf_link *link = filp->private_data; 3328 3329 bpf_link_put_direct(link); 3330 return 0; 3331 } 3332 3333 #ifdef CONFIG_PROC_FS 3334 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 3335 #define BPF_MAP_TYPE(_id, _ops) 3336 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name, 3337 static const char *bpf_link_type_strs[] = { 3338 [BPF_LINK_TYPE_UNSPEC] = "<invalid>", 3339 #include <linux/bpf_types.h> 3340 }; 3341 #undef BPF_PROG_TYPE 3342 #undef BPF_MAP_TYPE 3343 #undef BPF_LINK_TYPE 3344 3345 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) 3346 { 3347 const struct bpf_link *link = filp->private_data; 3348 const struct bpf_prog *prog = link->prog; 3349 enum bpf_link_type type = link->type; 3350 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 3351 3352 if (type < ARRAY_SIZE(bpf_link_type_strs) && bpf_link_type_strs[type]) { 3353 if (link->type == BPF_LINK_TYPE_KPROBE_MULTI) 3354 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_KPROBE_MULTI_RETURN ? 3355 "kretprobe_multi" : "kprobe_multi"); 3356 else if (link->type == BPF_LINK_TYPE_UPROBE_MULTI) 3357 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_UPROBE_MULTI_RETURN ? 3358 "uretprobe_multi" : "uprobe_multi"); 3359 else 3360 seq_printf(m, "link_type:\t%s\n", bpf_link_type_strs[type]); 3361 } else { 3362 WARN_ONCE(1, "missing BPF_LINK_TYPE(...) for link type %u\n", type); 3363 seq_printf(m, "link_type:\t<%u>\n", type); 3364 } 3365 seq_printf(m, "link_id:\t%u\n", link->id); 3366 3367 if (prog) { 3368 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 3369 seq_printf(m, 3370 "prog_tag:\t%s\n" 3371 "prog_id:\t%u\n", 3372 prog_tag, 3373 prog->aux->id); 3374 } 3375 if (link->ops->show_fdinfo) 3376 link->ops->show_fdinfo(link, m); 3377 } 3378 #endif 3379 3380 static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts) 3381 { 3382 struct bpf_link *link = file->private_data; 3383 3384 return link->ops->poll(file, pts); 3385 } 3386 3387 static const struct file_operations bpf_link_fops = { 3388 #ifdef CONFIG_PROC_FS 3389 .show_fdinfo = bpf_link_show_fdinfo, 3390 #endif 3391 .release = bpf_link_release, 3392 .read = bpf_dummy_read, 3393 .write = bpf_dummy_write, 3394 }; 3395 3396 static const struct file_operations bpf_link_fops_poll = { 3397 #ifdef CONFIG_PROC_FS 3398 .show_fdinfo = bpf_link_show_fdinfo, 3399 #endif 3400 .release = bpf_link_release, 3401 .read = bpf_dummy_read, 3402 .write = bpf_dummy_write, 3403 .poll = bpf_link_poll, 3404 }; 3405 3406 static int bpf_link_alloc_id(struct bpf_link *link) 3407 { 3408 int id; 3409 3410 idr_preload(GFP_KERNEL); 3411 spin_lock_bh(&link_idr_lock); 3412 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); 3413 spin_unlock_bh(&link_idr_lock); 3414 idr_preload_end(); 3415 3416 return id; 3417 } 3418 3419 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, 3420 * reserving unused FD and allocating ID from link_idr. 
This is to be paired 3421 * with bpf_link_settle() to install FD and ID and expose bpf_link to 3422 * user-space, if bpf_link is successfully attached. If not, bpf_link and 3423 * pre-allocated resources are to be freed with bpf_link_cleanup() call. All the 3424 * transient state is passed around in struct bpf_link_primer. 3425 * This is the preferred way to create and initialize bpf_link, especially when 3426 * there are complicated and expensive operations in between creating bpf_link 3427 * itself and attaching it to a BPF hook. By using bpf_link_prime() and 3428 * bpf_link_settle(), kernel code using bpf_link doesn't have to perform 3429 * expensive (and potentially failing) roll back operations in the rare case 3430 * that the file, FD, or ID can't be allocated. 3431 */ 3432 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) 3433 { 3434 struct file *file; 3435 int fd, id; 3436 3437 fd = get_unused_fd_flags(O_CLOEXEC); 3438 if (fd < 0) 3439 return fd; 3440 3441 3442 id = bpf_link_alloc_id(link); 3443 if (id < 0) { 3444 put_unused_fd(fd); 3445 return id; 3446 } 3447 3448 file = anon_inode_getfile("bpf_link", 3449 link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops, 3450 link, O_CLOEXEC); 3451 if (IS_ERR(file)) { 3452 bpf_link_free_id(id); 3453 put_unused_fd(fd); 3454 return PTR_ERR(file); 3455 } 3456 3457 primer->link = link; 3458 primer->file = file; 3459 primer->fd = fd; 3460 primer->id = id; 3461 return 0; 3462 } 3463 3464 int bpf_link_settle(struct bpf_link_primer *primer) 3465 { 3466 /* make bpf_link fetchable by ID */ 3467 spin_lock_bh(&link_idr_lock); 3468 primer->link->id = primer->id; 3469 spin_unlock_bh(&link_idr_lock); 3470 /* make bpf_link fetchable by FD */ 3471 fd_install(primer->fd, primer->file); 3472 /* pass through installed FD */ 3473 return primer->fd; 3474 } 3475 3476 int bpf_link_new_fd(struct bpf_link *link) 3477 { 3478 return anon_inode_getfd("bpf-link", 3479 link->ops->poll ? 

&bpf_link_fops_poll : &bpf_link_fops, 3480 link, O_CLOEXEC); 3481 } 3482 3483 struct bpf_link *bpf_link_get_from_fd(u32 ufd) 3484 { 3485 CLASS(fd, f)(ufd); 3486 struct bpf_link *link; 3487 3488 if (fd_empty(f)) 3489 return ERR_PTR(-EBADF); 3490 if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) 3491 return ERR_PTR(-EINVAL); 3492 3493 link = fd_file(f)->private_data; 3494 bpf_link_inc(link); 3495 return link; 3496 } 3497 EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL"); 3498 3499 static void bpf_tracing_link_release(struct bpf_link *link) 3500 { 3501 struct bpf_tracing_link *tr_link = 3502 container_of(link, struct bpf_tracing_link, link.link); 3503 3504 WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, 3505 tr_link->trampoline, 3506 tr_link->tgt_prog)); 3507 3508 bpf_trampoline_put(tr_link->trampoline); 3509 3510 /* tgt_prog is NULL if target is a kernel function */ 3511 if (tr_link->tgt_prog) 3512 bpf_prog_put(tr_link->tgt_prog); 3513 } 3514 3515 static void bpf_tracing_link_dealloc(struct bpf_link *link) 3516 { 3517 struct bpf_tracing_link *tr_link = 3518 container_of(link, struct bpf_tracing_link, link.link); 3519 3520 kfree(tr_link); 3521 } 3522 3523 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, 3524 struct seq_file *seq) 3525 { 3526 struct bpf_tracing_link *tr_link = 3527 container_of(link, struct bpf_tracing_link, link.link); 3528 u32 target_btf_id, target_obj_id; 3529 3530 bpf_trampoline_unpack_key(tr_link->trampoline->key, 3531 &target_obj_id, &target_btf_id); 3532 seq_printf(seq, 3533 "attach_type:\t%d\n" 3534 "target_obj_id:\t%u\n" 3535 "target_btf_id:\t%u\n" 3536 "cookie:\t%llu\n", 3537 link->attach_type, 3538 target_obj_id, 3539 target_btf_id, 3540 tr_link->link.cookie); 3541 } 3542 3543 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, 3544 struct bpf_link_info *info) 3545 { 3546 struct bpf_tracing_link *tr_link = 3547 container_of(link, struct bpf_tracing_link, link.link); 3548 3549 info->tracing.attach_type = link->attach_type; 3550 info->tracing.cookie = tr_link->link.cookie; 3551 bpf_trampoline_unpack_key(tr_link->trampoline->key, 3552 &info->tracing.target_obj_id, 3553 &info->tracing.target_btf_id); 3554 3555 return 0; 3556 } 3557 3558 static const struct bpf_link_ops bpf_tracing_link_lops = { 3559 .release = bpf_tracing_link_release, 3560 .dealloc = bpf_tracing_link_dealloc, 3561 .show_fdinfo = bpf_tracing_link_show_fdinfo, 3562 .fill_link_info = bpf_tracing_link_fill_link_info, 3563 }; 3564 3565 static int bpf_tracing_prog_attach(struct bpf_prog *prog, 3566 int tgt_prog_fd, 3567 u32 btf_id, 3568 u64 bpf_cookie, 3569 enum bpf_attach_type attach_type) 3570 { 3571 struct bpf_link_primer link_primer; 3572 struct bpf_prog *tgt_prog = NULL; 3573 struct bpf_trampoline *tr = NULL; 3574 struct bpf_tracing_link *link; 3575 u64 key = 0; 3576 int err; 3577 3578 switch (prog->type) { 3579 case BPF_PROG_TYPE_TRACING: 3580 if (prog->expected_attach_type != BPF_TRACE_FENTRY && 3581 prog->expected_attach_type != BPF_TRACE_FEXIT && 3582 prog->expected_attach_type != BPF_MODIFY_RETURN) { 3583 err = -EINVAL; 3584 goto out_put_prog; 3585 } 3586 break; 3587 case BPF_PROG_TYPE_EXT: 3588 if (prog->expected_attach_type != 0) { 3589 err = -EINVAL; 3590 goto out_put_prog; 3591 } 3592 break; 3593 case BPF_PROG_TYPE_LSM: 3594 if (prog->expected_attach_type != BPF_LSM_MAC) { 3595 err = -EINVAL; 3596 goto out_put_prog; 3597 } 3598 break; 3599 default: 3600 err = -EINVAL; 3601 goto out_put_prog; 3602 } 3603 3604 
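/* A target prog FD and a target BTF ID are only meaningful as a pair, so reject the case where exactly one of them is set. */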
if (!!tgt_prog_fd != !!btf_id) { 3605 err = -EINVAL; 3606 goto out_put_prog; 3607 } 3608 3609 if (tgt_prog_fd) { 3610 /* 3611 * For now we only allow new targets for BPF_PROG_TYPE_EXT. If this 3612 * part would be changed to implement the same for 3613 * BPF_PROG_TYPE_TRACING, do not forget to update the way how 3614 * attach_tracing_prog flag is set. 3615 */ 3616 if (prog->type != BPF_PROG_TYPE_EXT) { 3617 err = -EINVAL; 3618 goto out_put_prog; 3619 } 3620 3621 tgt_prog = bpf_prog_get(tgt_prog_fd); 3622 if (IS_ERR(tgt_prog)) { 3623 err = PTR_ERR(tgt_prog); 3624 tgt_prog = NULL; 3625 goto out_put_prog; 3626 } 3627 3628 key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id); 3629 } 3630 3631 link = kzalloc(sizeof(*link), GFP_USER); 3632 if (!link) { 3633 err = -ENOMEM; 3634 goto out_put_prog; 3635 } 3636 bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, 3637 &bpf_tracing_link_lops, prog, attach_type); 3638 3639 link->link.cookie = bpf_cookie; 3640 3641 mutex_lock(&prog->aux->dst_mutex); 3642 3643 /* There are a few possible cases here: 3644 * 3645 * - if prog->aux->dst_trampoline is set, the program was just loaded 3646 * and not yet attached to anything, so we can use the values stored 3647 * in prog->aux 3648 * 3649 * - if prog->aux->dst_trampoline is NULL, the program has already been 3650 * attached to a target and its initial target was cleared (below) 3651 * 3652 * - if tgt_prog != NULL, the caller specified tgt_prog_fd + 3653 * target_btf_id using the link_create API. 3654 * 3655 * - if tgt_prog == NULL when this function was called using the old 3656 * raw_tracepoint_open API, and we need a target from prog->aux 3657 * 3658 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program 3659 * was detached and is going for re-attachment. 3660 * 3661 * - if prog->aux->dst_trampoline is NULL and tgt_prog and prog->aux->attach_btf 3662 * are NULL, then program was already attached and user did not provide 3663 * tgt_prog_fd so we have no way to find out or create trampoline 3664 */ 3665 if (!prog->aux->dst_trampoline && !tgt_prog) { 3666 /* 3667 * Allow re-attach for TRACING and LSM programs. If it's 3668 * currently linked, bpf_trampoline_link_prog will fail. 3669 * EXT programs need to specify tgt_prog_fd, so they 3670 * re-attach in separate code path. 3671 */ 3672 if (prog->type != BPF_PROG_TYPE_TRACING && 3673 prog->type != BPF_PROG_TYPE_LSM) { 3674 err = -EINVAL; 3675 goto out_unlock; 3676 } 3677 /* We can allow re-attach only if we have valid attach_btf. 
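* Without attach_btf there is no way to recompute the trampoline key just below.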
*/ 3678 if (!prog->aux->attach_btf) { 3679 err = -EINVAL; 3680 goto out_unlock; 3681 } 3682 btf_id = prog->aux->attach_btf_id; 3683 key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id); 3684 } 3685 3686 if (!prog->aux->dst_trampoline || 3687 (key && key != prog->aux->dst_trampoline->key)) { 3688 /* If there is no saved target, or the specified target is 3689 * different from the destination specified at load time, we 3690 * need a new trampoline and a check for compatibility 3691 */ 3692 struct bpf_attach_target_info tgt_info = {}; 3693 3694 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, 3695 &tgt_info); 3696 if (err) 3697 goto out_unlock; 3698 3699 if (tgt_info.tgt_mod) { 3700 module_put(prog->aux->mod); 3701 prog->aux->mod = tgt_info.tgt_mod; 3702 } 3703 3704 tr = bpf_trampoline_get(key, &tgt_info); 3705 if (!tr) { 3706 err = -ENOMEM; 3707 goto out_unlock; 3708 } 3709 } else { 3710 /* The caller didn't specify a target, or the target was the 3711 * same as the destination supplied during program load. This 3712 * means we can reuse the trampoline and reference from program 3713 * load time, and there is no need to allocate a new one. This 3714 * can only happen once for any program, as the saved values in 3715 * prog->aux are cleared below. 3716 */ 3717 tr = prog->aux->dst_trampoline; 3718 tgt_prog = prog->aux->dst_prog; 3719 } 3720 3721 err = bpf_link_prime(&link->link.link, &link_primer); 3722 if (err) 3723 goto out_unlock; 3724 3725 err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog); 3726 if (err) { 3727 bpf_link_cleanup(&link_primer); 3728 link = NULL; 3729 goto out_unlock; 3730 } 3731 3732 link->tgt_prog = tgt_prog; 3733 link->trampoline = tr; 3734 3735 /* Always clear the trampoline and target prog from prog->aux to make 3736 * sure the original attach destination is not kept alive after a 3737 * program is (re-)attached to another target. 
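* References that are no longer needed are dropped just below, before the saved pointers are cleared.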
3738 */ 3739 if (prog->aux->dst_prog && 3740 (tgt_prog_fd || tr != prog->aux->dst_trampoline)) 3741 /* got extra prog ref from syscall, or attaching to different prog */ 3742 bpf_prog_put(prog->aux->dst_prog); 3743 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) 3744 /* we allocated a new trampoline, so free the old one */ 3745 bpf_trampoline_put(prog->aux->dst_trampoline); 3746 3747 prog->aux->dst_prog = NULL; 3748 prog->aux->dst_trampoline = NULL; 3749 mutex_unlock(&prog->aux->dst_mutex); 3750 3751 return bpf_link_settle(&link_primer); 3752 out_unlock: 3753 if (tr && tr != prog->aux->dst_trampoline) 3754 bpf_trampoline_put(tr); 3755 mutex_unlock(&prog->aux->dst_mutex); 3756 kfree(link); 3757 out_put_prog: 3758 if (tgt_prog_fd && tgt_prog) 3759 bpf_prog_put(tgt_prog); 3760 return err; 3761 } 3762 3763 static void bpf_raw_tp_link_release(struct bpf_link *link) 3764 { 3765 struct bpf_raw_tp_link *raw_tp = 3766 container_of(link, struct bpf_raw_tp_link, link); 3767 3768 bpf_probe_unregister(raw_tp->btp, raw_tp); 3769 bpf_put_raw_tracepoint(raw_tp->btp); 3770 } 3771 3772 static void bpf_raw_tp_link_dealloc(struct bpf_link *link) 3773 { 3774 struct bpf_raw_tp_link *raw_tp = 3775 container_of(link, struct bpf_raw_tp_link, link); 3776 3777 kfree(raw_tp); 3778 } 3779 3780 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, 3781 struct seq_file *seq) 3782 { 3783 struct bpf_raw_tp_link *raw_tp_link = 3784 container_of(link, struct bpf_raw_tp_link, link); 3785 3786 seq_printf(seq, 3787 "tp_name:\t%s\n" 3788 "cookie:\t%llu\n", 3789 raw_tp_link->btp->tp->name, 3790 raw_tp_link->cookie); 3791 } 3792 3793 static int bpf_copy_to_user(char __user *ubuf, const char *buf, u32 ulen, 3794 u32 len) 3795 { 3796 if (ulen >= len + 1) { 3797 if (copy_to_user(ubuf, buf, len + 1)) 3798 return -EFAULT; 3799 } else { 3800 char zero = '\0'; 3801 3802 if (copy_to_user(ubuf, buf, ulen - 1)) 3803 return -EFAULT; 3804 if (put_user(zero, ubuf + ulen - 1)) 3805 return -EFAULT; 3806 return -ENOSPC; 3807 } 3808 3809 return 0; 3810 } 3811 3812 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, 3813 struct bpf_link_info *info) 3814 { 3815 struct bpf_raw_tp_link *raw_tp_link = 3816 container_of(link, struct bpf_raw_tp_link, link); 3817 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); 3818 const char *tp_name = raw_tp_link->btp->tp->name; 3819 u32 ulen = info->raw_tracepoint.tp_name_len; 3820 size_t tp_len = strlen(tp_name); 3821 3822 if (!ulen ^ !ubuf) 3823 return -EINVAL; 3824 3825 info->raw_tracepoint.tp_name_len = tp_len + 1; 3826 info->raw_tracepoint.cookie = raw_tp_link->cookie; 3827 3828 if (!ubuf) 3829 return 0; 3830 3831 return bpf_copy_to_user(ubuf, tp_name, ulen, tp_len); 3832 } 3833 3834 static const struct bpf_link_ops bpf_raw_tp_link_lops = { 3835 .release = bpf_raw_tp_link_release, 3836 .dealloc_deferred = bpf_raw_tp_link_dealloc, 3837 .show_fdinfo = bpf_raw_tp_link_show_fdinfo, 3838 .fill_link_info = bpf_raw_tp_link_fill_link_info, 3839 }; 3840 3841 #ifdef CONFIG_PERF_EVENTS 3842 struct bpf_perf_link { 3843 struct bpf_link link; 3844 struct file *perf_file; 3845 }; 3846 3847 static void bpf_perf_link_release(struct bpf_link *link) 3848 { 3849 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3850 struct perf_event *event = perf_link->perf_file->private_data; 3851 3852 perf_event_free_bpf_prog(event); 3853 fput(perf_link->perf_file); 3854 } 3855 3856 static void bpf_perf_link_dealloc(struct bpf_link *link) 3857 { 
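/* The link itself can no longer be reached at this point; all that is left is to free its container. */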
3858 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3859 3860 kfree(perf_link); 3861 } 3862 3863 static int bpf_perf_link_fill_common(const struct perf_event *event, 3864 char __user *uname, u32 *ulenp, 3865 u64 *probe_offset, u64 *probe_addr, 3866 u32 *fd_type, unsigned long *missed) 3867 { 3868 const char *buf; 3869 u32 prog_id, ulen; 3870 size_t len; 3871 int err; 3872 3873 ulen = *ulenp; 3874 if (!ulen ^ !uname) 3875 return -EINVAL; 3876 3877 err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, 3878 probe_offset, probe_addr, missed); 3879 if (err) 3880 return err; 3881 3882 if (buf) { 3883 len = strlen(buf); 3884 *ulenp = len + 1; 3885 } else { 3886 *ulenp = 1; 3887 } 3888 if (!uname) 3889 return 0; 3890 3891 if (buf) { 3892 err = bpf_copy_to_user(uname, buf, ulen, len); 3893 if (err) 3894 return err; 3895 } else { 3896 char zero = '\0'; 3897 3898 if (put_user(zero, uname)) 3899 return -EFAULT; 3900 } 3901 return 0; 3902 } 3903 3904 #ifdef CONFIG_KPROBE_EVENTS 3905 static int bpf_perf_link_fill_kprobe(const struct perf_event *event, 3906 struct bpf_link_info *info) 3907 { 3908 unsigned long missed; 3909 char __user *uname; 3910 u64 addr, offset; 3911 u32 ulen, type; 3912 int err; 3913 3914 uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); 3915 ulen = info->perf_event.kprobe.name_len; 3916 err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr, 3917 &type, &missed); 3918 if (err) 3919 return err; 3920 if (type == BPF_FD_TYPE_KRETPROBE) 3921 info->perf_event.type = BPF_PERF_EVENT_KRETPROBE; 3922 else 3923 info->perf_event.type = BPF_PERF_EVENT_KPROBE; 3924 info->perf_event.kprobe.name_len = ulen; 3925 info->perf_event.kprobe.offset = offset; 3926 info->perf_event.kprobe.missed = missed; 3927 if (!kallsyms_show_value(current_cred())) 3928 addr = 0; 3929 info->perf_event.kprobe.addr = addr; 3930 info->perf_event.kprobe.cookie = event->bpf_cookie; 3931 return 0; 3932 } 3933 3934 static void bpf_perf_link_fdinfo_kprobe(const struct perf_event *event, 3935 struct seq_file *seq) 3936 { 3937 const char *name; 3938 int err; 3939 u32 prog_id, type; 3940 u64 offset, addr; 3941 unsigned long missed; 3942 3943 err = bpf_get_perf_event_info(event, &prog_id, &type, &name, 3944 &offset, &addr, &missed); 3945 if (err) 3946 return; 3947 3948 seq_printf(seq, 3949 "name:\t%s\n" 3950 "offset:\t%#llx\n" 3951 "missed:\t%lu\n" 3952 "addr:\t%#llx\n" 3953 "event_type:\t%s\n" 3954 "cookie:\t%llu\n", 3955 name, offset, missed, addr, 3956 type == BPF_FD_TYPE_KRETPROBE ? 
"kretprobe" : "kprobe", 3957 event->bpf_cookie); 3958 } 3959 #endif 3960 3961 #ifdef CONFIG_UPROBE_EVENTS 3962 static int bpf_perf_link_fill_uprobe(const struct perf_event *event, 3963 struct bpf_link_info *info) 3964 { 3965 u64 ref_ctr_offset, offset; 3966 char __user *uname; 3967 u32 ulen, type; 3968 int err; 3969 3970 uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); 3971 ulen = info->perf_event.uprobe.name_len; 3972 err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &ref_ctr_offset, 3973 &type, NULL); 3974 if (err) 3975 return err; 3976 3977 if (type == BPF_FD_TYPE_URETPROBE) 3978 info->perf_event.type = BPF_PERF_EVENT_URETPROBE; 3979 else 3980 info->perf_event.type = BPF_PERF_EVENT_UPROBE; 3981 info->perf_event.uprobe.name_len = ulen; 3982 info->perf_event.uprobe.offset = offset; 3983 info->perf_event.uprobe.cookie = event->bpf_cookie; 3984 info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset; 3985 return 0; 3986 } 3987 3988 static void bpf_perf_link_fdinfo_uprobe(const struct perf_event *event, 3989 struct seq_file *seq) 3990 { 3991 const char *name; 3992 int err; 3993 u32 prog_id, type; 3994 u64 offset, ref_ctr_offset; 3995 unsigned long missed; 3996 3997 err = bpf_get_perf_event_info(event, &prog_id, &type, &name, 3998 &offset, &ref_ctr_offset, &missed); 3999 if (err) 4000 return; 4001 4002 seq_printf(seq, 4003 "name:\t%s\n" 4004 "offset:\t%#llx\n" 4005 "ref_ctr_offset:\t%#llx\n" 4006 "event_type:\t%s\n" 4007 "cookie:\t%llu\n", 4008 name, offset, ref_ctr_offset, 4009 type == BPF_FD_TYPE_URETPROBE ? "uretprobe" : "uprobe", 4010 event->bpf_cookie); 4011 } 4012 #endif 4013 4014 static int bpf_perf_link_fill_probe(const struct perf_event *event, 4015 struct bpf_link_info *info) 4016 { 4017 #ifdef CONFIG_KPROBE_EVENTS 4018 if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) 4019 return bpf_perf_link_fill_kprobe(event, info); 4020 #endif 4021 #ifdef CONFIG_UPROBE_EVENTS 4022 if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE) 4023 return bpf_perf_link_fill_uprobe(event, info); 4024 #endif 4025 return -EOPNOTSUPP; 4026 } 4027 4028 static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, 4029 struct bpf_link_info *info) 4030 { 4031 char __user *uname; 4032 u32 ulen; 4033 int err; 4034 4035 uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); 4036 ulen = info->perf_event.tracepoint.name_len; 4037 err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, NULL); 4038 if (err) 4039 return err; 4040 4041 info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; 4042 info->perf_event.tracepoint.name_len = ulen; 4043 info->perf_event.tracepoint.cookie = event->bpf_cookie; 4044 return 0; 4045 } 4046 4047 static int bpf_perf_link_fill_perf_event(const struct perf_event *event, 4048 struct bpf_link_info *info) 4049 { 4050 info->perf_event.event.type = event->attr.type; 4051 info->perf_event.event.config = event->attr.config; 4052 info->perf_event.event.cookie = event->bpf_cookie; 4053 info->perf_event.type = BPF_PERF_EVENT_EVENT; 4054 return 0; 4055 } 4056 4057 static int bpf_perf_link_fill_link_info(const struct bpf_link *link, 4058 struct bpf_link_info *info) 4059 { 4060 struct bpf_perf_link *perf_link; 4061 const struct perf_event *event; 4062 4063 perf_link = container_of(link, struct bpf_perf_link, link); 4064 event = perf_get_event(perf_link->perf_file); 4065 if (IS_ERR(event)) 4066 return PTR_ERR(event); 4067 4068 switch (event->prog->type) { 4069 case BPF_PROG_TYPE_PERF_EVENT: 4070 return bpf_perf_link_fill_perf_event(event, info); 4071 
case BPF_PROG_TYPE_TRACEPOINT: 4072 return bpf_perf_link_fill_tracepoint(event, info); 4073 case BPF_PROG_TYPE_KPROBE: 4074 return bpf_perf_link_fill_probe(event, info); 4075 default: 4076 return -EOPNOTSUPP; 4077 } 4078 } 4079 4080 static void bpf_perf_event_link_show_fdinfo(const struct perf_event *event, 4081 struct seq_file *seq) 4082 { 4083 seq_printf(seq, 4084 "type:\t%u\n" 4085 "config:\t%llu\n" 4086 "event_type:\t%s\n" 4087 "cookie:\t%llu\n", 4088 event->attr.type, event->attr.config, 4089 "event", event->bpf_cookie); 4090 } 4091 4092 static void bpf_tracepoint_link_show_fdinfo(const struct perf_event *event, 4093 struct seq_file *seq) 4094 { 4095 int err; 4096 const char *name; 4097 u32 prog_id; 4098 4099 err = bpf_get_perf_event_info(event, &prog_id, NULL, &name, NULL, 4100 NULL, NULL); 4101 if (err) 4102 return; 4103 4104 seq_printf(seq, 4105 "tp_name:\t%s\n" 4106 "event_type:\t%s\n" 4107 "cookie:\t%llu\n", 4108 name, "tracepoint", event->bpf_cookie); 4109 } 4110 4111 static void bpf_probe_link_show_fdinfo(const struct perf_event *event, 4112 struct seq_file *seq) 4113 { 4114 #ifdef CONFIG_KPROBE_EVENTS 4115 if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) 4116 return bpf_perf_link_fdinfo_kprobe(event, seq); 4117 #endif 4118 4119 #ifdef CONFIG_UPROBE_EVENTS 4120 if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE) 4121 return bpf_perf_link_fdinfo_uprobe(event, seq); 4122 #endif 4123 } 4124 4125 static void bpf_perf_link_show_fdinfo(const struct bpf_link *link, 4126 struct seq_file *seq) 4127 { 4128 struct bpf_perf_link *perf_link; 4129 const struct perf_event *event; 4130 4131 perf_link = container_of(link, struct bpf_perf_link, link); 4132 event = perf_get_event(perf_link->perf_file); 4133 if (IS_ERR(event)) 4134 return; 4135 4136 switch (event->prog->type) { 4137 case BPF_PROG_TYPE_PERF_EVENT: 4138 return bpf_perf_event_link_show_fdinfo(event, seq); 4139 case BPF_PROG_TYPE_TRACEPOINT: 4140 return bpf_tracepoint_link_show_fdinfo(event, seq); 4141 case BPF_PROG_TYPE_KPROBE: 4142 return bpf_probe_link_show_fdinfo(event, seq); 4143 default: 4144 return; 4145 } 4146 } 4147 4148 static const struct bpf_link_ops bpf_perf_link_lops = { 4149 .release = bpf_perf_link_release, 4150 .dealloc = bpf_perf_link_dealloc, 4151 .fill_link_info = bpf_perf_link_fill_link_info, 4152 .show_fdinfo = bpf_perf_link_show_fdinfo, 4153 }; 4154 4155 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 4156 { 4157 struct bpf_link_primer link_primer; 4158 struct bpf_perf_link *link; 4159 struct perf_event *event; 4160 struct file *perf_file; 4161 int err; 4162 4163 if (attr->link_create.flags) 4164 return -EINVAL; 4165 4166 perf_file = perf_event_get(attr->link_create.target_fd); 4167 if (IS_ERR(perf_file)) 4168 return PTR_ERR(perf_file); 4169 4170 link = kzalloc(sizeof(*link), GFP_USER); 4171 if (!link) { 4172 err = -ENOMEM; 4173 goto out_put_file; 4174 } 4175 bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog, 4176 attr->link_create.attach_type); 4177 link->perf_file = perf_file; 4178 4179 err = bpf_link_prime(&link->link, &link_primer); 4180 if (err) { 4181 kfree(link); 4182 goto out_put_file; 4183 } 4184 4185 event = perf_file->private_data; 4186 err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie); 4187 if (err) { 4188 bpf_link_cleanup(&link_primer); 4189 goto out_put_file; 4190 } 4191 /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */ 4192 bpf_prog_inc(prog); 4193 4194 return 
bpf_link_settle(&link_primer); 4195 4196 out_put_file: 4197 fput(perf_file); 4198 return err; 4199 } 4200 #else 4201 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 4202 { 4203 return -EOPNOTSUPP; 4204 } 4205 #endif /* CONFIG_PERF_EVENTS */ 4206 4207 static int bpf_raw_tp_link_attach(struct bpf_prog *prog, 4208 const char __user *user_tp_name, u64 cookie, 4209 enum bpf_attach_type attach_type) 4210 { 4211 struct bpf_link_primer link_primer; 4212 struct bpf_raw_tp_link *link; 4213 struct bpf_raw_event_map *btp; 4214 const char *tp_name; 4215 char buf[128]; 4216 int err; 4217 4218 switch (prog->type) { 4219 case BPF_PROG_TYPE_TRACING: 4220 case BPF_PROG_TYPE_EXT: 4221 case BPF_PROG_TYPE_LSM: 4222 if (user_tp_name) 4223 /* The attach point for this category of programs 4224 * should be specified via btf_id during program load. 4225 */ 4226 return -EINVAL; 4227 if (prog->type == BPF_PROG_TYPE_TRACING && 4228 prog->expected_attach_type == BPF_TRACE_RAW_TP) { 4229 tp_name = prog->aux->attach_func_name; 4230 break; 4231 } 4232 return bpf_tracing_prog_attach(prog, 0, 0, 0, attach_type); 4233 case BPF_PROG_TYPE_RAW_TRACEPOINT: 4234 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 4235 if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) 4236 return -EFAULT; 4237 buf[sizeof(buf) - 1] = 0; 4238 tp_name = buf; 4239 break; 4240 default: 4241 return -EINVAL; 4242 } 4243 4244 btp = bpf_get_raw_tracepoint(tp_name); 4245 if (!btp) 4246 return -ENOENT; 4247 4248 link = kzalloc(sizeof(*link), GFP_USER); 4249 if (!link) { 4250 err = -ENOMEM; 4251 goto out_put_btp; 4252 } 4253 bpf_link_init_sleepable(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, 4254 &bpf_raw_tp_link_lops, prog, attach_type, 4255 tracepoint_is_faultable(btp->tp)); 4256 link->btp = btp; 4257 link->cookie = cookie; 4258 4259 err = bpf_link_prime(&link->link, &link_primer); 4260 if (err) { 4261 kfree(link); 4262 goto out_put_btp; 4263 } 4264 4265 err = bpf_probe_register(link->btp, link); 4266 if (err) { 4267 bpf_link_cleanup(&link_primer); 4268 goto out_put_btp; 4269 } 4270 4271 return bpf_link_settle(&link_primer); 4272 4273 out_put_btp: 4274 bpf_put_raw_tracepoint(btp); 4275 return err; 4276 } 4277 4278 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.cookie 4279 4280 static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 4281 { 4282 struct bpf_prog *prog; 4283 void __user *tp_name; 4284 __u64 cookie; 4285 int fd; 4286 4287 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) 4288 return -EINVAL; 4289 4290 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 4291 if (IS_ERR(prog)) 4292 return PTR_ERR(prog); 4293 4294 tp_name = u64_to_user_ptr(attr->raw_tracepoint.name); 4295 cookie = attr->raw_tracepoint.cookie; 4296 fd = bpf_raw_tp_link_attach(prog, tp_name, cookie, prog->expected_attach_type); 4297 if (fd < 0) 4298 bpf_prog_put(prog); 4299 return fd; 4300 } 4301 4302 static enum bpf_prog_type 4303 attach_type_to_prog_type(enum bpf_attach_type attach_type) 4304 { 4305 switch (attach_type) { 4306 case BPF_CGROUP_INET_INGRESS: 4307 case BPF_CGROUP_INET_EGRESS: 4308 return BPF_PROG_TYPE_CGROUP_SKB; 4309 case BPF_CGROUP_INET_SOCK_CREATE: 4310 case BPF_CGROUP_INET_SOCK_RELEASE: 4311 case BPF_CGROUP_INET4_POST_BIND: 4312 case BPF_CGROUP_INET6_POST_BIND: 4313 return BPF_PROG_TYPE_CGROUP_SOCK; 4314 case BPF_CGROUP_INET4_BIND: 4315 case BPF_CGROUP_INET6_BIND: 4316 case BPF_CGROUP_INET4_CONNECT: 4317 case BPF_CGROUP_INET6_CONNECT: 4318 case BPF_CGROUP_UNIX_CONNECT: 4319 case BPF_CGROUP_INET4_GETPEERNAME: 4320 
case BPF_CGROUP_INET6_GETPEERNAME: 4321 case BPF_CGROUP_UNIX_GETPEERNAME: 4322 case BPF_CGROUP_INET4_GETSOCKNAME: 4323 case BPF_CGROUP_INET6_GETSOCKNAME: 4324 case BPF_CGROUP_UNIX_GETSOCKNAME: 4325 case BPF_CGROUP_UDP4_SENDMSG: 4326 case BPF_CGROUP_UDP6_SENDMSG: 4327 case BPF_CGROUP_UNIX_SENDMSG: 4328 case BPF_CGROUP_UDP4_RECVMSG: 4329 case BPF_CGROUP_UDP6_RECVMSG: 4330 case BPF_CGROUP_UNIX_RECVMSG: 4331 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 4332 case BPF_CGROUP_SOCK_OPS: 4333 return BPF_PROG_TYPE_SOCK_OPS; 4334 case BPF_CGROUP_DEVICE: 4335 return BPF_PROG_TYPE_CGROUP_DEVICE; 4336 case BPF_SK_MSG_VERDICT: 4337 return BPF_PROG_TYPE_SK_MSG; 4338 case BPF_SK_SKB_STREAM_PARSER: 4339 case BPF_SK_SKB_STREAM_VERDICT: 4340 case BPF_SK_SKB_VERDICT: 4341 return BPF_PROG_TYPE_SK_SKB; 4342 case BPF_LIRC_MODE2: 4343 return BPF_PROG_TYPE_LIRC_MODE2; 4344 case BPF_FLOW_DISSECTOR: 4345 return BPF_PROG_TYPE_FLOW_DISSECTOR; 4346 case BPF_CGROUP_SYSCTL: 4347 return BPF_PROG_TYPE_CGROUP_SYSCTL; 4348 case BPF_CGROUP_GETSOCKOPT: 4349 case BPF_CGROUP_SETSOCKOPT: 4350 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 4351 case BPF_TRACE_ITER: 4352 case BPF_TRACE_RAW_TP: 4353 case BPF_TRACE_FENTRY: 4354 case BPF_TRACE_FEXIT: 4355 case BPF_MODIFY_RETURN: 4356 return BPF_PROG_TYPE_TRACING; 4357 case BPF_LSM_MAC: 4358 return BPF_PROG_TYPE_LSM; 4359 case BPF_SK_LOOKUP: 4360 return BPF_PROG_TYPE_SK_LOOKUP; 4361 case BPF_XDP: 4362 return BPF_PROG_TYPE_XDP; 4363 case BPF_LSM_CGROUP: 4364 return BPF_PROG_TYPE_LSM; 4365 case BPF_TCX_INGRESS: 4366 case BPF_TCX_EGRESS: 4367 case BPF_NETKIT_PRIMARY: 4368 case BPF_NETKIT_PEER: 4369 return BPF_PROG_TYPE_SCHED_CLS; 4370 default: 4371 return BPF_PROG_TYPE_UNSPEC; 4372 } 4373 } 4374 4375 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 4376 enum bpf_attach_type attach_type) 4377 { 4378 enum bpf_prog_type ptype; 4379 4380 switch (prog->type) { 4381 case BPF_PROG_TYPE_CGROUP_SOCK: 4382 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4383 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4384 case BPF_PROG_TYPE_SK_LOOKUP: 4385 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 4386 case BPF_PROG_TYPE_CGROUP_SKB: 4387 if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN)) 4388 /* cg-skb progs can be loaded by unpriv user. 4389 * check permissions at attach time. 4390 */ 4391 return -EPERM; 4392 4393 ptype = attach_type_to_prog_type(attach_type); 4394 if (prog->type != ptype) 4395 return -EINVAL; 4396 4397 return prog->enforce_expected_attach_type && 4398 prog->expected_attach_type != attach_type ? 
4399 -EINVAL : 0; 4400 case BPF_PROG_TYPE_EXT: 4401 return 0; 4402 case BPF_PROG_TYPE_NETFILTER: 4403 if (attach_type != BPF_NETFILTER) 4404 return -EINVAL; 4405 return 0; 4406 case BPF_PROG_TYPE_PERF_EVENT: 4407 case BPF_PROG_TYPE_TRACEPOINT: 4408 if (attach_type != BPF_PERF_EVENT) 4409 return -EINVAL; 4410 return 0; 4411 case BPF_PROG_TYPE_KPROBE: 4412 if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && 4413 attach_type != BPF_TRACE_KPROBE_MULTI) 4414 return -EINVAL; 4415 if (prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION && 4416 attach_type != BPF_TRACE_KPROBE_SESSION) 4417 return -EINVAL; 4418 if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI && 4419 attach_type != BPF_TRACE_UPROBE_MULTI) 4420 return -EINVAL; 4421 if (prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION && 4422 attach_type != BPF_TRACE_UPROBE_SESSION) 4423 return -EINVAL; 4424 if (attach_type != BPF_PERF_EVENT && 4425 attach_type != BPF_TRACE_KPROBE_MULTI && 4426 attach_type != BPF_TRACE_KPROBE_SESSION && 4427 attach_type != BPF_TRACE_UPROBE_MULTI && 4428 attach_type != BPF_TRACE_UPROBE_SESSION) 4429 return -EINVAL; 4430 return 0; 4431 case BPF_PROG_TYPE_SCHED_CLS: 4432 if (attach_type != BPF_TCX_INGRESS && 4433 attach_type != BPF_TCX_EGRESS && 4434 attach_type != BPF_NETKIT_PRIMARY && 4435 attach_type != BPF_NETKIT_PEER) 4436 return -EINVAL; 4437 return 0; 4438 default: 4439 ptype = attach_type_to_prog_type(attach_type); 4440 if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) 4441 return -EINVAL; 4442 return 0; 4443 } 4444 } 4445 4446 static bool is_cgroup_prog_type(enum bpf_prog_type ptype, enum bpf_attach_type atype, 4447 bool check_atype) 4448 { 4449 switch (ptype) { 4450 case BPF_PROG_TYPE_CGROUP_DEVICE: 4451 case BPF_PROG_TYPE_CGROUP_SKB: 4452 case BPF_PROG_TYPE_CGROUP_SOCK: 4453 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4454 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4455 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4456 case BPF_PROG_TYPE_SOCK_OPS: 4457 return true; 4458 case BPF_PROG_TYPE_LSM: 4459 return check_atype ? 
atype == BPF_LSM_CGROUP : true; 4460 default: 4461 return false; 4462 } 4463 } 4464 4465 #define BPF_PROG_ATTACH_LAST_FIELD expected_revision 4466 4467 #define BPF_F_ATTACH_MASK_BASE \ 4468 (BPF_F_ALLOW_OVERRIDE | \ 4469 BPF_F_ALLOW_MULTI | \ 4470 BPF_F_REPLACE | \ 4471 BPF_F_PREORDER) 4472 4473 #define BPF_F_ATTACH_MASK_MPROG \ 4474 (BPF_F_REPLACE | \ 4475 BPF_F_BEFORE | \ 4476 BPF_F_AFTER | \ 4477 BPF_F_ID | \ 4478 BPF_F_LINK) 4479 4480 static int bpf_prog_attach(const union bpf_attr *attr) 4481 { 4482 enum bpf_prog_type ptype; 4483 struct bpf_prog *prog; 4484 int ret; 4485 4486 if (CHECK_ATTR(BPF_PROG_ATTACH)) 4487 return -EINVAL; 4488 4489 ptype = attach_type_to_prog_type(attr->attach_type); 4490 if (ptype == BPF_PROG_TYPE_UNSPEC) 4491 return -EINVAL; 4492 if (bpf_mprog_supported(ptype)) { 4493 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) 4494 return -EINVAL; 4495 } else if (is_cgroup_prog_type(ptype, 0, false)) { 4496 if (attr->attach_flags & ~(BPF_F_ATTACH_MASK_BASE | BPF_F_ATTACH_MASK_MPROG)) 4497 return -EINVAL; 4498 } else { 4499 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE) 4500 return -EINVAL; 4501 if (attr->relative_fd || 4502 attr->expected_revision) 4503 return -EINVAL; 4504 } 4505 4506 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 4507 if (IS_ERR(prog)) 4508 return PTR_ERR(prog); 4509 4510 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) { 4511 bpf_prog_put(prog); 4512 return -EINVAL; 4513 } 4514 4515 if (is_cgroup_prog_type(ptype, prog->expected_attach_type, true)) { 4516 ret = cgroup_bpf_prog_attach(attr, ptype, prog); 4517 goto out; 4518 } 4519 4520 switch (ptype) { 4521 case BPF_PROG_TYPE_SK_SKB: 4522 case BPF_PROG_TYPE_SK_MSG: 4523 ret = sock_map_get_from_fd(attr, prog); 4524 break; 4525 case BPF_PROG_TYPE_LIRC_MODE2: 4526 ret = lirc_prog_attach(attr, prog); 4527 break; 4528 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4529 ret = netns_bpf_prog_attach(attr, prog); 4530 break; 4531 case BPF_PROG_TYPE_SCHED_CLS: 4532 if (attr->attach_type == BPF_TCX_INGRESS || 4533 attr->attach_type == BPF_TCX_EGRESS) 4534 ret = tcx_prog_attach(attr, prog); 4535 else 4536 ret = netkit_prog_attach(attr, prog); 4537 break; 4538 default: 4539 ret = -EINVAL; 4540 } 4541 out: 4542 if (ret) 4543 bpf_prog_put(prog); 4544 return ret; 4545 } 4546 4547 #define BPF_PROG_DETACH_LAST_FIELD expected_revision 4548 4549 static int bpf_prog_detach(const union bpf_attr *attr) 4550 { 4551 struct bpf_prog *prog = NULL; 4552 enum bpf_prog_type ptype; 4553 int ret; 4554 4555 if (CHECK_ATTR(BPF_PROG_DETACH)) 4556 return -EINVAL; 4557 4558 ptype = attach_type_to_prog_type(attr->attach_type); 4559 if (bpf_mprog_supported(ptype)) { 4560 if (ptype == BPF_PROG_TYPE_UNSPEC) 4561 return -EINVAL; 4562 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) 4563 return -EINVAL; 4564 if (attr->attach_bpf_fd) { 4565 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 4566 if (IS_ERR(prog)) 4567 return PTR_ERR(prog); 4568 } 4569 } else if (is_cgroup_prog_type(ptype, 0, false)) { 4570 if (attr->attach_flags || attr->relative_fd) 4571 return -EINVAL; 4572 } else if (attr->attach_flags || 4573 attr->relative_fd || 4574 attr->expected_revision) { 4575 return -EINVAL; 4576 } 4577 4578 switch (ptype) { 4579 case BPF_PROG_TYPE_SK_MSG: 4580 case BPF_PROG_TYPE_SK_SKB: 4581 ret = sock_map_prog_detach(attr, ptype); 4582 break; 4583 case BPF_PROG_TYPE_LIRC_MODE2: 4584 ret = lirc_prog_detach(attr); 4585 break; 4586 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4587 ret = netns_bpf_prog_detach(attr, ptype); 4588 break; 4589 
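	/* All cgroup-managed program types, including BPF_PROG_TYPE_LSM
	 * used for BPF_LSM_CGROUP attachments, funnel through the common
	 * cgroup detach path below.
	 */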
case BPF_PROG_TYPE_CGROUP_DEVICE: 4590 case BPF_PROG_TYPE_CGROUP_SKB: 4591 case BPF_PROG_TYPE_CGROUP_SOCK: 4592 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4593 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4594 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4595 case BPF_PROG_TYPE_SOCK_OPS: 4596 case BPF_PROG_TYPE_LSM: 4597 ret = cgroup_bpf_prog_detach(attr, ptype); 4598 break; 4599 case BPF_PROG_TYPE_SCHED_CLS: 4600 if (attr->attach_type == BPF_TCX_INGRESS || 4601 attr->attach_type == BPF_TCX_EGRESS) 4602 ret = tcx_prog_detach(attr, prog); 4603 else 4604 ret = netkit_prog_detach(attr, prog); 4605 break; 4606 default: 4607 ret = -EINVAL; 4608 } 4609 4610 if (prog) 4611 bpf_prog_put(prog); 4612 return ret; 4613 } 4614 4615 #define BPF_PROG_QUERY_LAST_FIELD query.revision 4616 4617 static int bpf_prog_query(const union bpf_attr *attr, 4618 union bpf_attr __user *uattr) 4619 { 4620 if (!bpf_net_capable()) 4621 return -EPERM; 4622 if (CHECK_ATTR(BPF_PROG_QUERY)) 4623 return -EINVAL; 4624 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) 4625 return -EINVAL; 4626 4627 switch (attr->query.attach_type) { 4628 case BPF_CGROUP_INET_INGRESS: 4629 case BPF_CGROUP_INET_EGRESS: 4630 case BPF_CGROUP_INET_SOCK_CREATE: 4631 case BPF_CGROUP_INET_SOCK_RELEASE: 4632 case BPF_CGROUP_INET4_BIND: 4633 case BPF_CGROUP_INET6_BIND: 4634 case BPF_CGROUP_INET4_POST_BIND: 4635 case BPF_CGROUP_INET6_POST_BIND: 4636 case BPF_CGROUP_INET4_CONNECT: 4637 case BPF_CGROUP_INET6_CONNECT: 4638 case BPF_CGROUP_UNIX_CONNECT: 4639 case BPF_CGROUP_INET4_GETPEERNAME: 4640 case BPF_CGROUP_INET6_GETPEERNAME: 4641 case BPF_CGROUP_UNIX_GETPEERNAME: 4642 case BPF_CGROUP_INET4_GETSOCKNAME: 4643 case BPF_CGROUP_INET6_GETSOCKNAME: 4644 case BPF_CGROUP_UNIX_GETSOCKNAME: 4645 case BPF_CGROUP_UDP4_SENDMSG: 4646 case BPF_CGROUP_UDP6_SENDMSG: 4647 case BPF_CGROUP_UNIX_SENDMSG: 4648 case BPF_CGROUP_UDP4_RECVMSG: 4649 case BPF_CGROUP_UDP6_RECVMSG: 4650 case BPF_CGROUP_UNIX_RECVMSG: 4651 case BPF_CGROUP_SOCK_OPS: 4652 case BPF_CGROUP_DEVICE: 4653 case BPF_CGROUP_SYSCTL: 4654 case BPF_CGROUP_GETSOCKOPT: 4655 case BPF_CGROUP_SETSOCKOPT: 4656 case BPF_LSM_CGROUP: 4657 return cgroup_bpf_prog_query(attr, uattr); 4658 case BPF_LIRC_MODE2: 4659 return lirc_prog_query(attr, uattr); 4660 case BPF_FLOW_DISSECTOR: 4661 case BPF_SK_LOOKUP: 4662 return netns_bpf_prog_query(attr, uattr); 4663 case BPF_SK_SKB_STREAM_PARSER: 4664 case BPF_SK_SKB_STREAM_VERDICT: 4665 case BPF_SK_MSG_VERDICT: 4666 case BPF_SK_SKB_VERDICT: 4667 return sock_map_bpf_prog_query(attr, uattr); 4668 case BPF_TCX_INGRESS: 4669 case BPF_TCX_EGRESS: 4670 return tcx_prog_query(attr, uattr); 4671 case BPF_NETKIT_PRIMARY: 4672 case BPF_NETKIT_PEER: 4673 return netkit_prog_query(attr, uattr); 4674 default: 4675 return -EINVAL; 4676 } 4677 } 4678 4679 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size 4680 4681 static int bpf_prog_test_run(const union bpf_attr *attr, 4682 union bpf_attr __user *uattr) 4683 { 4684 struct bpf_prog *prog; 4685 int ret = -ENOTSUPP; 4686 4687 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 4688 return -EINVAL; 4689 4690 if ((attr->test.ctx_size_in && !attr->test.ctx_in) || 4691 (!attr->test.ctx_size_in && attr->test.ctx_in)) 4692 return -EINVAL; 4693 4694 if ((attr->test.ctx_size_out && !attr->test.ctx_out) || 4695 (!attr->test.ctx_size_out && attr->test.ctx_out)) 4696 return -EINVAL; 4697 4698 prog = bpf_prog_get(attr->test.prog_fd); 4699 if (IS_ERR(prog)) 4700 return PTR_ERR(prog); 4701 4702 if (prog->aux->ops->test_run) 4703 ret = prog->aux->ops->test_run(prog, attr, uattr); 4704 4705 
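	/* ret is still -ENOTSUPP at this point if the program type does not
	 * provide a ->test_run handler.
	 */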
bpf_prog_put(prog); 4706 return ret; 4707 } 4708 4709 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 4710 4711 static int bpf_obj_get_next_id(const union bpf_attr *attr, 4712 union bpf_attr __user *uattr, 4713 struct idr *idr, 4714 spinlock_t *lock) 4715 { 4716 u32 next_id = attr->start_id; 4717 int err = 0; 4718 4719 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 4720 return -EINVAL; 4721 4722 if (!capable(CAP_SYS_ADMIN)) 4723 return -EPERM; 4724 4725 next_id++; 4726 spin_lock_bh(lock); 4727 if (!idr_get_next(idr, &next_id)) 4728 err = -ENOENT; 4729 spin_unlock_bh(lock); 4730 4731 if (!err) 4732 err = put_user(next_id, &uattr->next_id); 4733 4734 return err; 4735 } 4736 4737 struct bpf_map *bpf_map_get_curr_or_next(u32 *id) 4738 { 4739 struct bpf_map *map; 4740 4741 spin_lock_bh(&map_idr_lock); 4742 again: 4743 map = idr_get_next(&map_idr, id); 4744 if (map) { 4745 map = __bpf_map_inc_not_zero(map, false); 4746 if (IS_ERR(map)) { 4747 (*id)++; 4748 goto again; 4749 } 4750 } 4751 spin_unlock_bh(&map_idr_lock); 4752 4753 return map; 4754 } 4755 4756 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) 4757 { 4758 struct bpf_prog *prog; 4759 4760 spin_lock_bh(&prog_idr_lock); 4761 again: 4762 prog = idr_get_next(&prog_idr, id); 4763 if (prog) { 4764 prog = bpf_prog_inc_not_zero(prog); 4765 if (IS_ERR(prog)) { 4766 (*id)++; 4767 goto again; 4768 } 4769 } 4770 spin_unlock_bh(&prog_idr_lock); 4771 4772 return prog; 4773 } 4774 4775 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 4776 4777 struct bpf_prog *bpf_prog_by_id(u32 id) 4778 { 4779 struct bpf_prog *prog; 4780 4781 if (!id) 4782 return ERR_PTR(-ENOENT); 4783 4784 spin_lock_bh(&prog_idr_lock); 4785 prog = idr_find(&prog_idr, id); 4786 if (prog) 4787 prog = bpf_prog_inc_not_zero(prog); 4788 else 4789 prog = ERR_PTR(-ENOENT); 4790 spin_unlock_bh(&prog_idr_lock); 4791 return prog; 4792 } 4793 4794 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 4795 { 4796 struct bpf_prog *prog; 4797 u32 id = attr->prog_id; 4798 int fd; 4799 4800 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 4801 return -EINVAL; 4802 4803 if (!capable(CAP_SYS_ADMIN)) 4804 return -EPERM; 4805 4806 prog = bpf_prog_by_id(id); 4807 if (IS_ERR(prog)) 4808 return PTR_ERR(prog); 4809 4810 fd = bpf_prog_new_fd(prog); 4811 if (fd < 0) 4812 bpf_prog_put(prog); 4813 4814 return fd; 4815 } 4816 4817 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 4818 4819 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 4820 { 4821 struct bpf_map *map; 4822 u32 id = attr->map_id; 4823 int f_flags; 4824 int fd; 4825 4826 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || 4827 attr->open_flags & ~BPF_OBJ_FLAG_MASK) 4828 return -EINVAL; 4829 4830 if (!capable(CAP_SYS_ADMIN)) 4831 return -EPERM; 4832 4833 f_flags = bpf_get_file_flag(attr->open_flags); 4834 if (f_flags < 0) 4835 return f_flags; 4836 4837 spin_lock_bh(&map_idr_lock); 4838 map = idr_find(&map_idr, id); 4839 if (map) 4840 map = __bpf_map_inc_not_zero(map, true); 4841 else 4842 map = ERR_PTR(-ENOENT); 4843 spin_unlock_bh(&map_idr_lock); 4844 4845 if (IS_ERR(map)) 4846 return PTR_ERR(map); 4847 4848 fd = bpf_map_new_fd(map, f_flags); 4849 if (fd < 0) 4850 bpf_map_put_with_uref(map); 4851 4852 return fd; 4853 } 4854 4855 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 4856 unsigned long addr, u32 *off, 4857 u32 *type) 4858 { 4859 const struct bpf_map *map; 4860 int i; 4861 4862 mutex_lock(&prog->aux->used_maps_mutex); 4863 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { 4864 map = 
prog->aux->used_maps[i]; 4865 if (map == (void *)addr) { 4866 *type = BPF_PSEUDO_MAP_FD; 4867 goto out; 4868 } 4869 if (!map->ops->map_direct_value_meta) 4870 continue; 4871 if (!map->ops->map_direct_value_meta(map, addr, off)) { 4872 *type = BPF_PSEUDO_MAP_VALUE; 4873 goto out; 4874 } 4875 } 4876 map = NULL; 4877 4878 out: 4879 mutex_unlock(&prog->aux->used_maps_mutex); 4880 return map; 4881 } 4882 4883 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, 4884 const struct cred *f_cred) 4885 { 4886 const struct bpf_map *map; 4887 struct bpf_insn *insns; 4888 u32 off, type; 4889 u64 imm; 4890 u8 code; 4891 int i; 4892 4893 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), 4894 GFP_USER); 4895 if (!insns) 4896 return insns; 4897 4898 for (i = 0; i < prog->len; i++) { 4899 code = insns[i].code; 4900 4901 if (code == (BPF_JMP | BPF_TAIL_CALL)) { 4902 insns[i].code = BPF_JMP | BPF_CALL; 4903 insns[i].imm = BPF_FUNC_tail_call; 4904 /* fall-through */ 4905 } 4906 if (code == (BPF_JMP | BPF_CALL) || 4907 code == (BPF_JMP | BPF_CALL_ARGS)) { 4908 if (code == (BPF_JMP | BPF_CALL_ARGS)) 4909 insns[i].code = BPF_JMP | BPF_CALL; 4910 if (!bpf_dump_raw_ok(f_cred)) 4911 insns[i].imm = 0; 4912 continue; 4913 } 4914 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { 4915 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; 4916 continue; 4917 } 4918 4919 if ((BPF_CLASS(code) == BPF_LDX || BPF_CLASS(code) == BPF_STX || 4920 BPF_CLASS(code) == BPF_ST) && BPF_MODE(code) == BPF_PROBE_MEM32) { 4921 insns[i].code = BPF_CLASS(code) | BPF_SIZE(code) | BPF_MEM; 4922 continue; 4923 } 4924 4925 if (code != (BPF_LD | BPF_IMM | BPF_DW)) 4926 continue; 4927 4928 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 4929 map = bpf_map_from_imm(prog, imm, &off, &type); 4930 if (map) { 4931 insns[i].src_reg = type; 4932 insns[i].imm = map->id; 4933 insns[i + 1].imm = off; 4934 continue; 4935 } 4936 } 4937 4938 return insns; 4939 } 4940 4941 static int set_info_rec_size(struct bpf_prog_info *info) 4942 { 4943 /* 4944 * Ensure info.*_rec_size is the same as kernel expected size 4945 * 4946 * or 4947 * 4948 * Only allow zero *_rec_size if both _rec_size and _cnt are 4949 * zero. In this case, the kernel will set the expected 4950 * _rec_size back to the info. 
4951 */ 4952 4953 if ((info->nr_func_info || info->func_info_rec_size) && 4954 info->func_info_rec_size != sizeof(struct bpf_func_info)) 4955 return -EINVAL; 4956 4957 if ((info->nr_line_info || info->line_info_rec_size) && 4958 info->line_info_rec_size != sizeof(struct bpf_line_info)) 4959 return -EINVAL; 4960 4961 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && 4962 info->jited_line_info_rec_size != sizeof(__u64)) 4963 return -EINVAL; 4964 4965 info->func_info_rec_size = sizeof(struct bpf_func_info); 4966 info->line_info_rec_size = sizeof(struct bpf_line_info); 4967 info->jited_line_info_rec_size = sizeof(__u64); 4968 4969 return 0; 4970 } 4971 4972 static int bpf_prog_get_info_by_fd(struct file *file, 4973 struct bpf_prog *prog, 4974 const union bpf_attr *attr, 4975 union bpf_attr __user *uattr) 4976 { 4977 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 4978 struct btf *attach_btf = bpf_prog_get_target_btf(prog); 4979 struct bpf_prog_info info; 4980 u32 info_len = attr->info.info_len; 4981 struct bpf_prog_kstats stats; 4982 char __user *uinsns; 4983 u32 ulen; 4984 int err; 4985 4986 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 4987 if (err) 4988 return err; 4989 info_len = min_t(u32, sizeof(info), info_len); 4990 4991 memset(&info, 0, sizeof(info)); 4992 if (copy_from_user(&info, uinfo, info_len)) 4993 return -EFAULT; 4994 4995 info.type = prog->type; 4996 info.id = prog->aux->id; 4997 info.load_time = prog->aux->load_time; 4998 info.created_by_uid = from_kuid_munged(current_user_ns(), 4999 prog->aux->user->uid); 5000 info.gpl_compatible = prog->gpl_compatible; 5001 5002 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 5003 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 5004 5005 mutex_lock(&prog->aux->used_maps_mutex); 5006 ulen = info.nr_map_ids; 5007 info.nr_map_ids = prog->aux->used_map_cnt; 5008 ulen = min_t(u32, info.nr_map_ids, ulen); 5009 if (ulen) { 5010 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 5011 u32 i; 5012 5013 for (i = 0; i < ulen; i++) 5014 if (put_user(prog->aux->used_maps[i]->id, 5015 &user_map_ids[i])) { 5016 mutex_unlock(&prog->aux->used_maps_mutex); 5017 return -EFAULT; 5018 } 5019 } 5020 mutex_unlock(&prog->aux->used_maps_mutex); 5021 5022 err = set_info_rec_size(&info); 5023 if (err) 5024 return err; 5025 5026 bpf_prog_get_stats(prog, &stats); 5027 info.run_time_ns = stats.nsecs; 5028 info.run_cnt = stats.cnt; 5029 info.recursion_misses = stats.misses; 5030 5031 info.verified_insns = prog->aux->verified_insns; 5032 if (prog->aux->btf) 5033 info.btf_id = btf_obj_id(prog->aux->btf); 5034 5035 if (!bpf_capable()) { 5036 info.jited_prog_len = 0; 5037 info.xlated_prog_len = 0; 5038 info.nr_jited_ksyms = 0; 5039 info.nr_jited_func_lens = 0; 5040 info.nr_func_info = 0; 5041 info.nr_line_info = 0; 5042 info.nr_jited_line_info = 0; 5043 goto done; 5044 } 5045 5046 ulen = info.xlated_prog_len; 5047 info.xlated_prog_len = bpf_prog_insn_size(prog); 5048 if (info.xlated_prog_len && ulen) { 5049 struct bpf_insn *insns_sanitized; 5050 bool fault; 5051 5052 if (!prog->blinded || bpf_dump_raw_ok(file->f_cred)) { 5053 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); 5054 if (!insns_sanitized) 5055 return -ENOMEM; 5056 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 5057 ulen = min_t(u32, info.xlated_prog_len, ulen); 5058 fault = copy_to_user(uinsns, insns_sanitized, ulen); 5059 kfree(insns_sanitized); 5060 if (fault) 5061 return -EFAULT; 5062 } else { 5063 
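			/* The program was constant-blinded and the caller may
			 * not see raw instructions; clear the pointer so
			 * userspace knows no dump was written.
			 */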
info.xlated_prog_insns = 0; 5064 } 5065 } 5066 5067 if (bpf_prog_is_offloaded(prog->aux)) { 5068 err = bpf_prog_offload_info_fill(&info, prog); 5069 if (err) 5070 return err; 5071 goto done; 5072 } 5073 5074 /* NOTE: the following code is supposed to be skipped for offload. 5075 * bpf_prog_offload_info_fill() is the place to fill similar fields 5076 * for offload. 5077 */ 5078 ulen = info.jited_prog_len; 5079 if (prog->aux->func_cnt) { 5080 u32 i; 5081 5082 info.jited_prog_len = 0; 5083 for (i = 0; i < prog->aux->func_cnt; i++) 5084 info.jited_prog_len += prog->aux->func[i]->jited_len; 5085 } else { 5086 info.jited_prog_len = prog->jited_len; 5087 } 5088 5089 if (info.jited_prog_len && ulen) { 5090 if (bpf_dump_raw_ok(file->f_cred)) { 5091 uinsns = u64_to_user_ptr(info.jited_prog_insns); 5092 ulen = min_t(u32, info.jited_prog_len, ulen); 5093 5094 /* for multi-function programs, copy the JITed 5095 * instructions for all the functions 5096 */ 5097 if (prog->aux->func_cnt) { 5098 u32 len, free, i; 5099 u8 *img; 5100 5101 free = ulen; 5102 for (i = 0; i < prog->aux->func_cnt; i++) { 5103 len = prog->aux->func[i]->jited_len; 5104 len = min_t(u32, len, free); 5105 img = (u8 *) prog->aux->func[i]->bpf_func; 5106 if (copy_to_user(uinsns, img, len)) 5107 return -EFAULT; 5108 uinsns += len; 5109 free -= len; 5110 if (!free) 5111 break; 5112 } 5113 } else { 5114 if (copy_to_user(uinsns, prog->bpf_func, ulen)) 5115 return -EFAULT; 5116 } 5117 } else { 5118 info.jited_prog_insns = 0; 5119 } 5120 } 5121 5122 ulen = info.nr_jited_ksyms; 5123 info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; 5124 if (ulen) { 5125 if (bpf_dump_raw_ok(file->f_cred)) { 5126 unsigned long ksym_addr; 5127 u64 __user *user_ksyms; 5128 u32 i; 5129 5130 /* copy the address of the kernel symbol 5131 * corresponding to each function 5132 */ 5133 ulen = min_t(u32, info.nr_jited_ksyms, ulen); 5134 user_ksyms = u64_to_user_ptr(info.jited_ksyms); 5135 if (prog->aux->func_cnt) { 5136 for (i = 0; i < ulen; i++) { 5137 ksym_addr = (unsigned long) 5138 prog->aux->func[i]->bpf_func; 5139 if (put_user((u64) ksym_addr, 5140 &user_ksyms[i])) 5141 return -EFAULT; 5142 } 5143 } else { 5144 ksym_addr = (unsigned long) prog->bpf_func; 5145 if (put_user((u64) ksym_addr, &user_ksyms[0])) 5146 return -EFAULT; 5147 } 5148 } else { 5149 info.jited_ksyms = 0; 5150 } 5151 } 5152 5153 ulen = info.nr_jited_func_lens; 5154 info.nr_jited_func_lens = prog->aux->func_cnt ? 
: 1; 5155 if (ulen) { 5156 if (bpf_dump_raw_ok(file->f_cred)) { 5157 u32 __user *user_lens; 5158 u32 func_len, i; 5159 5160 /* copy the JITed image lengths for each function */ 5161 ulen = min_t(u32, info.nr_jited_func_lens, ulen); 5162 user_lens = u64_to_user_ptr(info.jited_func_lens); 5163 if (prog->aux->func_cnt) { 5164 for (i = 0; i < ulen; i++) { 5165 func_len = 5166 prog->aux->func[i]->jited_len; 5167 if (put_user(func_len, &user_lens[i])) 5168 return -EFAULT; 5169 } 5170 } else { 5171 func_len = prog->jited_len; 5172 if (put_user(func_len, &user_lens[0])) 5173 return -EFAULT; 5174 } 5175 } else { 5176 info.jited_func_lens = 0; 5177 } 5178 } 5179 5180 info.attach_btf_id = prog->aux->attach_btf_id; 5181 if (attach_btf) 5182 info.attach_btf_obj_id = btf_obj_id(attach_btf); 5183 5184 ulen = info.nr_func_info; 5185 info.nr_func_info = prog->aux->func_info_cnt; 5186 if (info.nr_func_info && ulen) { 5187 char __user *user_finfo; 5188 5189 user_finfo = u64_to_user_ptr(info.func_info); 5190 ulen = min_t(u32, info.nr_func_info, ulen); 5191 if (copy_to_user(user_finfo, prog->aux->func_info, 5192 info.func_info_rec_size * ulen)) 5193 return -EFAULT; 5194 } 5195 5196 ulen = info.nr_line_info; 5197 info.nr_line_info = prog->aux->nr_linfo; 5198 if (info.nr_line_info && ulen) { 5199 __u8 __user *user_linfo; 5200 5201 user_linfo = u64_to_user_ptr(info.line_info); 5202 ulen = min_t(u32, info.nr_line_info, ulen); 5203 if (copy_to_user(user_linfo, prog->aux->linfo, 5204 info.line_info_rec_size * ulen)) 5205 return -EFAULT; 5206 } 5207 5208 ulen = info.nr_jited_line_info; 5209 if (prog->aux->jited_linfo) 5210 info.nr_jited_line_info = prog->aux->nr_linfo; 5211 else 5212 info.nr_jited_line_info = 0; 5213 if (info.nr_jited_line_info && ulen) { 5214 if (bpf_dump_raw_ok(file->f_cred)) { 5215 unsigned long line_addr; 5216 __u64 __user *user_linfo; 5217 u32 i; 5218 5219 user_linfo = u64_to_user_ptr(info.jited_line_info); 5220 ulen = min_t(u32, info.nr_jited_line_info, ulen); 5221 for (i = 0; i < ulen; i++) { 5222 line_addr = (unsigned long)prog->aux->jited_linfo[i]; 5223 if (put_user((__u64)line_addr, &user_linfo[i])) 5224 return -EFAULT; 5225 } 5226 } else { 5227 info.jited_line_info = 0; 5228 } 5229 } 5230 5231 ulen = info.nr_prog_tags; 5232 info.nr_prog_tags = prog->aux->func_cnt ? 
: 1; 5233 if (ulen) { 5234 __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; 5235 u32 i; 5236 5237 user_prog_tags = u64_to_user_ptr(info.prog_tags); 5238 ulen = min_t(u32, info.nr_prog_tags, ulen); 5239 if (prog->aux->func_cnt) { 5240 for (i = 0; i < ulen; i++) { 5241 if (copy_to_user(user_prog_tags[i], 5242 prog->aux->func[i]->tag, 5243 BPF_TAG_SIZE)) 5244 return -EFAULT; 5245 } 5246 } else { 5247 if (copy_to_user(user_prog_tags[0], 5248 prog->tag, BPF_TAG_SIZE)) 5249 return -EFAULT; 5250 } 5251 } 5252 5253 done: 5254 if (copy_to_user(uinfo, &info, info_len) || 5255 put_user(info_len, &uattr->info.info_len)) 5256 return -EFAULT; 5257 5258 return 0; 5259 } 5260 5261 static int bpf_map_get_info_by_fd(struct file *file, 5262 struct bpf_map *map, 5263 const union bpf_attr *attr, 5264 union bpf_attr __user *uattr) 5265 { 5266 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5267 struct bpf_map_info info; 5268 u32 info_len = attr->info.info_len; 5269 int err; 5270 5271 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 5272 if (err) 5273 return err; 5274 info_len = min_t(u32, sizeof(info), info_len); 5275 5276 memset(&info, 0, sizeof(info)); 5277 if (copy_from_user(&info, uinfo, info_len)) 5278 return -EFAULT; 5279 5280 info.type = map->map_type; 5281 info.id = map->id; 5282 info.key_size = map->key_size; 5283 info.value_size = map->value_size; 5284 info.max_entries = map->max_entries; 5285 info.map_flags = map->map_flags; 5286 info.map_extra = map->map_extra; 5287 memcpy(info.name, map->name, sizeof(map->name)); 5288 5289 if (map->btf) { 5290 info.btf_id = btf_obj_id(map->btf); 5291 info.btf_key_type_id = map->btf_key_type_id; 5292 info.btf_value_type_id = map->btf_value_type_id; 5293 } 5294 info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 5295 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) 5296 bpf_map_struct_ops_info_fill(&info, map); 5297 5298 if (bpf_map_is_offloaded(map)) { 5299 err = bpf_map_offload_info_fill(&info, map); 5300 if (err) 5301 return err; 5302 } 5303 5304 if (info.hash) { 5305 char __user *uhash = u64_to_user_ptr(info.hash); 5306 5307 if (!map->ops->map_get_hash) 5308 return -EINVAL; 5309 5310 if (info.hash_size != SHA256_DIGEST_SIZE) 5311 return -EINVAL; 5312 5313 err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha); 5314 if (err != 0) 5315 return err; 5316 5317 if (copy_to_user(uhash, map->sha, SHA256_DIGEST_SIZE) != 0) 5318 return -EFAULT; 5319 } else if (info.hash_size) { 5320 return -EINVAL; 5321 } 5322 5323 if (copy_to_user(uinfo, &info, info_len) || 5324 put_user(info_len, &uattr->info.info_len)) 5325 return -EFAULT; 5326 5327 return 0; 5328 } 5329 5330 static int bpf_btf_get_info_by_fd(struct file *file, 5331 struct btf *btf, 5332 const union bpf_attr *attr, 5333 union bpf_attr __user *uattr) 5334 { 5335 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5336 u32 info_len = attr->info.info_len; 5337 int err; 5338 5339 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5340 if (err) 5341 return err; 5342 5343 return btf_get_info_by_fd(btf, attr, uattr); 5344 } 5345 5346 static int bpf_link_get_info_by_fd(struct file *file, 5347 struct bpf_link *link, 5348 const union bpf_attr *attr, 5349 union bpf_attr __user *uattr) 5350 { 5351 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5352 struct bpf_link_info info; 5353 u32 info_len = attr->info.info_len; 5354 int err; 5355 5356 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), 
sizeof(info), info_len); 5357 if (err) 5358 return err; 5359 info_len = min_t(u32, sizeof(info), info_len); 5360 5361 memset(&info, 0, sizeof(info)); 5362 if (copy_from_user(&info, uinfo, info_len)) 5363 return -EFAULT; 5364 5365 info.type = link->type; 5366 info.id = link->id; 5367 if (link->prog) 5368 info.prog_id = link->prog->aux->id; 5369 5370 if (link->ops->fill_link_info) { 5371 err = link->ops->fill_link_info(link, &info); 5372 if (err) 5373 return err; 5374 } 5375 5376 if (copy_to_user(uinfo, &info, info_len) || 5377 put_user(info_len, &uattr->info.info_len)) 5378 return -EFAULT; 5379 5380 return 0; 5381 } 5382 5383 5384 static int token_get_info_by_fd(struct file *file, 5385 struct bpf_token *token, 5386 const union bpf_attr *attr, 5387 union bpf_attr __user *uattr) 5388 { 5389 struct bpf_token_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5390 u32 info_len = attr->info.info_len; 5391 int err; 5392 5393 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5394 if (err) 5395 return err; 5396 return bpf_token_get_info_by_fd(token, attr, uattr); 5397 } 5398 5399 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info 5400 5401 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, 5402 union bpf_attr __user *uattr) 5403 { 5404 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) 5405 return -EINVAL; 5406 5407 CLASS(fd, f)(attr->info.bpf_fd); 5408 if (fd_empty(f)) 5409 return -EBADFD; 5410 5411 if (fd_file(f)->f_op == &bpf_prog_fops) 5412 return bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5413 uattr); 5414 else if (fd_file(f)->f_op == &bpf_map_fops) 5415 return bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5416 uattr); 5417 else if (fd_file(f)->f_op == &btf_fops) 5418 return bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); 5419 else if (fd_file(f)->f_op == &bpf_link_fops || fd_file(f)->f_op == &bpf_link_fops_poll) 5420 return bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5421 attr, uattr); 5422 else if (fd_file(f)->f_op == &bpf_token_fops) 5423 return token_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5424 attr, uattr); 5425 return -EINVAL; 5426 } 5427 5428 #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd 5429 5430 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) 5431 { 5432 struct bpf_token *token = NULL; 5433 5434 if (CHECK_ATTR(BPF_BTF_LOAD)) 5435 return -EINVAL; 5436 5437 if (attr->btf_flags & ~BPF_F_TOKEN_FD) 5438 return -EINVAL; 5439 5440 if (attr->btf_flags & BPF_F_TOKEN_FD) { 5441 token = bpf_token_get_from_fd(attr->btf_token_fd); 5442 if (IS_ERR(token)) 5443 return PTR_ERR(token); 5444 if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { 5445 bpf_token_put(token); 5446 token = NULL; 5447 } 5448 } 5449 5450 if (!bpf_token_capable(token, CAP_BPF)) { 5451 bpf_token_put(token); 5452 return -EPERM; 5453 } 5454 5455 bpf_token_put(token); 5456 5457 return btf_new_fd(attr, uattr, uattr_size); 5458 } 5459 5460 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd 5461 5462 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) 5463 { 5464 struct bpf_token *token = NULL; 5465 5466 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) 5467 return -EINVAL; 5468 5469 if (attr->open_flags & ~BPF_F_TOKEN_FD) 5470 return -EINVAL; 5471 5472 if (attr->open_flags & BPF_F_TOKEN_FD) { 5473 token = bpf_token_get_from_fd(attr->fd_by_id_token_fd); 5474 if (IS_ERR(token)) 5475 return PTR_ERR(token); 5476 if (!bpf_token_allow_cmd(token, BPF_BTF_GET_FD_BY_ID)) { 
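			/* The token does not delegate BPF_BTF_GET_FD_BY_ID;
			 * drop it and fall back to the plain CAP_SYS_ADMIN
			 * check below.
			 */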
5477 bpf_token_put(token); 5478 token = NULL; 5479 } 5480 } 5481 5482 if (!bpf_token_capable(token, CAP_SYS_ADMIN)) { 5483 bpf_token_put(token); 5484 return -EPERM; 5485 } 5486 5487 bpf_token_put(token); 5488 5489 return btf_get_fd_by_id(attr->btf_id); 5490 } 5491 5492 static int bpf_task_fd_query_copy(const union bpf_attr *attr, 5493 union bpf_attr __user *uattr, 5494 u32 prog_id, u32 fd_type, 5495 const char *buf, u64 probe_offset, 5496 u64 probe_addr) 5497 { 5498 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf); 5499 u32 len = buf ? strlen(buf) : 0, input_len; 5500 int err = 0; 5501 5502 if (put_user(len, &uattr->task_fd_query.buf_len)) 5503 return -EFAULT; 5504 input_len = attr->task_fd_query.buf_len; 5505 if (input_len && ubuf) { 5506 if (!len) { 5507 /* nothing to copy, just make ubuf NULL terminated */ 5508 char zero = '\0'; 5509 5510 if (put_user(zero, ubuf)) 5511 return -EFAULT; 5512 } else { 5513 err = bpf_copy_to_user(ubuf, buf, input_len, len); 5514 if (err == -EFAULT) 5515 return err; 5516 } 5517 } 5518 5519 if (put_user(prog_id, &uattr->task_fd_query.prog_id) || 5520 put_user(fd_type, &uattr->task_fd_query.fd_type) || 5521 put_user(probe_offset, &uattr->task_fd_query.probe_offset) || 5522 put_user(probe_addr, &uattr->task_fd_query.probe_addr)) 5523 return -EFAULT; 5524 5525 return err; 5526 } 5527 5528 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr 5529 5530 static int bpf_task_fd_query(const union bpf_attr *attr, 5531 union bpf_attr __user *uattr) 5532 { 5533 pid_t pid = attr->task_fd_query.pid; 5534 u32 fd = attr->task_fd_query.fd; 5535 const struct perf_event *event; 5536 struct task_struct *task; 5537 struct file *file; 5538 int err; 5539 5540 if (CHECK_ATTR(BPF_TASK_FD_QUERY)) 5541 return -EINVAL; 5542 5543 if (!capable(CAP_SYS_ADMIN)) 5544 return -EPERM; 5545 5546 if (attr->task_fd_query.flags != 0) 5547 return -EINVAL; 5548 5549 rcu_read_lock(); 5550 task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 5551 rcu_read_unlock(); 5552 if (!task) 5553 return -ENOENT; 5554 5555 err = 0; 5556 file = fget_task(task, fd); 5557 put_task_struct(task); 5558 if (!file) 5559 return -EBADF; 5560 5561 if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) { 5562 struct bpf_link *link = file->private_data; 5563 5564 if (link->ops == &bpf_raw_tp_link_lops) { 5565 struct bpf_raw_tp_link *raw_tp = 5566 container_of(link, struct bpf_raw_tp_link, link); 5567 struct bpf_raw_event_map *btp = raw_tp->btp; 5568 5569 err = bpf_task_fd_query_copy(attr, uattr, 5570 raw_tp->link.prog->aux->id, 5571 BPF_FD_TYPE_RAW_TRACEPOINT, 5572 btp->tp->name, 0, 0); 5573 goto put_file; 5574 } 5575 goto out_not_supp; 5576 } 5577 5578 event = perf_get_event(file); 5579 if (!IS_ERR(event)) { 5580 u64 probe_offset, probe_addr; 5581 u32 prog_id, fd_type; 5582 const char *buf; 5583 5584 err = bpf_get_perf_event_info(event, &prog_id, &fd_type, 5585 &buf, &probe_offset, 5586 &probe_addr, NULL); 5587 if (!err) 5588 err = bpf_task_fd_query_copy(attr, uattr, prog_id, 5589 fd_type, buf, 5590 probe_offset, 5591 probe_addr); 5592 goto put_file; 5593 } 5594 5595 out_not_supp: 5596 err = -ENOTSUPP; 5597 put_file: 5598 fput(file); 5599 return err; 5600 } 5601 5602 #define BPF_MAP_BATCH_LAST_FIELD batch.flags 5603 5604 #define BPF_DO_BATCH(fn, ...) 
\ 5605 do { \ 5606 if (!fn) { \ 5607 err = -ENOTSUPP; \ 5608 goto err_put; \ 5609 } \ 5610 err = fn(__VA_ARGS__); \ 5611 } while (0) 5612 5613 static int bpf_map_do_batch(const union bpf_attr *attr, 5614 union bpf_attr __user *uattr, 5615 int cmd) 5616 { 5617 bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || 5618 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; 5619 bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; 5620 struct bpf_map *map; 5621 int err; 5622 5623 if (CHECK_ATTR(BPF_MAP_BATCH)) 5624 return -EINVAL; 5625 5626 CLASS(fd, f)(attr->batch.map_fd); 5627 5628 map = __bpf_map_get(f); 5629 if (IS_ERR(map)) 5630 return PTR_ERR(map); 5631 if (has_write) 5632 bpf_map_write_active_inc(map); 5633 if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 5634 err = -EPERM; 5635 goto err_put; 5636 } 5637 if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 5638 err = -EPERM; 5639 goto err_put; 5640 } 5641 5642 if (cmd == BPF_MAP_LOOKUP_BATCH) 5643 BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr); 5644 else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) 5645 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr); 5646 else if (cmd == BPF_MAP_UPDATE_BATCH) 5647 BPF_DO_BATCH(map->ops->map_update_batch, map, fd_file(f), attr, uattr); 5648 else 5649 BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr); 5650 err_put: 5651 if (has_write) { 5652 maybe_wait_bpf_programs(map); 5653 bpf_map_write_active_dec(map); 5654 } 5655 return err; 5656 } 5657 5658 #define BPF_LINK_CREATE_LAST_FIELD link_create.uprobe_multi.pid 5659 static int link_create(union bpf_attr *attr, bpfptr_t uattr) 5660 { 5661 struct bpf_prog *prog; 5662 int ret; 5663 5664 if (CHECK_ATTR(BPF_LINK_CREATE)) 5665 return -EINVAL; 5666 5667 if (attr->link_create.attach_type == BPF_STRUCT_OPS) 5668 return bpf_struct_ops_link_create(attr); 5669 5670 prog = bpf_prog_get(attr->link_create.prog_fd); 5671 if (IS_ERR(prog)) 5672 return PTR_ERR(prog); 5673 5674 ret = bpf_prog_attach_check_attach_type(prog, 5675 attr->link_create.attach_type); 5676 if (ret) 5677 goto out; 5678 5679 switch (prog->type) { 5680 case BPF_PROG_TYPE_CGROUP_SKB: 5681 case BPF_PROG_TYPE_CGROUP_SOCK: 5682 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 5683 case BPF_PROG_TYPE_SOCK_OPS: 5684 case BPF_PROG_TYPE_CGROUP_DEVICE: 5685 case BPF_PROG_TYPE_CGROUP_SYSCTL: 5686 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 5687 ret = cgroup_bpf_link_attach(attr, prog); 5688 break; 5689 case BPF_PROG_TYPE_EXT: 5690 ret = bpf_tracing_prog_attach(prog, 5691 attr->link_create.target_fd, 5692 attr->link_create.target_btf_id, 5693 attr->link_create.tracing.cookie, 5694 attr->link_create.attach_type); 5695 break; 5696 case BPF_PROG_TYPE_LSM: 5697 case BPF_PROG_TYPE_TRACING: 5698 if (attr->link_create.attach_type != prog->expected_attach_type) { 5699 ret = -EINVAL; 5700 goto out; 5701 } 5702 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) 5703 ret = bpf_raw_tp_link_attach(prog, NULL, attr->link_create.tracing.cookie, 5704 attr->link_create.attach_type); 5705 else if (prog->expected_attach_type == BPF_TRACE_ITER) 5706 ret = bpf_iter_link_attach(attr, uattr, prog); 5707 else if (prog->expected_attach_type == BPF_LSM_CGROUP) 5708 ret = cgroup_bpf_link_attach(attr, prog); 5709 else 5710 ret = bpf_tracing_prog_attach(prog, 5711 attr->link_create.target_fd, 5712 attr->link_create.target_btf_id, 5713 attr->link_create.tracing.cookie, 5714 attr->link_create.attach_type); 5715 break; 5716 case BPF_PROG_TYPE_FLOW_DISSECTOR: 5717 case BPF_PROG_TYPE_SK_LOOKUP: 5718 ret = 
netns_bpf_link_create(attr, prog); 5719 break; 5720 case BPF_PROG_TYPE_SK_MSG: 5721 case BPF_PROG_TYPE_SK_SKB: 5722 ret = sock_map_link_create(attr, prog); 5723 break; 5724 #ifdef CONFIG_NET 5725 case BPF_PROG_TYPE_XDP: 5726 ret = bpf_xdp_link_attach(attr, prog); 5727 break; 5728 case BPF_PROG_TYPE_SCHED_CLS: 5729 if (attr->link_create.attach_type == BPF_TCX_INGRESS || 5730 attr->link_create.attach_type == BPF_TCX_EGRESS) 5731 ret = tcx_link_attach(attr, prog); 5732 else 5733 ret = netkit_link_attach(attr, prog); 5734 break; 5735 case BPF_PROG_TYPE_NETFILTER: 5736 ret = bpf_nf_link_attach(attr, prog); 5737 break; 5738 #endif 5739 case BPF_PROG_TYPE_PERF_EVENT: 5740 case BPF_PROG_TYPE_TRACEPOINT: 5741 ret = bpf_perf_link_attach(attr, prog); 5742 break; 5743 case BPF_PROG_TYPE_KPROBE: 5744 if (attr->link_create.attach_type == BPF_PERF_EVENT) 5745 ret = bpf_perf_link_attach(attr, prog); 5746 else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI || 5747 attr->link_create.attach_type == BPF_TRACE_KPROBE_SESSION) 5748 ret = bpf_kprobe_multi_link_attach(attr, prog); 5749 else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI || 5750 attr->link_create.attach_type == BPF_TRACE_UPROBE_SESSION) 5751 ret = bpf_uprobe_multi_link_attach(attr, prog); 5752 break; 5753 default: 5754 ret = -EINVAL; 5755 } 5756 5757 out: 5758 if (ret < 0) 5759 bpf_prog_put(prog); 5760 return ret; 5761 } 5762 5763 static int link_update_map(struct bpf_link *link, union bpf_attr *attr) 5764 { 5765 struct bpf_map *new_map, *old_map = NULL; 5766 int ret; 5767 5768 new_map = bpf_map_get(attr->link_update.new_map_fd); 5769 if (IS_ERR(new_map)) 5770 return PTR_ERR(new_map); 5771 5772 if (attr->link_update.flags & BPF_F_REPLACE) { 5773 old_map = bpf_map_get(attr->link_update.old_map_fd); 5774 if (IS_ERR(old_map)) { 5775 ret = PTR_ERR(old_map); 5776 goto out_put; 5777 } 5778 } else if (attr->link_update.old_map_fd) { 5779 ret = -EINVAL; 5780 goto out_put; 5781 } 5782 5783 ret = link->ops->update_map(link, new_map, old_map); 5784 5785 if (old_map) 5786 bpf_map_put(old_map); 5787 out_put: 5788 bpf_map_put(new_map); 5789 return ret; 5790 } 5791 5792 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd 5793 5794 static int link_update(union bpf_attr *attr) 5795 { 5796 struct bpf_prog *old_prog = NULL, *new_prog; 5797 struct bpf_link *link; 5798 u32 flags; 5799 int ret; 5800 5801 if (CHECK_ATTR(BPF_LINK_UPDATE)) 5802 return -EINVAL; 5803 5804 flags = attr->link_update.flags; 5805 if (flags & ~BPF_F_REPLACE) 5806 return -EINVAL; 5807 5808 link = bpf_link_get_from_fd(attr->link_update.link_fd); 5809 if (IS_ERR(link)) 5810 return PTR_ERR(link); 5811 5812 if (link->ops->update_map) { 5813 ret = link_update_map(link, attr); 5814 goto out_put_link; 5815 } 5816 5817 new_prog = bpf_prog_get(attr->link_update.new_prog_fd); 5818 if (IS_ERR(new_prog)) { 5819 ret = PTR_ERR(new_prog); 5820 goto out_put_link; 5821 } 5822 5823 if (flags & BPF_F_REPLACE) { 5824 old_prog = bpf_prog_get(attr->link_update.old_prog_fd); 5825 if (IS_ERR(old_prog)) { 5826 ret = PTR_ERR(old_prog); 5827 old_prog = NULL; 5828 goto out_put_progs; 5829 } 5830 } else if (attr->link_update.old_prog_fd) { 5831 ret = -EINVAL; 5832 goto out_put_progs; 5833 } 5834 5835 if (link->ops->update_prog) 5836 ret = link->ops->update_prog(link, new_prog, old_prog); 5837 else 5838 ret = -EINVAL; 5839 5840 out_put_progs: 5841 if (old_prog) 5842 bpf_prog_put(old_prog); 5843 if (ret) 5844 bpf_prog_put(new_prog); 5845 out_put_link: 5846 bpf_link_put_direct(link); 5847 return 
ret; 5848 } 5849 5850 #define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd 5851 5852 static int link_detach(union bpf_attr *attr) 5853 { 5854 struct bpf_link *link; 5855 int ret; 5856 5857 if (CHECK_ATTR(BPF_LINK_DETACH)) 5858 return -EINVAL; 5859 5860 link = bpf_link_get_from_fd(attr->link_detach.link_fd); 5861 if (IS_ERR(link)) 5862 return PTR_ERR(link); 5863 5864 if (link->ops->detach) 5865 ret = link->ops->detach(link); 5866 else 5867 ret = -EOPNOTSUPP; 5868 5869 bpf_link_put_direct(link); 5870 return ret; 5871 } 5872 5873 struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) 5874 { 5875 return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT); 5876 } 5877 EXPORT_SYMBOL(bpf_link_inc_not_zero); 5878 5879 struct bpf_link *bpf_link_by_id(u32 id) 5880 { 5881 struct bpf_link *link; 5882 5883 if (!id) 5884 return ERR_PTR(-ENOENT); 5885 5886 spin_lock_bh(&link_idr_lock); 5887 /* before link is "settled", ID is 0, pretend it doesn't exist yet */ 5888 link = idr_find(&link_idr, id); 5889 if (link) { 5890 if (link->id) 5891 link = bpf_link_inc_not_zero(link); 5892 else 5893 link = ERR_PTR(-EAGAIN); 5894 } else { 5895 link = ERR_PTR(-ENOENT); 5896 } 5897 spin_unlock_bh(&link_idr_lock); 5898 return link; 5899 } 5900 5901 struct bpf_link *bpf_link_get_curr_or_next(u32 *id) 5902 { 5903 struct bpf_link *link; 5904 5905 spin_lock_bh(&link_idr_lock); 5906 again: 5907 link = idr_get_next(&link_idr, id); 5908 if (link) { 5909 link = bpf_link_inc_not_zero(link); 5910 if (IS_ERR(link)) { 5911 (*id)++; 5912 goto again; 5913 } 5914 } 5915 spin_unlock_bh(&link_idr_lock); 5916 5917 return link; 5918 } 5919 5920 #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id 5921 5922 static int bpf_link_get_fd_by_id(const union bpf_attr *attr) 5923 { 5924 struct bpf_link *link; 5925 u32 id = attr->link_id; 5926 int fd; 5927 5928 if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) 5929 return -EINVAL; 5930 5931 if (!capable(CAP_SYS_ADMIN)) 5932 return -EPERM; 5933 5934 link = bpf_link_by_id(id); 5935 if (IS_ERR(link)) 5936 return PTR_ERR(link); 5937 5938 fd = bpf_link_new_fd(link); 5939 if (fd < 0) 5940 bpf_link_put_direct(link); 5941 5942 return fd; 5943 } 5944 5945 DEFINE_MUTEX(bpf_stats_enabled_mutex); 5946 5947 static int bpf_stats_release(struct inode *inode, struct file *file) 5948 { 5949 mutex_lock(&bpf_stats_enabled_mutex); 5950 static_key_slow_dec(&bpf_stats_enabled_key.key); 5951 mutex_unlock(&bpf_stats_enabled_mutex); 5952 return 0; 5953 } 5954 5955 static const struct file_operations bpf_stats_fops = { 5956 .release = bpf_stats_release, 5957 }; 5958 5959 static int bpf_enable_runtime_stats(void) 5960 { 5961 int fd; 5962 5963 mutex_lock(&bpf_stats_enabled_mutex); 5964 5965 /* Set a very high limit to avoid overflow */ 5966 if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) { 5967 mutex_unlock(&bpf_stats_enabled_mutex); 5968 return -EBUSY; 5969 } 5970 5971 fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); 5972 if (fd >= 0) 5973 static_key_slow_inc(&bpf_stats_enabled_key.key); 5974 5975 mutex_unlock(&bpf_stats_enabled_mutex); 5976 return fd; 5977 } 5978 5979 #define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type 5980 5981 static int bpf_enable_stats(union bpf_attr *attr) 5982 { 5983 5984 if (CHECK_ATTR(BPF_ENABLE_STATS)) 5985 return -EINVAL; 5986 5987 if (!capable(CAP_SYS_ADMIN)) 5988 return -EPERM; 5989 5990 switch (attr->enable_stats.type) { 5991 case BPF_STATS_RUN_TIME: 5992 return bpf_enable_runtime_stats(); 5993 default: 5994 break; 5995 } 5996 return -EINVAL; 
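	/* Usage sketch (userspace, illustrative only): run-time stats stay
	 * enabled for as long as the returned fd is held open, e.g.
	 *
	 *	union bpf_attr attr = { .enable_stats.type = BPF_STATS_RUN_TIME };
	 *	int fd = syscall(__NR_bpf, BPF_ENABLE_STATS, &attr, sizeof(attr));
	 *	...
	 *	close(fd);	closing the fd ends up in bpf_stats_release(),
	 *			which drops bpf_stats_enabled_key again.
	 */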
5997 } 5998 5999 #define BPF_ITER_CREATE_LAST_FIELD iter_create.flags 6000 6001 static int bpf_iter_create(union bpf_attr *attr) 6002 { 6003 struct bpf_link *link; 6004 int err; 6005 6006 if (CHECK_ATTR(BPF_ITER_CREATE)) 6007 return -EINVAL; 6008 6009 if (attr->iter_create.flags) 6010 return -EINVAL; 6011 6012 link = bpf_link_get_from_fd(attr->iter_create.link_fd); 6013 if (IS_ERR(link)) 6014 return PTR_ERR(link); 6015 6016 err = bpf_iter_new_fd(link); 6017 bpf_link_put_direct(link); 6018 6019 return err; 6020 } 6021 6022 #define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags 6023 6024 static int bpf_prog_bind_map(union bpf_attr *attr) 6025 { 6026 struct bpf_prog *prog; 6027 struct bpf_map *map; 6028 struct bpf_map **used_maps_old, **used_maps_new; 6029 int i, ret = 0; 6030 6031 if (CHECK_ATTR(BPF_PROG_BIND_MAP)) 6032 return -EINVAL; 6033 6034 if (attr->prog_bind_map.flags) 6035 return -EINVAL; 6036 6037 prog = bpf_prog_get(attr->prog_bind_map.prog_fd); 6038 if (IS_ERR(prog)) 6039 return PTR_ERR(prog); 6040 6041 map = bpf_map_get(attr->prog_bind_map.map_fd); 6042 if (IS_ERR(map)) { 6043 ret = PTR_ERR(map); 6044 goto out_prog_put; 6045 } 6046 6047 mutex_lock(&prog->aux->used_maps_mutex); 6048 6049 used_maps_old = prog->aux->used_maps; 6050 6051 for (i = 0; i < prog->aux->used_map_cnt; i++) 6052 if (used_maps_old[i] == map) { 6053 bpf_map_put(map); 6054 goto out_unlock; 6055 } 6056 6057 used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, 6058 sizeof(used_maps_new[0]), 6059 GFP_KERNEL); 6060 if (!used_maps_new) { 6061 ret = -ENOMEM; 6062 goto out_unlock; 6063 } 6064 6065 /* The bpf program will not access the bpf map, but for the sake of 6066 * simplicity, increase sleepable_refcnt for sleepable program as well. 6067 */ 6068 if (prog->sleepable) 6069 atomic64_inc(&map->sleepable_refcnt); 6070 memcpy(used_maps_new, used_maps_old, 6071 sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); 6072 used_maps_new[prog->aux->used_map_cnt] = map; 6073 6074 prog->aux->used_map_cnt++; 6075 prog->aux->used_maps = used_maps_new; 6076 6077 kfree(used_maps_old); 6078 6079 out_unlock: 6080 mutex_unlock(&prog->aux->used_maps_mutex); 6081 6082 if (ret) 6083 bpf_map_put(map); 6084 out_prog_put: 6085 bpf_prog_put(prog); 6086 return ret; 6087 } 6088 6089 #define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd 6090 6091 static int token_create(union bpf_attr *attr) 6092 { 6093 if (CHECK_ATTR(BPF_TOKEN_CREATE)) 6094 return -EINVAL; 6095 6096 /* no flags are supported yet */ 6097 if (attr->token_create.flags) 6098 return -EINVAL; 6099 6100 return bpf_token_create(attr); 6101 } 6102 6103 #define BPF_PROG_STREAM_READ_BY_FD_LAST_FIELD prog_stream_read.prog_fd 6104 6105 static int prog_stream_read(union bpf_attr *attr) 6106 { 6107 char __user *buf = u64_to_user_ptr(attr->prog_stream_read.stream_buf); 6108 u32 len = attr->prog_stream_read.stream_buf_len; 6109 struct bpf_prog *prog; 6110 int ret; 6111 6112 if (CHECK_ATTR(BPF_PROG_STREAM_READ_BY_FD)) 6113 return -EINVAL; 6114 6115 prog = bpf_prog_get(attr->prog_stream_read.prog_fd); 6116 if (IS_ERR(prog)) 6117 return PTR_ERR(prog); 6118 6119 ret = bpf_prog_stream_read(prog, attr->prog_stream_read.stream_id, buf, len); 6120 bpf_prog_put(prog); 6121 6122 return ret; 6123 } 6124 6125 static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) 6126 { 6127 union bpf_attr attr; 6128 int err; 6129 6130 err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); 6131 if (err) 6132 return err; 6133 size = min_t(u32, size, sizeof(attr)); 6134 6135 /* copy 
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
        union bpf_attr attr;
        int err;

        err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
        if (err)
                return err;
        size = min_t(u32, size, sizeof(attr));

        /* copy attributes from user space, may be less than sizeof(bpf_attr) */
        memset(&attr, 0, sizeof(attr));
        if (copy_from_bpfptr(&attr, uattr, size) != 0)
                return -EFAULT;

        err = security_bpf(cmd, &attr, size, uattr.is_kernel);
        if (err < 0)
                return err;

        switch (cmd) {
        case BPF_MAP_CREATE:
                err = map_create(&attr, uattr);
                break;
        case BPF_MAP_LOOKUP_ELEM:
                err = map_lookup_elem(&attr);
                break;
        case BPF_MAP_UPDATE_ELEM:
                err = map_update_elem(&attr, uattr);
                break;
        case BPF_MAP_DELETE_ELEM:
                err = map_delete_elem(&attr, uattr);
                break;
        case BPF_MAP_GET_NEXT_KEY:
                err = map_get_next_key(&attr);
                break;
        case BPF_MAP_FREEZE:
                err = map_freeze(&attr);
                break;
        case BPF_PROG_LOAD:
                err = bpf_prog_load(&attr, uattr, size);
                break;
        case BPF_OBJ_PIN:
                err = bpf_obj_pin(&attr);
                break;
        case BPF_OBJ_GET:
                err = bpf_obj_get(&attr);
                break;
        case BPF_PROG_ATTACH:
                err = bpf_prog_attach(&attr);
                break;
        case BPF_PROG_DETACH:
                err = bpf_prog_detach(&attr);
                break;
        case BPF_PROG_QUERY:
                err = bpf_prog_query(&attr, uattr.user);
                break;
        case BPF_PROG_TEST_RUN:
                err = bpf_prog_test_run(&attr, uattr.user);
                break;
        case BPF_PROG_GET_NEXT_ID:
                err = bpf_obj_get_next_id(&attr, uattr.user,
                                          &prog_idr, &prog_idr_lock);
                break;
        case BPF_MAP_GET_NEXT_ID:
                err = bpf_obj_get_next_id(&attr, uattr.user,
                                          &map_idr, &map_idr_lock);
                break;
        case BPF_BTF_GET_NEXT_ID:
                err = bpf_obj_get_next_id(&attr, uattr.user,
                                          &btf_idr, &btf_idr_lock);
                break;
        case BPF_PROG_GET_FD_BY_ID:
                err = bpf_prog_get_fd_by_id(&attr);
                break;
        case BPF_MAP_GET_FD_BY_ID:
                err = bpf_map_get_fd_by_id(&attr);
                break;
        case BPF_OBJ_GET_INFO_BY_FD:
                err = bpf_obj_get_info_by_fd(&attr, uattr.user);
                break;
        case BPF_RAW_TRACEPOINT_OPEN:
                err = bpf_raw_tracepoint_open(&attr);
                break;
        case BPF_BTF_LOAD:
                err = bpf_btf_load(&attr, uattr, size);
                break;
        case BPF_BTF_GET_FD_BY_ID:
                err = bpf_btf_get_fd_by_id(&attr);
                break;
        case BPF_TASK_FD_QUERY:
                err = bpf_task_fd_query(&attr, uattr.user);
                break;
        case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
                err = map_lookup_and_delete_elem(&attr);
                break;
        case BPF_MAP_LOOKUP_BATCH:
                err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
                break;
        case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
                err = bpf_map_do_batch(&attr, uattr.user,
                                       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
                break;
        case BPF_MAP_UPDATE_BATCH:
                err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
                break;
        case BPF_MAP_DELETE_BATCH:
                err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
                break;
        case BPF_LINK_CREATE:
                err = link_create(&attr, uattr);
                break;
        case BPF_LINK_UPDATE:
                err = link_update(&attr);
                break;
        case BPF_LINK_GET_FD_BY_ID:
                err = bpf_link_get_fd_by_id(&attr);
                break;
        case BPF_LINK_GET_NEXT_ID:
                err = bpf_obj_get_next_id(&attr, uattr.user,
                                          &link_idr, &link_idr_lock);
                break;
        case BPF_ENABLE_STATS:
                err = bpf_enable_stats(&attr);
                break;
        case BPF_ITER_CREATE:
                err = bpf_iter_create(&attr);
                break;
        case BPF_LINK_DETACH:
                err = link_detach(&attr);
                break;
        case BPF_PROG_BIND_MAP:
                err = bpf_prog_bind_map(&attr);
                break;
        case BPF_TOKEN_CREATE:
                err = token_create(&attr);
                break;
        case BPF_PROG_STREAM_READ_BY_FD:
                err = prog_stream_read(&attr);
                break;
        default:
                err = -EINVAL;
                break;
        }

        return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
        return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
}

static bool syscall_prog_is_valid_access(int off, int size,
                                         enum bpf_access_type type,
                                         const struct bpf_prog *prog,
                                         struct bpf_insn_access_aux *info)
{
        if (off < 0 || off >= U16_MAX)
                return false;
        if (off % size != 0)
                return false;
        return true;
}

BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
{
        switch (cmd) {
        case BPF_MAP_CREATE:
        case BPF_MAP_DELETE_ELEM:
        case BPF_MAP_UPDATE_ELEM:
        case BPF_MAP_FREEZE:
        case BPF_MAP_GET_FD_BY_ID:
        case BPF_PROG_LOAD:
        case BPF_BTF_LOAD:
        case BPF_LINK_CREATE:
        case BPF_RAW_TRACEPOINT_OPEN:
                break;
        default:
                return -EINVAL;
        }
        return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
}

/* To shut up -Wmissing-prototypes.
 * This function is used by the kernel light skeleton
 * to load bpf programs when modules are loaded or during kernel boot.
 * See tools/lib/bpf/skel_internal.h
 */
int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);

int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
        struct bpf_prog * __maybe_unused prog;
        struct bpf_tramp_run_ctx __maybe_unused run_ctx;

        switch (cmd) {
#ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
        case BPF_PROG_TEST_RUN:
                if (attr->test.data_in || attr->test.data_out ||
                    attr->test.ctx_out || attr->test.duration ||
                    attr->test.repeat || attr->test.flags)
                        return -EINVAL;

                prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
                if (IS_ERR(prog))
                        return PTR_ERR(prog);

                if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
                    attr->test.ctx_size_in > U16_MAX) {
                        bpf_prog_put(prog);
                        return -EINVAL;
                }

                run_ctx.bpf_cookie = 0;
                if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
                        /* recursion detected */
                        __bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
                        bpf_prog_put(prog);
                        return -EBUSY;
                }
                attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
                __bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
                                                &run_ctx);
                bpf_prog_put(prog);
                return 0;
#endif
        default:
                return ____bpf_sys_bpf(cmd, attr, size);
        }
}
EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL");

static const struct bpf_func_proto bpf_sys_bpf_proto = {
        .func = bpf_sys_bpf,
        .gpl_only = false,
        .ret_type = RET_INTEGER,
        .arg1_type = ARG_ANYTHING,
        .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
        .arg3_type = ARG_CONST_SIZE,
};

const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
        return bpf_base_func_proto(func_id, prog);
}
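/*
 * Illustrative sketch of how a BPF_PROG_TYPE_SYSCALL program (e.g. a light
 * skeleton loader) is expected to use bpf_sys_bpf() above and bpf_sys_close()
 * below: the attr lives in the program's own data and is handed to
 * bpf_sys_bpf(), which re-enters __sys_bpf() with a kernel pointer. The
 * section, context and variable names here are illustrative only:
 *
 *        static union bpf_attr map_create_attr = {
 *                .map_type = BPF_MAP_TYPE_ARRAY,
 *                .key_size = 4,
 *                .value_size = 8,
 *                .max_entries = 1,
 *        };
 *
 *        SEC("syscall")
 *        int loader(void *ctx)
 *        {
 *                int map_fd;
 *
 *                map_fd = bpf_sys_bpf(BPF_MAP_CREATE, &map_create_attr,
 *                                     sizeof(map_create_attr));
 *                if (map_fd < 0)
 *                        return map_fd;
 *                // ... reference the fd in later BPF_PROG_LOAD attrs, then ...
 *                bpf_sys_close(map_fd);
 *                return 0;
 *        }
 */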
BPF_CALL_1(bpf_sys_close, u32, fd)
{
        /* When a bpf program calls this helper there should not be
         * an fdget() without a matching, completed fdput().
         * This helper is allowed in the following callchain only:
         * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
         */
        return close_fd(fd);
}

static const struct bpf_func_proto bpf_sys_close_proto = {
        .func = bpf_sys_close,
        .gpl_only = false,
        .ret_type = RET_INTEGER,
        .arg1_type = ARG_ANYTHING,
};

BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
        *res = 0;
        if (flags)
                return -EINVAL;

        if (name_sz <= 1 || name[name_sz - 1])
                return -EINVAL;

        if (!bpf_dump_raw_ok(current_cred()))
                return -EPERM;

        *res = kallsyms_lookup_name(name);
        return *res ? 0 : -ENOENT;
}

static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
        .func = bpf_kallsyms_lookup_name,
        .gpl_only = false,
        .ret_type = RET_INTEGER,
        .arg1_type = ARG_PTR_TO_MEM,
        .arg2_type = ARG_CONST_SIZE_OR_ZERO,
        .arg3_type = ARG_ANYTHING,
        .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
        .arg4_size = sizeof(u64),
};

static const struct bpf_func_proto *
syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
        switch (func_id) {
        case BPF_FUNC_sys_bpf:
                return !bpf_token_capable(prog->aux->token, CAP_PERFMON)
                       ? NULL : &bpf_sys_bpf_proto;
        case BPF_FUNC_btf_find_by_name_kind:
                return &bpf_btf_find_by_name_kind_proto;
        case BPF_FUNC_sys_close:
                return &bpf_sys_close_proto;
        case BPF_FUNC_kallsyms_lookup_name:
                return &bpf_kallsyms_lookup_name_proto;
        default:
                return tracing_prog_func_proto(func_id, prog);
        }
}

const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
        .get_func_proto = syscall_prog_func_proto,
        .is_valid_access = syscall_prog_is_valid_access,
};

const struct bpf_prog_ops bpf_syscall_prog_ops = {
        .test_run = bpf_prog_test_run_syscall,
};

#ifdef CONFIG_SYSCTL
static int bpf_stats_handler(const struct ctl_table *table, int write,
                             void *buffer, size_t *lenp, loff_t *ppos)
{
        struct static_key *key = (struct static_key *)table->data;
        static int saved_val;
        int val, ret;
        struct ctl_table tmp = {
                .data = &val,
                .maxlen = sizeof(val),
                .mode = table->mode,
                .extra1 = SYSCTL_ZERO,
                .extra2 = SYSCTL_ONE,
        };

        if (write && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        mutex_lock(&bpf_stats_enabled_mutex);
        val = saved_val;
        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
        if (write && !ret && val != saved_val) {
                if (val)
                        static_key_slow_inc(key);
                else
                        static_key_slow_dec(key);
                saved_val = val;
        }
        mutex_unlock(&bpf_stats_enabled_mutex);
        return ret;
}

void __weak unpriv_ebpf_notify(int new_state)
{
}
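/*
 * Handler for kernel.unprivileged_bpf_disabled. Accepted values are 0
 * (unprivileged bpf() syscall access enabled), 1 (disabled permanently; cannot
 * be changed again until reboot) and 2 (disabled, but a CAP_SYS_ADMIN writer
 * may still switch it back to 0 or up to 1). The locked_state check below is
 * what makes value 1 a one-way latch.
 */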
static int bpf_unpriv_handler(const struct ctl_table *table, int write,
                              void *buffer, size_t *lenp, loff_t *ppos)
{
        int ret, unpriv_enable = *(int *)table->data;
        bool locked_state = unpriv_enable == 1;
        struct ctl_table tmp = *table;

        if (write && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        tmp.data = &unpriv_enable;
        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
        if (write && !ret) {
                if (locked_state && unpriv_enable != 1)
                        return -EPERM;
                *(int *)table->data = unpriv_enable;
        }

        if (write)
                unpriv_ebpf_notify(unpriv_enable);

        return ret;
}

static const struct ctl_table bpf_syscall_table[] = {
        {
                .procname = "unprivileged_bpf_disabled",
                .data = &sysctl_unprivileged_bpf_disabled,
                .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
                .mode = 0644,
                .proc_handler = bpf_unpriv_handler,
                .extra1 = SYSCTL_ZERO,
                .extra2 = SYSCTL_TWO,
        },
        {
                .procname = "bpf_stats_enabled",
                .data = &bpf_stats_enabled_key.key,
                .mode = 0644,
                .proc_handler = bpf_stats_handler,
        },
};

static int __init bpf_syscall_sysctl_init(void)
{
        register_sysctl_init("kernel", bpf_syscall_table);
        return 0;
}
late_initcall(bpf_syscall_sysctl_init);
#endif /* CONFIG_SYSCTL */
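/*
 * Userspace sketch (illustrative only): the syscall entry point implemented
 * above is reached through bpf(2); glibc typically provides no wrapper, so
 * callers usually go through syscall(2) directly, e.g. to create a map:
 *
 *        #include <linux/bpf.h>
 *        #include <sys/syscall.h>
 *        #include <unistd.h>
 *        #include <string.h>
 *
 *        int bpf_map_create_example(void)
 *        {
 *                union bpf_attr attr;
 *
 *                memset(&attr, 0, sizeof(attr));
 *                attr.map_type = BPF_MAP_TYPE_HASH;
 *                attr.key_size = sizeof(int);
 *                attr.value_size = sizeof(long);
 *                attr.max_entries = 64;
 *
 *                // unused tail of attr must stay zero, see bpf_check_uarg_tail_zero()
 *                return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *        }
 */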