// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <crypto/sha2.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
#include <linux/bsearch.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
#include <linux/pgtable.h>
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/sort.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
#include <linux/tracepoint.h>
#include <linux/overflow.h>
#include <linux/cookie.h>
#include <linux/verification.h>

#include <net/netfilter/nf_bpf_link.h>
#include <net/netkit.h>
#include <net/tcx.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
			IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK	(BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
DEFINE_COOKIE(bpf_map_cookie);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);
static DEFINE_IDR(link_idr);
static DEFINE_SPINLOCK(link_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly =
	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
			     size_t expected_size,
			     size_t actual_size)
{
	int res;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (actual_size <= expected_size)
		return 0;

	if (uaddr.is_kernel)
		res = memchr_inv(uaddr.kernel + expected_size, 0,
				 actual_size - expected_size) == NULL;
	else
		res = check_zeroed_user(uaddr.user + expected_size,
					actual_size - expected_size);
	if (res < 0)
		return res;
	return res ?
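/* Editor's illustrative sketch (not part of the original file): how a command
 * handler would typically use bpf_check_uarg_tail_zero() to accept a newer,
 * larger uapi struct from user space as long as the bytes beyond what this
 * kernel understands are zero. The handler name is hypothetical.
 */
#if 0
static int example_get_info(bpfptr_t uattr_info, u32 info_len)
{
	struct bpf_prog_info info;
	int err;

	/* Reject only if the unknown tail is non-zero (or absurdly large). */
	err = bpf_check_uarg_tail_zero(uattr_info, sizeof(info), info_len);
	if (err)
		return err;

	info_len = min_t(u32, sizeof(info), info_len);
	memset(&info, 0, sizeof(info));
	if (copy_from_bpfptr(&info, uattr_info, info_len))
		return -EFAULT;
	/* ... fill in 'info' and copy it back to user space ... */
	return 0;
}
#endif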
0 : -E2BIG; 111 } 112 113 const struct bpf_map_ops bpf_map_offload_ops = { 114 .map_meta_equal = bpf_map_meta_equal, 115 .map_alloc = bpf_map_offload_map_alloc, 116 .map_free = bpf_map_offload_map_free, 117 .map_check_btf = map_check_no_btf, 118 .map_mem_usage = bpf_map_offload_map_mem_usage, 119 }; 120 121 static void bpf_map_write_active_inc(struct bpf_map *map) 122 { 123 atomic64_inc(&map->writecnt); 124 } 125 126 static void bpf_map_write_active_dec(struct bpf_map *map) 127 { 128 atomic64_dec(&map->writecnt); 129 } 130 131 bool bpf_map_write_active(const struct bpf_map *map) 132 { 133 return atomic64_read(&map->writecnt) != 0; 134 } 135 136 static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags) 137 { 138 if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) 139 return map->value_size; 140 else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 141 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 142 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || 143 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 144 return round_up(map->value_size, 8) * num_possible_cpus(); 145 else if (IS_FD_MAP(map)) 146 return sizeof(u32); 147 else 148 return map->value_size; 149 } 150 151 static void maybe_wait_bpf_programs(struct bpf_map *map) 152 { 153 /* Wait for any running non-sleepable BPF programs to complete so that 154 * userspace, when we return to it, knows that all non-sleepable 155 * programs that could be running use the new map value. For sleepable 156 * BPF programs, synchronize_rcu_tasks_trace() should be used to wait 157 * for the completions of these programs, but considering the waiting 158 * time can be very long and userspace may think it will hang forever, 159 * so don't handle sleepable BPF programs now. 160 */ 161 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || 162 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 163 synchronize_rcu_expedited(); 164 } 165 166 static void unpin_uptr_kaddr(void *kaddr) 167 { 168 if (kaddr) 169 unpin_user_page(virt_to_page(kaddr)); 170 } 171 172 static void __bpf_obj_unpin_uptrs(struct btf_record *rec, u32 cnt, void *obj) 173 { 174 const struct btf_field *field; 175 void **uptr_addr; 176 int i; 177 178 for (i = 0, field = rec->fields; i < cnt; i++, field++) { 179 if (field->type != BPF_UPTR) 180 continue; 181 182 uptr_addr = obj + field->offset; 183 unpin_uptr_kaddr(*uptr_addr); 184 } 185 } 186 187 static void bpf_obj_unpin_uptrs(struct btf_record *rec, void *obj) 188 { 189 if (!btf_record_has_field(rec, BPF_UPTR)) 190 return; 191 192 __bpf_obj_unpin_uptrs(rec, rec->cnt, obj); 193 } 194 195 static int bpf_obj_pin_uptrs(struct btf_record *rec, void *obj) 196 { 197 const struct btf_field *field; 198 const struct btf_type *t; 199 unsigned long start, end; 200 struct page *page; 201 void **uptr_addr; 202 int i, err; 203 204 if (!btf_record_has_field(rec, BPF_UPTR)) 205 return 0; 206 207 for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) { 208 if (field->type != BPF_UPTR) 209 continue; 210 211 uptr_addr = obj + field->offset; 212 start = *(unsigned long *)uptr_addr; 213 if (!start) 214 continue; 215 216 t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id); 217 /* t->size was checked for zero before */ 218 if (check_add_overflow(start, t->size - 1, &end)) { 219 err = -EFAULT; 220 goto unpin_all; 221 } 222 223 /* The uptr's struct cannot span across two pages */ 224 if ((start & PAGE_MASK) != (end & PAGE_MASK)) { 225 err = -EOPNOTSUPP; 226 goto unpin_all; 227 } 228 229 err = pin_user_pages_fast(start, 1, FOLL_LONGTERM | FOLL_WRITE, &page); 230 if (err 
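/* Editor's illustrative sketch (not part of the original file): a standalone
 * predicate restating the constraint enforced in bpf_obj_pin_uptrs() above -
 * a BPF_UPTR struct must fit entirely within a single page, because only one
 * user page is pinned and mapped via page_address(). Assumes size > 0, as the
 * surrounding code guarantees for t->size.
 */
#if 0
static bool example_uptr_fits_one_page(unsigned long start, u32 size)
{
	unsigned long end;

	if (check_add_overflow(start, (unsigned long)size - 1, &end))
		return false;
	return (start & PAGE_MASK) == (end & PAGE_MASK);
}
#endif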
!= 1) 231 goto unpin_all; 232 233 if (PageHighMem(page)) { 234 err = -EOPNOTSUPP; 235 unpin_user_page(page); 236 goto unpin_all; 237 } 238 239 *uptr_addr = page_address(page) + offset_in_page(start); 240 } 241 242 return 0; 243 244 unpin_all: 245 __bpf_obj_unpin_uptrs(rec, i, obj); 246 return err; 247 } 248 249 static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, 250 void *key, void *value, __u64 flags) 251 { 252 int err; 253 254 /* Need to create a kthread, thus must support schedule */ 255 if (bpf_map_is_offloaded(map)) { 256 return bpf_map_offload_update_elem(map, key, value, flags); 257 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || 258 map->map_type == BPF_MAP_TYPE_ARENA || 259 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 260 return map->ops->map_update_elem(map, key, value, flags); 261 } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || 262 map->map_type == BPF_MAP_TYPE_SOCKMAP) { 263 return sock_map_update_elem_sys(map, key, value, flags); 264 } else if (IS_FD_PROG_ARRAY(map)) { 265 return bpf_fd_array_map_update_elem(map, map_file, key, value, 266 flags); 267 } 268 269 bpf_disable_instrumentation(); 270 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 271 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 272 err = bpf_percpu_hash_update(map, key, value, flags); 273 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 274 err = bpf_percpu_array_update(map, key, value, flags); 275 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 276 err = bpf_percpu_cgroup_storage_update(map, key, value, 277 flags); 278 } else if (IS_FD_ARRAY(map)) { 279 err = bpf_fd_array_map_update_elem(map, map_file, key, value, 280 flags); 281 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 282 err = bpf_fd_htab_map_update_elem(map, map_file, key, value, 283 flags); 284 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 285 /* rcu_read_lock() is not needed */ 286 err = bpf_fd_reuseport_array_update_elem(map, key, value, 287 flags); 288 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 289 map->map_type == BPF_MAP_TYPE_STACK || 290 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 291 err = map->ops->map_push_elem(map, value, flags); 292 } else { 293 err = bpf_obj_pin_uptrs(map->record, value); 294 if (!err) { 295 rcu_read_lock(); 296 err = map->ops->map_update_elem(map, key, value, flags); 297 rcu_read_unlock(); 298 if (err) 299 bpf_obj_unpin_uptrs(map->record, value); 300 } 301 } 302 bpf_enable_instrumentation(); 303 304 return err; 305 } 306 307 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, 308 __u64 flags) 309 { 310 void *ptr; 311 int err; 312 313 if (bpf_map_is_offloaded(map)) 314 return bpf_map_offload_lookup_elem(map, key, value); 315 316 bpf_disable_instrumentation(); 317 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 318 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 319 err = bpf_percpu_hash_copy(map, key, value, flags); 320 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 321 err = bpf_percpu_array_copy(map, key, value, flags); 322 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 323 err = bpf_percpu_cgroup_storage_copy(map, key, value, flags); 324 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 325 err = bpf_stackmap_extract(map, key, value, false); 326 } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { 327 err = bpf_fd_array_map_lookup_elem(map, key, value); 328 } else if (IS_FD_HASH(map)) { 329 err = bpf_fd_htab_map_lookup_elem(map, key, value); 330 } else if 
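/* Editor's illustrative sketch (not part of the original file): the userspace
 * view of the value sizing done by bpf_map_value_size(). For per-CPU maps,
 * BPF_MAP_LOOKUP_ELEM copies one 8-byte-aligned value per possible CPU, so
 * the caller's buffer must be round_up(value_size, 8) * nr_possible_cpus
 * bytes. The helper below is hypothetical; nr_cpus would typically come from
 * libbpf_num_possible_cpus() or /sys/devices/system/cpu/possible.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int example_percpu_lookup(int map_fd, const void *key, void *values)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (uint64_t)(unsigned long)key;
	/* 'values' must hold one rounded-up value slot per possible CPU */
	attr.value = (uint64_t)(unsigned long)values;
	return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
#endif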
(map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 331 err = bpf_fd_reuseport_array_lookup_elem(map, key, value); 332 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 333 map->map_type == BPF_MAP_TYPE_STACK || 334 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 335 err = map->ops->map_peek_elem(map, value); 336 } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 337 /* struct_ops map requires directly updating "value" */ 338 err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); 339 } else { 340 rcu_read_lock(); 341 if (map->ops->map_lookup_elem_sys_only) 342 ptr = map->ops->map_lookup_elem_sys_only(map, key); 343 else 344 ptr = map->ops->map_lookup_elem(map, key); 345 if (IS_ERR(ptr)) { 346 err = PTR_ERR(ptr); 347 } else if (!ptr) { 348 err = -ENOENT; 349 } else { 350 err = 0; 351 if (flags & BPF_F_LOCK) 352 /* lock 'ptr' and copy everything but lock */ 353 copy_map_value_locked(map, value, ptr, true); 354 else 355 copy_map_value(map, value, ptr); 356 /* mask lock and timer, since value wasn't zero inited */ 357 check_and_init_map_value(map, value); 358 } 359 rcu_read_unlock(); 360 } 361 362 bpf_enable_instrumentation(); 363 364 return err; 365 } 366 367 /* Please, do not use this function outside from the map creation path 368 * (e.g. in map update path) without taking care of setting the active 369 * memory cgroup (see at bpf_map_kmalloc_node() for example). 370 */ 371 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) 372 { 373 /* We really just want to fail instead of triggering OOM killer 374 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, 375 * which is used for lower order allocation requests. 376 * 377 * It has been observed that higher order allocation requests done by 378 * vmalloc with __GFP_NORETRY being set might fail due to not trying 379 * to reclaim memory from the page cache, thus we set 380 * __GFP_RETRY_MAYFAIL to avoid such situations. 381 */ 382 383 gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO); 384 unsigned int flags = 0; 385 unsigned long align = 1; 386 void *area; 387 388 if (size >= SIZE_MAX) 389 return NULL; 390 391 /* kmalloc()'ed memory can't be mmap()'ed */ 392 if (mmapable) { 393 BUG_ON(!PAGE_ALIGNED(size)); 394 align = SHMLBA; 395 flags = VM_USERMAP; 396 } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 397 area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, 398 numa_node); 399 if (area != NULL) 400 return area; 401 } 402 403 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, 404 gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL, 405 flags, numa_node, __builtin_return_address(0)); 406 } 407 408 void *bpf_map_area_alloc(u64 size, int numa_node) 409 { 410 return __bpf_map_area_alloc(size, numa_node, false); 411 } 412 413 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node) 414 { 415 return __bpf_map_area_alloc(size, numa_node, true); 416 } 417 418 void bpf_map_area_free(void *area) 419 { 420 kvfree(area); 421 } 422 423 static u32 bpf_map_flags_retain_permanent(u32 flags) 424 { 425 /* Some map creation flags are not tied to the map object but 426 * rather to the map fd instead, so they have no meaning upon 427 * map object inspection since multiple file descriptors with 428 * different (access) properties can exist here. Thus, given 429 * this has zero meaning for the map itself, lets clear these 430 * from here. 
431 */ 432 return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); 433 } 434 435 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) 436 { 437 map->map_type = attr->map_type; 438 map->key_size = attr->key_size; 439 map->value_size = attr->value_size; 440 map->max_entries = attr->max_entries; 441 map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); 442 map->numa_node = bpf_map_attr_numa_node(attr); 443 map->map_extra = attr->map_extra; 444 } 445 446 static int bpf_map_alloc_id(struct bpf_map *map) 447 { 448 int id; 449 450 idr_preload(GFP_KERNEL); 451 spin_lock_bh(&map_idr_lock); 452 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); 453 if (id > 0) 454 map->id = id; 455 spin_unlock_bh(&map_idr_lock); 456 idr_preload_end(); 457 458 if (WARN_ON_ONCE(!id)) 459 return -ENOSPC; 460 461 return id > 0 ? 0 : id; 462 } 463 464 void bpf_map_free_id(struct bpf_map *map) 465 { 466 unsigned long flags; 467 468 /* Offloaded maps are removed from the IDR store when their device 469 * disappears - even if someone holds an fd to them they are unusable, 470 * the memory is gone, all ops will fail; they are simply waiting for 471 * refcnt to drop to be freed. 472 */ 473 if (!map->id) 474 return; 475 476 spin_lock_irqsave(&map_idr_lock, flags); 477 478 idr_remove(&map_idr, map->id); 479 map->id = 0; 480 481 spin_unlock_irqrestore(&map_idr_lock, flags); 482 } 483 484 #ifdef CONFIG_MEMCG 485 static void bpf_map_save_memcg(struct bpf_map *map) 486 { 487 /* Currently if a map is created by a process belonging to the root 488 * memory cgroup, get_obj_cgroup_from_current() will return NULL. 489 * So we have to check map->objcg for being NULL each time it's 490 * being used. 491 */ 492 if (memcg_bpf_enabled()) 493 map->objcg = get_obj_cgroup_from_current(); 494 } 495 496 static void bpf_map_release_memcg(struct bpf_map *map) 497 { 498 if (map->objcg) 499 obj_cgroup_put(map->objcg); 500 } 501 502 static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map) 503 { 504 if (map->objcg) 505 return get_mem_cgroup_from_objcg(map->objcg); 506 507 return root_mem_cgroup; 508 } 509 510 void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg, 511 struct mem_cgroup **new_memcg) 512 { 513 *new_memcg = bpf_map_get_memcg(map); 514 *old_memcg = set_active_memcg(*new_memcg); 515 } 516 517 void bpf_map_memcg_exit(struct mem_cgroup *old_memcg, 518 struct mem_cgroup *new_memcg) 519 { 520 set_active_memcg(old_memcg); 521 mem_cgroup_put(new_memcg); 522 } 523 524 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, 525 int node) 526 { 527 struct mem_cgroup *memcg, *old_memcg; 528 void *ptr; 529 530 bpf_map_memcg_enter(map, &old_memcg, &memcg); 531 ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); 532 bpf_map_memcg_exit(old_memcg, memcg); 533 534 return ptr; 535 } 536 537 void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, 538 int node) 539 { 540 struct mem_cgroup *memcg, *old_memcg; 541 void *ptr; 542 543 bpf_map_memcg_enter(map, &old_memcg, &memcg); 544 ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node); 545 bpf_map_memcg_exit(old_memcg, memcg); 546 547 return ptr; 548 } 549 550 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) 551 { 552 struct mem_cgroup *memcg, *old_memcg; 553 void *ptr; 554 555 bpf_map_memcg_enter(map, &old_memcg, &memcg); 556 ptr = kzalloc(size, flags | __GFP_ACCOUNT); 557 bpf_map_memcg_exit(old_memcg, memcg); 558 559 return ptr; 560 } 561 562 void 
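/* Editor's illustrative sketch (not part of the original file): the intended
 * usage pattern for the memcg-aware helpers above. A map implementation
 * allocates per-element memory through bpf_map_kmalloc_node() rather than
 * plain kmalloc() so the allocation is charged to the memory cgroup captured
 * at map creation time, not to whichever task triggers the update.
 * 'struct example_elem' and the function are hypothetical.
 */
#if 0
struct example_elem {
	struct hlist_node node;
	char data[];
};

static struct example_elem *example_alloc_elem(struct bpf_map *map, u32 size)
{
	return bpf_map_kmalloc_node(map, sizeof(struct example_elem) + size,
				    GFP_ATOMIC | __GFP_NOWARN,
				    map->numa_node);
}
#endif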
*bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, 563 gfp_t flags) 564 { 565 struct mem_cgroup *memcg, *old_memcg; 566 void *ptr; 567 568 bpf_map_memcg_enter(map, &old_memcg, &memcg); 569 ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT); 570 bpf_map_memcg_exit(old_memcg, memcg); 571 572 return ptr; 573 } 574 575 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, 576 size_t align, gfp_t flags) 577 { 578 struct mem_cgroup *memcg, *old_memcg; 579 void __percpu *ptr; 580 581 bpf_map_memcg_enter(map, &old_memcg, &memcg); 582 ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT); 583 bpf_map_memcg_exit(old_memcg, memcg); 584 585 return ptr; 586 } 587 588 #else 589 static void bpf_map_save_memcg(struct bpf_map *map) 590 { 591 } 592 593 static void bpf_map_release_memcg(struct bpf_map *map) 594 { 595 } 596 #endif 597 598 static bool can_alloc_pages(void) 599 { 600 return preempt_count() == 0 && !irqs_disabled() && 601 !IS_ENABLED(CONFIG_PREEMPT_RT); 602 } 603 604 static struct page *__bpf_alloc_page(int nid) 605 { 606 if (!can_alloc_pages()) 607 return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0); 608 609 return alloc_pages_node(nid, 610 GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT 611 | __GFP_NOWARN, 612 0); 613 } 614 615 int bpf_map_alloc_pages(const struct bpf_map *map, int nid, 616 unsigned long nr_pages, struct page **pages) 617 { 618 unsigned long i, j; 619 struct page *pg; 620 int ret = 0; 621 622 for (i = 0; i < nr_pages; i++) { 623 pg = __bpf_alloc_page(nid); 624 625 if (pg) { 626 pages[i] = pg; 627 continue; 628 } 629 for (j = 0; j < i; j++) 630 free_pages_nolock(pages[j], 0); 631 ret = -ENOMEM; 632 break; 633 } 634 635 return ret; 636 } 637 638 639 static int btf_field_cmp(const void *a, const void *b) 640 { 641 const struct btf_field *f1 = a, *f2 = b; 642 643 if (f1->offset < f2->offset) 644 return -1; 645 else if (f1->offset > f2->offset) 646 return 1; 647 return 0; 648 } 649 650 struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset, 651 u32 field_mask) 652 { 653 struct btf_field *field; 654 655 if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & field_mask)) 656 return NULL; 657 field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp); 658 if (!field || !(field->type & field_mask)) 659 return NULL; 660 return field; 661 } 662 663 void btf_record_free(struct btf_record *rec) 664 { 665 int i; 666 667 if (IS_ERR_OR_NULL(rec)) 668 return; 669 for (i = 0; i < rec->cnt; i++) { 670 switch (rec->fields[i].type) { 671 case BPF_KPTR_UNREF: 672 case BPF_KPTR_REF: 673 case BPF_KPTR_PERCPU: 674 case BPF_UPTR: 675 if (rec->fields[i].kptr.module) 676 module_put(rec->fields[i].kptr.module); 677 if (btf_is_kernel(rec->fields[i].kptr.btf)) 678 btf_put(rec->fields[i].kptr.btf); 679 break; 680 case BPF_LIST_HEAD: 681 case BPF_LIST_NODE: 682 case BPF_RB_ROOT: 683 case BPF_RB_NODE: 684 case BPF_SPIN_LOCK: 685 case BPF_RES_SPIN_LOCK: 686 case BPF_TIMER: 687 case BPF_REFCOUNT: 688 case BPF_WORKQUEUE: 689 case BPF_TASK_WORK: 690 /* Nothing to release */ 691 break; 692 default: 693 WARN_ON_ONCE(1); 694 continue; 695 } 696 } 697 kfree(rec); 698 } 699 700 void bpf_map_free_record(struct bpf_map *map) 701 { 702 btf_record_free(map->record); 703 map->record = NULL; 704 } 705 706 struct btf_record *btf_record_dup(const struct btf_record *rec) 707 { 708 const struct btf_field *fields; 709 struct btf_record *new_rec; 710 int ret, size, i; 711 712 if (IS_ERR_OR_NULL(rec)) 713 return NULL; 714 size = struct_size(rec, fields, rec->cnt); 715 
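/* Editor's illustrative sketch (not part of the original file): how
 * btf_record_find() is meant to be consumed. Given a byte offset into a map
 * value, it locates the special field (if any) of a requested kind; a caller
 * such as the verifier can use this to check whether an access lands exactly
 * on a kptr slot. The helper name is hypothetical.
 */
#if 0
static bool example_offset_is_kptr(const struct bpf_map *map, u32 off)
{
	const struct btf_field *field;

	field = btf_record_find(map->record, off,
				BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU);
	return field != NULL;
}
#endif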
new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN); 716 if (!new_rec) 717 return ERR_PTR(-ENOMEM); 718 /* Do a deep copy of the btf_record */ 719 fields = rec->fields; 720 new_rec->cnt = 0; 721 for (i = 0; i < rec->cnt; i++) { 722 switch (fields[i].type) { 723 case BPF_KPTR_UNREF: 724 case BPF_KPTR_REF: 725 case BPF_KPTR_PERCPU: 726 case BPF_UPTR: 727 if (btf_is_kernel(fields[i].kptr.btf)) 728 btf_get(fields[i].kptr.btf); 729 if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { 730 ret = -ENXIO; 731 goto free; 732 } 733 break; 734 case BPF_LIST_HEAD: 735 case BPF_LIST_NODE: 736 case BPF_RB_ROOT: 737 case BPF_RB_NODE: 738 case BPF_SPIN_LOCK: 739 case BPF_RES_SPIN_LOCK: 740 case BPF_TIMER: 741 case BPF_REFCOUNT: 742 case BPF_WORKQUEUE: 743 case BPF_TASK_WORK: 744 /* Nothing to acquire */ 745 break; 746 default: 747 ret = -EFAULT; 748 WARN_ON_ONCE(1); 749 goto free; 750 } 751 new_rec->cnt++; 752 } 753 return new_rec; 754 free: 755 btf_record_free(new_rec); 756 return ERR_PTR(ret); 757 } 758 759 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b) 760 { 761 bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b); 762 int size; 763 764 if (!a_has_fields && !b_has_fields) 765 return true; 766 if (a_has_fields != b_has_fields) 767 return false; 768 if (rec_a->cnt != rec_b->cnt) 769 return false; 770 size = struct_size(rec_a, fields, rec_a->cnt); 771 /* btf_parse_fields uses kzalloc to allocate a btf_record, so unused 772 * members are zeroed out. So memcmp is safe to do without worrying 773 * about padding/unused fields. 774 * 775 * While spin_lock, timer, and kptr have no relation to map BTF, 776 * list_head metadata is specific to map BTF, the btf and value_rec 777 * members in particular. btf is the map BTF, while value_rec points to 778 * btf_record in that map BTF. 779 * 780 * So while by default, we don't rely on the map BTF (which the records 781 * were parsed from) matching for both records, which is not backwards 782 * compatible, in case list_head is part of it, we implicitly rely on 783 * that by way of depending on memcmp succeeding for it. 
784 */ 785 return !memcmp(rec_a, rec_b, size); 786 } 787 788 void bpf_obj_free_timer(const struct btf_record *rec, void *obj) 789 { 790 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER))) 791 return; 792 bpf_timer_cancel_and_free(obj + rec->timer_off); 793 } 794 795 void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj) 796 { 797 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_WORKQUEUE))) 798 return; 799 bpf_wq_cancel_and_free(obj + rec->wq_off); 800 } 801 802 void bpf_obj_free_task_work(const struct btf_record *rec, void *obj) 803 { 804 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK))) 805 return; 806 bpf_task_work_cancel_and_free(obj + rec->task_work_off); 807 } 808 809 void bpf_obj_free_fields(const struct btf_record *rec, void *obj) 810 { 811 const struct btf_field *fields; 812 int i; 813 814 if (IS_ERR_OR_NULL(rec)) 815 return; 816 fields = rec->fields; 817 for (i = 0; i < rec->cnt; i++) { 818 struct btf_struct_meta *pointee_struct_meta; 819 const struct btf_field *field = &fields[i]; 820 void *field_ptr = obj + field->offset; 821 void *xchgd_field; 822 823 switch (fields[i].type) { 824 case BPF_SPIN_LOCK: 825 case BPF_RES_SPIN_LOCK: 826 break; 827 case BPF_TIMER: 828 bpf_timer_cancel_and_free(field_ptr); 829 break; 830 case BPF_WORKQUEUE: 831 bpf_wq_cancel_and_free(field_ptr); 832 break; 833 case BPF_TASK_WORK: 834 bpf_task_work_cancel_and_free(field_ptr); 835 break; 836 case BPF_KPTR_UNREF: 837 WRITE_ONCE(*(u64 *)field_ptr, 0); 838 break; 839 case BPF_KPTR_REF: 840 case BPF_KPTR_PERCPU: 841 xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0); 842 if (!xchgd_field) 843 break; 844 845 if (!btf_is_kernel(field->kptr.btf)) { 846 pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, 847 field->kptr.btf_id); 848 __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? 849 pointee_struct_meta->record : NULL, 850 fields[i].type == BPF_KPTR_PERCPU); 851 } else { 852 field->kptr.dtor(xchgd_field); 853 } 854 break; 855 case BPF_UPTR: 856 /* The caller ensured that no one is using the uptr */ 857 unpin_uptr_kaddr(*(void **)field_ptr); 858 break; 859 case BPF_LIST_HEAD: 860 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 861 continue; 862 bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off); 863 break; 864 case BPF_RB_ROOT: 865 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 866 continue; 867 bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off); 868 break; 869 case BPF_LIST_NODE: 870 case BPF_RB_NODE: 871 case BPF_REFCOUNT: 872 break; 873 default: 874 WARN_ON_ONCE(1); 875 continue; 876 } 877 } 878 } 879 880 static void bpf_map_free(struct bpf_map *map) 881 { 882 struct btf_record *rec = map->record; 883 struct btf *btf = map->btf; 884 885 /* implementation dependent freeing. Disabling migration to simplify 886 * the free of values or special fields allocated from bpf memory 887 * allocator. 888 */ 889 kfree(map->excl_prog_sha); 890 migrate_disable(); 891 map->ops->map_free(map); 892 migrate_enable(); 893 894 /* Delay freeing of btf_record for maps, as map_free 895 * callback usually needs access to them. It is better to do it here 896 * than require each callback to do the free itself manually. 897 * 898 * Note that the btf_record stashed in map->inner_map_meta->record was 899 * already freed using the map_free callback for map in map case which 900 * eventually calls bpf_map_free_meta, since inner_map_meta is only a 901 * template bpf_map struct used during verification. 
902 */ 903 btf_record_free(rec); 904 /* Delay freeing of btf for maps, as map_free callback may need 905 * struct_meta info which will be freed with btf_put(). 906 */ 907 btf_put(btf); 908 } 909 910 /* called from workqueue */ 911 static void bpf_map_free_deferred(struct work_struct *work) 912 { 913 struct bpf_map *map = container_of(work, struct bpf_map, work); 914 915 security_bpf_map_free(map); 916 bpf_map_release_memcg(map); 917 bpf_map_owner_free(map); 918 bpf_map_free(map); 919 } 920 921 static void bpf_map_put_uref(struct bpf_map *map) 922 { 923 if (atomic64_dec_and_test(&map->usercnt)) { 924 if (map->ops->map_release_uref) 925 map->ops->map_release_uref(map); 926 } 927 } 928 929 static void bpf_map_free_in_work(struct bpf_map *map) 930 { 931 INIT_WORK(&map->work, bpf_map_free_deferred); 932 /* Avoid spawning kworkers, since they all might contend 933 * for the same mutex like slab_mutex. 934 */ 935 queue_work(system_dfl_wq, &map->work); 936 } 937 938 static void bpf_map_free_rcu_gp(struct rcu_head *rcu) 939 { 940 bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu)); 941 } 942 943 static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu) 944 { 945 if (rcu_trace_implies_rcu_gp()) 946 bpf_map_free_rcu_gp(rcu); 947 else 948 call_rcu(rcu, bpf_map_free_rcu_gp); 949 } 950 951 /* decrement map refcnt and schedule it for freeing via workqueue 952 * (underlying map implementation ops->map_free() might sleep) 953 */ 954 void bpf_map_put(struct bpf_map *map) 955 { 956 if (atomic64_dec_and_test(&map->refcnt)) { 957 /* bpf_map_free_id() must be called first */ 958 bpf_map_free_id(map); 959 960 WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt)); 961 if (READ_ONCE(map->free_after_mult_rcu_gp)) 962 call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp); 963 else if (READ_ONCE(map->free_after_rcu_gp)) 964 call_rcu(&map->rcu, bpf_map_free_rcu_gp); 965 else 966 bpf_map_free_in_work(map); 967 } 968 } 969 EXPORT_SYMBOL_GPL(bpf_map_put); 970 971 void bpf_map_put_with_uref(struct bpf_map *map) 972 { 973 bpf_map_put_uref(map); 974 bpf_map_put(map); 975 } 976 977 static int bpf_map_release(struct inode *inode, struct file *filp) 978 { 979 struct bpf_map *map = filp->private_data; 980 981 if (map->ops->map_release) 982 map->ops->map_release(map, filp); 983 984 bpf_map_put_with_uref(map); 985 return 0; 986 } 987 988 static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) 989 { 990 fmode_t mode = fd_file(f)->f_mode; 991 992 /* Our file permissions may have been overridden by global 993 * map permissions facing syscall side. 
994 */ 995 if (READ_ONCE(map->frozen)) 996 mode &= ~FMODE_CAN_WRITE; 997 return mode; 998 } 999 1000 #ifdef CONFIG_PROC_FS 1001 /* Show the memory usage of a bpf map */ 1002 static u64 bpf_map_memory_usage(const struct bpf_map *map) 1003 { 1004 return map->ops->map_mem_usage(map); 1005 } 1006 1007 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 1008 { 1009 struct bpf_map *map = filp->private_data; 1010 u32 type = 0, jited = 0; 1011 1012 spin_lock(&map->owner_lock); 1013 if (map->owner) { 1014 type = map->owner->type; 1015 jited = map->owner->jited; 1016 } 1017 spin_unlock(&map->owner_lock); 1018 1019 seq_printf(m, 1020 "map_type:\t%u\n" 1021 "key_size:\t%u\n" 1022 "value_size:\t%u\n" 1023 "max_entries:\t%u\n" 1024 "map_flags:\t%#x\n" 1025 "map_extra:\t%#llx\n" 1026 "memlock:\t%llu\n" 1027 "map_id:\t%u\n" 1028 "frozen:\t%u\n", 1029 map->map_type, 1030 map->key_size, 1031 map->value_size, 1032 map->max_entries, 1033 map->map_flags, 1034 (unsigned long long)map->map_extra, 1035 bpf_map_memory_usage(map), 1036 map->id, 1037 READ_ONCE(map->frozen)); 1038 if (type) { 1039 seq_printf(m, "owner_prog_type:\t%u\n", type); 1040 seq_printf(m, "owner_jited:\t%u\n", jited); 1041 } 1042 } 1043 #endif 1044 1045 static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, 1046 loff_t *ppos) 1047 { 1048 /* We need this handler such that alloc_file() enables 1049 * f_mode with FMODE_CAN_READ. 1050 */ 1051 return -EINVAL; 1052 } 1053 1054 static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, 1055 size_t siz, loff_t *ppos) 1056 { 1057 /* We need this handler such that alloc_file() enables 1058 * f_mode with FMODE_CAN_WRITE. 1059 */ 1060 return -EINVAL; 1061 } 1062 1063 /* called for any extra memory-mapped regions (except initial) */ 1064 static void bpf_map_mmap_open(struct vm_area_struct *vma) 1065 { 1066 struct bpf_map *map = vma->vm_file->private_data; 1067 1068 if (vma->vm_flags & VM_MAYWRITE) 1069 bpf_map_write_active_inc(map); 1070 } 1071 1072 /* called for all unmapped memory region (including initial) */ 1073 static void bpf_map_mmap_close(struct vm_area_struct *vma) 1074 { 1075 struct bpf_map *map = vma->vm_file->private_data; 1076 1077 if (vma->vm_flags & VM_MAYWRITE) 1078 bpf_map_write_active_dec(map); 1079 } 1080 1081 static const struct vm_operations_struct bpf_map_default_vmops = { 1082 .open = bpf_map_mmap_open, 1083 .close = bpf_map_mmap_close, 1084 }; 1085 1086 static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) 1087 { 1088 struct bpf_map *map = filp->private_data; 1089 int err = 0; 1090 1091 if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record)) 1092 return -ENOTSUPP; 1093 1094 if (!(vma->vm_flags & VM_SHARED)) 1095 return -EINVAL; 1096 1097 mutex_lock(&map->freeze_mutex); 1098 1099 if (vma->vm_flags & VM_WRITE) { 1100 if (map->frozen) { 1101 err = -EPERM; 1102 goto out; 1103 } 1104 /* map is meant to be read-only, so do not allow mapping as 1105 * writable, because it's possible to leak a writable page 1106 * reference and allows user-space to still modify it after 1107 * freezing, while verifier will assume contents do not change 1108 */ 1109 if (map->map_flags & BPF_F_RDONLY_PROG) { 1110 err = -EACCES; 1111 goto out; 1112 } 1113 bpf_map_write_active_inc(map); 1114 } 1115 out: 1116 mutex_unlock(&map->freeze_mutex); 1117 if (err) 1118 return err; 1119 1120 /* set default open/close callbacks */ 1121 vma->vm_ops = &bpf_map_default_vmops; 1122 vma->vm_private_data = map; 1123 vm_flags_clear(vma, VM_MAYEXEC); 
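/* Editor's illustrative sketch (not part of the original file): the userspace
 * counterpart of the mmap path below. It creates an array map with
 * BPF_F_MMAPABLE and maps it read-write; once the map is frozen via
 * BPF_MAP_FREEZE, a new PROT_WRITE mapping is refused with EPERM, while
 * read-only mappings keep working. Error handling is minimal on purpose.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int example_mmapable_array(uint32_t value_size, uint32_t max_entries,
				  void **memp)
{
	union bpf_attr attr;
	void *mem;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_ARRAY;
	attr.key_size = sizeof(uint32_t);
	attr.value_size = value_size;
	attr.max_entries = max_entries;
	attr.map_flags = BPF_F_MMAPABLE;
	fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (fd < 0)
		return -1;

	/* Map the value area shared and writable (fails after freezing). */
	mem = mmap(NULL, (size_t)value_size * max_entries,
		   PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (mem == MAP_FAILED) {
		close(fd);
		return -1;
	}
	*memp = mem;
	return fd;
}
#endif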
1124 /* If mapping is read-only, then disallow potentially re-mapping with 1125 * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing 1126 * means that as far as BPF map's memory-mapped VMAs are concerned, 1127 * VM_WRITE and VM_MAYWRITE and equivalent, if one of them is set, 1128 * both should be set, so we can forget about VM_MAYWRITE and always 1129 * check just VM_WRITE 1130 */ 1131 if (!(vma->vm_flags & VM_WRITE)) 1132 vm_flags_clear(vma, VM_MAYWRITE); 1133 1134 err = map->ops->map_mmap(map, vma); 1135 if (err) { 1136 if (vma->vm_flags & VM_WRITE) 1137 bpf_map_write_active_dec(map); 1138 } 1139 1140 return err; 1141 } 1142 1143 static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts) 1144 { 1145 struct bpf_map *map = filp->private_data; 1146 1147 if (map->ops->map_poll) 1148 return map->ops->map_poll(map, filp, pts); 1149 1150 return EPOLLERR; 1151 } 1152 1153 static unsigned long bpf_get_unmapped_area(struct file *filp, unsigned long addr, 1154 unsigned long len, unsigned long pgoff, 1155 unsigned long flags) 1156 { 1157 struct bpf_map *map = filp->private_data; 1158 1159 if (map->ops->map_get_unmapped_area) 1160 return map->ops->map_get_unmapped_area(filp, addr, len, pgoff, flags); 1161 #ifdef CONFIG_MMU 1162 return mm_get_unmapped_area(filp, addr, len, pgoff, flags); 1163 #else 1164 return addr; 1165 #endif 1166 } 1167 1168 const struct file_operations bpf_map_fops = { 1169 #ifdef CONFIG_PROC_FS 1170 .show_fdinfo = bpf_map_show_fdinfo, 1171 #endif 1172 .release = bpf_map_release, 1173 .read = bpf_dummy_read, 1174 .write = bpf_dummy_write, 1175 .mmap = bpf_map_mmap, 1176 .poll = bpf_map_poll, 1177 .get_unmapped_area = bpf_get_unmapped_area, 1178 }; 1179 1180 int bpf_map_new_fd(struct bpf_map *map, int flags) 1181 { 1182 int ret; 1183 1184 ret = security_bpf_map(map, OPEN_FMODE(flags)); 1185 if (ret < 0) 1186 return ret; 1187 1188 return anon_inode_getfd("bpf-map", &bpf_map_fops, map, 1189 flags | O_CLOEXEC); 1190 } 1191 1192 int bpf_get_file_flag(int flags) 1193 { 1194 if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) 1195 return -EINVAL; 1196 if (flags & BPF_F_RDONLY) 1197 return O_RDONLY; 1198 if (flags & BPF_F_WRONLY) 1199 return O_WRONLY; 1200 return O_RDWR; 1201 } 1202 1203 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 1204 #define CHECK_ATTR(CMD) \ 1205 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 1206 sizeof(attr->CMD##_LAST_FIELD), 0, \ 1207 sizeof(*attr) - \ 1208 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 1209 sizeof(attr->CMD##_LAST_FIELD)) != NULL 1210 1211 /* dst and src must have at least "size" number of bytes. 1212 * Return strlen on success and < 0 on error. 1213 */ 1214 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size) 1215 { 1216 const char *end = src + size; 1217 const char *orig_src = src; 1218 1219 memset(dst, 0, size); 1220 /* Copy all isalnum(), '_' and '.' chars. 
*/ 1221 while (src < end && *src) { 1222 if (!isalnum(*src) && 1223 *src != '_' && *src != '.') 1224 return -EINVAL; 1225 *dst++ = *src++; 1226 } 1227 1228 /* No '\0' found in "size" number of bytes */ 1229 if (src == end) 1230 return -EINVAL; 1231 1232 return src - orig_src; 1233 } 1234 EXPORT_SYMBOL_GPL(bpf_obj_name_cpy); 1235 1236 int map_check_no_btf(const struct bpf_map *map, 1237 const struct btf *btf, 1238 const struct btf_type *key_type, 1239 const struct btf_type *value_type) 1240 { 1241 return -ENOTSUPP; 1242 } 1243 1244 static int map_check_btf(struct bpf_map *map, struct bpf_token *token, 1245 const struct btf *btf, u32 btf_key_id, u32 btf_value_id) 1246 { 1247 const struct btf_type *key_type, *value_type; 1248 u32 key_size, value_size; 1249 int ret = 0; 1250 1251 /* Some maps allow key to be unspecified. */ 1252 if (btf_key_id) { 1253 key_type = btf_type_id_size(btf, &btf_key_id, &key_size); 1254 if (!key_type || key_size != map->key_size) 1255 return -EINVAL; 1256 } else { 1257 key_type = btf_type_by_id(btf, 0); 1258 if (!map->ops->map_check_btf) 1259 return -EINVAL; 1260 } 1261 1262 value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 1263 if (!value_type || value_size != map->value_size) 1264 return -EINVAL; 1265 1266 map->record = btf_parse_fields(btf, value_type, 1267 BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | 1268 BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR | 1269 BPF_TASK_WORK, 1270 map->value_size); 1271 if (!IS_ERR_OR_NULL(map->record)) { 1272 int i; 1273 1274 if (!bpf_token_capable(token, CAP_BPF)) { 1275 ret = -EPERM; 1276 goto free_map_tab; 1277 } 1278 if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) { 1279 ret = -EACCES; 1280 goto free_map_tab; 1281 } 1282 for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) { 1283 switch (map->record->field_mask & (1 << i)) { 1284 case 0: 1285 continue; 1286 case BPF_SPIN_LOCK: 1287 case BPF_RES_SPIN_LOCK: 1288 if (map->map_type != BPF_MAP_TYPE_HASH && 1289 map->map_type != BPF_MAP_TYPE_ARRAY && 1290 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 1291 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1292 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1293 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1294 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1295 ret = -EOPNOTSUPP; 1296 goto free_map_tab; 1297 } 1298 break; 1299 case BPF_TIMER: 1300 case BPF_WORKQUEUE: 1301 case BPF_TASK_WORK: 1302 if (map->map_type != BPF_MAP_TYPE_HASH && 1303 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1304 map->map_type != BPF_MAP_TYPE_ARRAY) { 1305 ret = -EOPNOTSUPP; 1306 goto free_map_tab; 1307 } 1308 break; 1309 case BPF_KPTR_UNREF: 1310 case BPF_KPTR_REF: 1311 case BPF_KPTR_PERCPU: 1312 case BPF_REFCOUNT: 1313 if (map->map_type != BPF_MAP_TYPE_HASH && 1314 map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 1315 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1316 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH && 1317 map->map_type != BPF_MAP_TYPE_ARRAY && 1318 map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && 1319 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1320 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1321 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1322 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1323 ret = -EOPNOTSUPP; 1324 goto free_map_tab; 1325 } 1326 break; 1327 case BPF_UPTR: 1328 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) { 1329 ret = -EOPNOTSUPP; 1330 goto free_map_tab; 1331 } 1332 break; 1333 case BPF_LIST_HEAD: 1334 case BPF_RB_ROOT: 1335 if (map->map_type != 
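/* Editor's illustrative sketch (not part of the original file): the
 * BPF-program-side view of what ends up in map->record. Assuming the usual
 * libbpf conventions (SEC, __uint, __type from bpf_helpers.h), the BTF parser
 * discovers special fields such as bpf_spin_lock and bpf_timer in the map
 * value type and records their offsets, which map_check_btf() then validates
 * against the per-map-type restrictions in this switch.
 */
#if 0
struct example_value {
	struct bpf_spin_lock lock;
	struct bpf_timer timer;
	__u64 counter;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 128);
	__type(key, __u32);
	__type(value, struct example_value);
} example_map SEC(".maps");
#endif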
BPF_MAP_TYPE_HASH && 1336 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1337 map->map_type != BPF_MAP_TYPE_ARRAY) { 1338 ret = -EOPNOTSUPP; 1339 goto free_map_tab; 1340 } 1341 break; 1342 default: 1343 /* Fail if map_type checks are missing for a field type */ 1344 ret = -EOPNOTSUPP; 1345 goto free_map_tab; 1346 } 1347 } 1348 } 1349 1350 ret = btf_check_and_fixup_fields(btf, map->record); 1351 if (ret < 0) 1352 goto free_map_tab; 1353 1354 if (map->ops->map_check_btf) { 1355 ret = map->ops->map_check_btf(map, btf, key_type, value_type); 1356 if (ret < 0) 1357 goto free_map_tab; 1358 } 1359 1360 return ret; 1361 free_map_tab: 1362 bpf_map_free_record(map); 1363 return ret; 1364 } 1365 1366 #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size 1367 /* called via syscall */ 1368 static int map_create(union bpf_attr *attr, bpfptr_t uattr) 1369 { 1370 const struct bpf_map_ops *ops; 1371 struct bpf_token *token = NULL; 1372 int numa_node = bpf_map_attr_numa_node(attr); 1373 u32 map_type = attr->map_type; 1374 struct bpf_map *map; 1375 bool token_flag; 1376 int f_flags; 1377 int err; 1378 1379 err = CHECK_ATTR(BPF_MAP_CREATE); 1380 if (err) 1381 return -EINVAL; 1382 1383 /* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it 1384 * to avoid per-map type checks tripping on unknown flag 1385 */ 1386 token_flag = attr->map_flags & BPF_F_TOKEN_FD; 1387 attr->map_flags &= ~BPF_F_TOKEN_FD; 1388 1389 if (attr->btf_vmlinux_value_type_id) { 1390 if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || 1391 attr->btf_key_type_id || attr->btf_value_type_id) 1392 return -EINVAL; 1393 } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { 1394 return -EINVAL; 1395 } 1396 1397 if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && 1398 attr->map_type != BPF_MAP_TYPE_ARENA && 1399 attr->map_extra != 0) 1400 return -EINVAL; 1401 1402 f_flags = bpf_get_file_flag(attr->map_flags); 1403 if (f_flags < 0) 1404 return f_flags; 1405 1406 if (numa_node != NUMA_NO_NODE && 1407 ((unsigned int)numa_node >= nr_node_ids || 1408 !node_online(numa_node))) 1409 return -EINVAL; 1410 1411 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 1412 map_type = attr->map_type; 1413 if (map_type >= ARRAY_SIZE(bpf_map_types)) 1414 return -EINVAL; 1415 map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); 1416 ops = bpf_map_types[map_type]; 1417 if (!ops) 1418 return -EINVAL; 1419 1420 if (ops->map_alloc_check) { 1421 err = ops->map_alloc_check(attr); 1422 if (err) 1423 return err; 1424 } 1425 if (attr->map_ifindex) 1426 ops = &bpf_map_offload_ops; 1427 if (!ops->map_mem_usage) 1428 return -EINVAL; 1429 1430 if (token_flag) { 1431 token = bpf_token_get_from_fd(attr->map_token_fd); 1432 if (IS_ERR(token)) 1433 return PTR_ERR(token); 1434 1435 /* if current token doesn't grant map creation permissions, 1436 * then we can't use this token, so ignore it and rely on 1437 * system-wide capabilities checks 1438 */ 1439 if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) || 1440 !bpf_token_allow_map_type(token, attr->map_type)) { 1441 bpf_token_put(token); 1442 token = NULL; 1443 } 1444 } 1445 1446 err = -EPERM; 1447 1448 /* Intent here is for unprivileged_bpf_disabled to block BPF map 1449 * creation for unprivileged users; other actions depend 1450 * on fd availability and access to bpffs, so are dependent on 1451 * object creation success. Even with unprivileged BPF disabled, 1452 * capability checks are still carried out. 
1453 */ 1454 if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF)) 1455 goto put_token; 1456 1457 /* check privileged map type permissions */ 1458 switch (map_type) { 1459 case BPF_MAP_TYPE_ARRAY: 1460 case BPF_MAP_TYPE_PERCPU_ARRAY: 1461 case BPF_MAP_TYPE_PROG_ARRAY: 1462 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 1463 case BPF_MAP_TYPE_CGROUP_ARRAY: 1464 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 1465 case BPF_MAP_TYPE_HASH: 1466 case BPF_MAP_TYPE_PERCPU_HASH: 1467 case BPF_MAP_TYPE_HASH_OF_MAPS: 1468 case BPF_MAP_TYPE_RINGBUF: 1469 case BPF_MAP_TYPE_USER_RINGBUF: 1470 case BPF_MAP_TYPE_CGROUP_STORAGE: 1471 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: 1472 /* unprivileged */ 1473 break; 1474 case BPF_MAP_TYPE_SK_STORAGE: 1475 case BPF_MAP_TYPE_INODE_STORAGE: 1476 case BPF_MAP_TYPE_TASK_STORAGE: 1477 case BPF_MAP_TYPE_CGRP_STORAGE: 1478 case BPF_MAP_TYPE_BLOOM_FILTER: 1479 case BPF_MAP_TYPE_LPM_TRIE: 1480 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: 1481 case BPF_MAP_TYPE_STACK_TRACE: 1482 case BPF_MAP_TYPE_QUEUE: 1483 case BPF_MAP_TYPE_STACK: 1484 case BPF_MAP_TYPE_LRU_HASH: 1485 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 1486 case BPF_MAP_TYPE_STRUCT_OPS: 1487 case BPF_MAP_TYPE_CPUMAP: 1488 case BPF_MAP_TYPE_ARENA: 1489 case BPF_MAP_TYPE_INSN_ARRAY: 1490 if (!bpf_token_capable(token, CAP_BPF)) 1491 goto put_token; 1492 break; 1493 case BPF_MAP_TYPE_SOCKMAP: 1494 case BPF_MAP_TYPE_SOCKHASH: 1495 case BPF_MAP_TYPE_DEVMAP: 1496 case BPF_MAP_TYPE_DEVMAP_HASH: 1497 case BPF_MAP_TYPE_XSKMAP: 1498 if (!bpf_token_capable(token, CAP_NET_ADMIN)) 1499 goto put_token; 1500 break; 1501 default: 1502 WARN(1, "unsupported map type %d", map_type); 1503 goto put_token; 1504 } 1505 1506 map = ops->map_alloc(attr); 1507 if (IS_ERR(map)) { 1508 err = PTR_ERR(map); 1509 goto put_token; 1510 } 1511 map->ops = ops; 1512 map->map_type = map_type; 1513 1514 err = bpf_obj_name_cpy(map->name, attr->map_name, 1515 sizeof(attr->map_name)); 1516 if (err < 0) 1517 goto free_map; 1518 1519 preempt_disable(); 1520 map->cookie = gen_cookie_next(&bpf_map_cookie); 1521 preempt_enable(); 1522 1523 atomic64_set(&map->refcnt, 1); 1524 atomic64_set(&map->usercnt, 1); 1525 mutex_init(&map->freeze_mutex); 1526 spin_lock_init(&map->owner_lock); 1527 1528 if (attr->btf_key_type_id || attr->btf_value_type_id || 1529 /* Even the map's value is a kernel's struct, 1530 * the bpf_prog.o must have BTF to begin with 1531 * to figure out the corresponding kernel's 1532 * counter part. Thus, attr->btf_fd has 1533 * to be valid also. 
1534 */ 1535 attr->btf_vmlinux_value_type_id) { 1536 struct btf *btf; 1537 1538 btf = btf_get_by_fd(attr->btf_fd); 1539 if (IS_ERR(btf)) { 1540 err = PTR_ERR(btf); 1541 goto free_map; 1542 } 1543 if (btf_is_kernel(btf)) { 1544 btf_put(btf); 1545 err = -EACCES; 1546 goto free_map; 1547 } 1548 map->btf = btf; 1549 1550 if (attr->btf_value_type_id) { 1551 err = map_check_btf(map, token, btf, attr->btf_key_type_id, 1552 attr->btf_value_type_id); 1553 if (err) 1554 goto free_map; 1555 } 1556 1557 map->btf_key_type_id = attr->btf_key_type_id; 1558 map->btf_value_type_id = attr->btf_value_type_id; 1559 map->btf_vmlinux_value_type_id = 1560 attr->btf_vmlinux_value_type_id; 1561 } 1562 1563 if (attr->excl_prog_hash) { 1564 bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel); 1565 1566 if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) { 1567 err = -EINVAL; 1568 goto free_map; 1569 } 1570 1571 map->excl_prog_sha = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); 1572 if (!map->excl_prog_sha) { 1573 err = -ENOMEM; 1574 goto free_map; 1575 } 1576 1577 if (copy_from_bpfptr(map->excl_prog_sha, uprog_hash, SHA256_DIGEST_SIZE)) { 1578 err = -EFAULT; 1579 goto free_map; 1580 } 1581 } else if (attr->excl_prog_hash_size) { 1582 err = -EINVAL; 1583 goto free_map; 1584 } 1585 1586 err = security_bpf_map_create(map, attr, token, uattr.is_kernel); 1587 if (err) 1588 goto free_map_sec; 1589 1590 err = bpf_map_alloc_id(map); 1591 if (err) 1592 goto free_map_sec; 1593 1594 bpf_map_save_memcg(map); 1595 bpf_token_put(token); 1596 1597 err = bpf_map_new_fd(map, f_flags); 1598 if (err < 0) { 1599 /* failed to allocate fd. 1600 * bpf_map_put_with_uref() is needed because the above 1601 * bpf_map_alloc_id() has published the map 1602 * to the userspace and the userspace may 1603 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 1604 */ 1605 bpf_map_put_with_uref(map); 1606 return err; 1607 } 1608 1609 return err; 1610 1611 free_map_sec: 1612 security_bpf_map_free(map); 1613 free_map: 1614 bpf_map_free(map); 1615 put_token: 1616 bpf_token_put(token); 1617 return err; 1618 } 1619 1620 void bpf_map_inc(struct bpf_map *map) 1621 { 1622 atomic64_inc(&map->refcnt); 1623 } 1624 EXPORT_SYMBOL_GPL(bpf_map_inc); 1625 1626 void bpf_map_inc_with_uref(struct bpf_map *map) 1627 { 1628 atomic64_inc(&map->refcnt); 1629 atomic64_inc(&map->usercnt); 1630 } 1631 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); 1632 1633 struct bpf_map *bpf_map_get(u32 ufd) 1634 { 1635 CLASS(fd, f)(ufd); 1636 struct bpf_map *map = __bpf_map_get(f); 1637 1638 if (!IS_ERR(map)) 1639 bpf_map_inc(map); 1640 1641 return map; 1642 } 1643 EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL"); 1644 1645 struct bpf_map *bpf_map_get_with_uref(u32 ufd) 1646 { 1647 CLASS(fd, f)(ufd); 1648 struct bpf_map *map = __bpf_map_get(f); 1649 1650 if (!IS_ERR(map)) 1651 bpf_map_inc_with_uref(map); 1652 1653 return map; 1654 } 1655 1656 /* map_idr_lock should have been held or the map should have been 1657 * protected by rcu read lock. 
1658 */ 1659 struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) 1660 { 1661 int refold; 1662 1663 refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); 1664 if (!refold) 1665 return ERR_PTR(-ENOENT); 1666 if (uref) 1667 atomic64_inc(&map->usercnt); 1668 1669 return map; 1670 } 1671 1672 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) 1673 { 1674 lockdep_assert(rcu_read_lock_held()); 1675 return __bpf_map_inc_not_zero(map, false); 1676 } 1677 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); 1678 1679 int __weak bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, 1680 bool delete) 1681 { 1682 return -ENOTSUPP; 1683 } 1684 1685 static void *__bpf_copy_key(void __user *ukey, u64 key_size) 1686 { 1687 if (key_size) 1688 return vmemdup_user(ukey, key_size); 1689 1690 if (ukey) 1691 return ERR_PTR(-EINVAL); 1692 1693 return NULL; 1694 } 1695 1696 static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size) 1697 { 1698 if (key_size) 1699 return kvmemdup_bpfptr(ukey, key_size); 1700 1701 if (!bpfptr_is_null(ukey)) 1702 return ERR_PTR(-EINVAL); 1703 1704 return NULL; 1705 } 1706 1707 /* last field in 'union bpf_attr' used by this command */ 1708 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags 1709 1710 static int map_lookup_elem(union bpf_attr *attr) 1711 { 1712 void __user *ukey = u64_to_user_ptr(attr->key); 1713 void __user *uvalue = u64_to_user_ptr(attr->value); 1714 struct bpf_map *map; 1715 void *key, *value; 1716 u32 value_size; 1717 int err; 1718 1719 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 1720 return -EINVAL; 1721 1722 CLASS(fd, f)(attr->map_fd); 1723 map = __bpf_map_get(f); 1724 if (IS_ERR(map)) 1725 return PTR_ERR(map); 1726 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) 1727 return -EPERM; 1728 1729 err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK | BPF_F_CPU); 1730 if (err) 1731 return err; 1732 1733 key = __bpf_copy_key(ukey, map->key_size); 1734 if (IS_ERR(key)) 1735 return PTR_ERR(key); 1736 1737 value_size = bpf_map_value_size(map, attr->flags); 1738 1739 err = -ENOMEM; 1740 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1741 if (!value) 1742 goto free_key; 1743 1744 if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 1745 if (copy_from_user(value, uvalue, value_size)) 1746 err = -EFAULT; 1747 else 1748 err = bpf_map_copy_value(map, key, value, attr->flags); 1749 goto free_value; 1750 } 1751 1752 err = bpf_map_copy_value(map, key, value, attr->flags); 1753 if (err) 1754 goto free_value; 1755 1756 err = -EFAULT; 1757 if (copy_to_user(uvalue, value, value_size) != 0) 1758 goto free_value; 1759 1760 err = 0; 1761 1762 free_value: 1763 kvfree(value); 1764 free_key: 1765 kvfree(key); 1766 return err; 1767 } 1768 1769 1770 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 1771 1772 static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) 1773 { 1774 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1775 bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel); 1776 struct bpf_map *map; 1777 void *key, *value; 1778 u32 value_size; 1779 int err; 1780 1781 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 1782 return -EINVAL; 1783 1784 CLASS(fd, f)(attr->map_fd); 1785 map = __bpf_map_get(f); 1786 if (IS_ERR(map)) 1787 return PTR_ERR(map); 1788 bpf_map_write_active_inc(map); 1789 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1790 err = -EPERM; 1791 goto err_put; 1792 } 1793 1794 err = bpf_map_check_op_flags(map, attr->flags, ~0); 1795 if (err) 1796 goto err_put; 1797 1798 key = ___bpf_copy_key(ukey, map->key_size); 1799 if (IS_ERR(key)) 
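/* Editor's illustrative sketch (not part of the original file): the userspace
 * view of the BPF_MAP_UPDATE_ELEM flag semantics handled by this path.
 * BPF_NOEXIST only creates (EEXIST if the key is present), BPF_EXIST only
 * replaces (ENOENT if it is not), and BPF_ANY does either. The helper is
 * hypothetical.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int example_update(int map_fd, const void *key, const void *value,
			  uint64_t flags /* BPF_ANY, BPF_NOEXIST or BPF_EXIST */)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (uint64_t)(unsigned long)key;
	attr.value = (uint64_t)(unsigned long)value;
	attr.flags = flags;
	return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
#endif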
{ 1800 err = PTR_ERR(key); 1801 goto err_put; 1802 } 1803 1804 value_size = bpf_map_value_size(map, attr->flags); 1805 value = kvmemdup_bpfptr(uvalue, value_size); 1806 if (IS_ERR(value)) { 1807 err = PTR_ERR(value); 1808 goto free_key; 1809 } 1810 1811 err = bpf_map_update_value(map, fd_file(f), key, value, attr->flags); 1812 if (!err) 1813 maybe_wait_bpf_programs(map); 1814 1815 kvfree(value); 1816 free_key: 1817 kvfree(key); 1818 err_put: 1819 bpf_map_write_active_dec(map); 1820 return err; 1821 } 1822 1823 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 1824 1825 static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr) 1826 { 1827 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1828 struct bpf_map *map; 1829 void *key; 1830 int err; 1831 1832 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 1833 return -EINVAL; 1834 1835 CLASS(fd, f)(attr->map_fd); 1836 map = __bpf_map_get(f); 1837 if (IS_ERR(map)) 1838 return PTR_ERR(map); 1839 bpf_map_write_active_inc(map); 1840 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1841 err = -EPERM; 1842 goto err_put; 1843 } 1844 1845 key = ___bpf_copy_key(ukey, map->key_size); 1846 if (IS_ERR(key)) { 1847 err = PTR_ERR(key); 1848 goto err_put; 1849 } 1850 1851 if (bpf_map_is_offloaded(map)) { 1852 err = bpf_map_offload_delete_elem(map, key); 1853 goto out; 1854 } else if (IS_FD_PROG_ARRAY(map) || 1855 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 1856 /* These maps require sleepable context */ 1857 err = map->ops->map_delete_elem(map, key); 1858 goto out; 1859 } 1860 1861 bpf_disable_instrumentation(); 1862 rcu_read_lock(); 1863 err = map->ops->map_delete_elem(map, key); 1864 rcu_read_unlock(); 1865 bpf_enable_instrumentation(); 1866 if (!err) 1867 maybe_wait_bpf_programs(map); 1868 out: 1869 kvfree(key); 1870 err_put: 1871 bpf_map_write_active_dec(map); 1872 return err; 1873 } 1874 1875 /* last field in 'union bpf_attr' used by this command */ 1876 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 1877 1878 static int map_get_next_key(union bpf_attr *attr) 1879 { 1880 void __user *ukey = u64_to_user_ptr(attr->key); 1881 void __user *unext_key = u64_to_user_ptr(attr->next_key); 1882 struct bpf_map *map; 1883 void *key, *next_key; 1884 int err; 1885 1886 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 1887 return -EINVAL; 1888 1889 CLASS(fd, f)(attr->map_fd); 1890 map = __bpf_map_get(f); 1891 if (IS_ERR(map)) 1892 return PTR_ERR(map); 1893 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) 1894 return -EPERM; 1895 1896 if (ukey) { 1897 key = __bpf_copy_key(ukey, map->key_size); 1898 if (IS_ERR(key)) 1899 return PTR_ERR(key); 1900 } else { 1901 key = NULL; 1902 } 1903 1904 err = -ENOMEM; 1905 next_key = kvmalloc(map->key_size, GFP_USER); 1906 if (!next_key) 1907 goto free_key; 1908 1909 if (bpf_map_is_offloaded(map)) { 1910 err = bpf_map_offload_get_next_key(map, key, next_key); 1911 goto out; 1912 } 1913 1914 rcu_read_lock(); 1915 err = map->ops->map_get_next_key(map, key, next_key); 1916 rcu_read_unlock(); 1917 out: 1918 if (err) 1919 goto free_next_key; 1920 1921 err = -EFAULT; 1922 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 1923 goto free_next_key; 1924 1925 err = 0; 1926 1927 free_next_key: 1928 kvfree(next_key); 1929 free_key: 1930 kvfree(key); 1931 return err; 1932 } 1933 1934 int generic_map_delete_batch(struct bpf_map *map, 1935 const union bpf_attr *attr, 1936 union bpf_attr __user *uattr) 1937 { 1938 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1939 u32 cp, max_count; 1940 int err = 0; 1941 void *key; 1942 1943 if 
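/* Editor's illustrative sketch (not part of the original file): the classic
 * userspace iteration pattern served by map_get_next_key() above. Passing a
 * NULL key yields the first key, and ENOENT signals that the whole map has
 * been traversed. The helper and callback are hypothetical.
 */
#if 0
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int example_for_each_key(int map_fd, uint32_t key_size,
				int (*cb)(const void *key, void *ctx), void *ctx)
{
	char cur[key_size], next[key_size];
	union bpf_attr attr;
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = 0;			/* NULL: start from the first key */
	attr.next_key = (uint64_t)(unsigned long)next;

	while (!syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr))) {
		err = cb(next, ctx);
		if (err)
			return err;
		memcpy(cur, next, key_size);
		attr.key = (uint64_t)(unsigned long)cur;
	}
	return errno == ENOENT ? 0 : -1;
}
#endif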
(attr->batch.elem_flags & ~BPF_F_LOCK) 1944 return -EINVAL; 1945 1946 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1947 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1948 return -EINVAL; 1949 } 1950 1951 max_count = attr->batch.count; 1952 if (!max_count) 1953 return 0; 1954 1955 if (put_user(0, &uattr->batch.count)) 1956 return -EFAULT; 1957 1958 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1959 if (!key) 1960 return -ENOMEM; 1961 1962 for (cp = 0; cp < max_count; cp++) { 1963 err = -EFAULT; 1964 if (copy_from_user(key, keys + cp * map->key_size, 1965 map->key_size)) 1966 break; 1967 1968 if (bpf_map_is_offloaded(map)) { 1969 err = bpf_map_offload_delete_elem(map, key); 1970 break; 1971 } 1972 1973 bpf_disable_instrumentation(); 1974 rcu_read_lock(); 1975 err = map->ops->map_delete_elem(map, key); 1976 rcu_read_unlock(); 1977 bpf_enable_instrumentation(); 1978 if (err) 1979 break; 1980 cond_resched(); 1981 } 1982 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1983 err = -EFAULT; 1984 1985 kvfree(key); 1986 1987 return err; 1988 } 1989 1990 int generic_map_update_batch(struct bpf_map *map, struct file *map_file, 1991 const union bpf_attr *attr, 1992 union bpf_attr __user *uattr) 1993 { 1994 void __user *values = u64_to_user_ptr(attr->batch.values); 1995 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1996 u32 value_size, cp, max_count; 1997 void *key, *value; 1998 int err = 0; 1999 2000 err = bpf_map_check_op_flags(map, attr->batch.elem_flags, 2001 BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS); 2002 if (err) 2003 return err; 2004 2005 value_size = bpf_map_value_size(map, attr->batch.elem_flags); 2006 2007 max_count = attr->batch.count; 2008 if (!max_count) 2009 return 0; 2010 2011 if (put_user(0, &uattr->batch.count)) 2012 return -EFAULT; 2013 2014 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 2015 if (!key) 2016 return -ENOMEM; 2017 2018 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 2019 if (!value) { 2020 kvfree(key); 2021 return -ENOMEM; 2022 } 2023 2024 for (cp = 0; cp < max_count; cp++) { 2025 err = -EFAULT; 2026 if (copy_from_user(key, keys + cp * map->key_size, 2027 map->key_size) || 2028 copy_from_user(value, values + cp * value_size, value_size)) 2029 break; 2030 2031 err = bpf_map_update_value(map, map_file, key, value, 2032 attr->batch.elem_flags); 2033 2034 if (err) 2035 break; 2036 cond_resched(); 2037 } 2038 2039 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 2040 err = -EFAULT; 2041 2042 kvfree(value); 2043 kvfree(key); 2044 2045 return err; 2046 } 2047 2048 int generic_map_lookup_batch(struct bpf_map *map, 2049 const union bpf_attr *attr, 2050 union bpf_attr __user *uattr) 2051 { 2052 void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); 2053 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 2054 void __user *values = u64_to_user_ptr(attr->batch.values); 2055 void __user *keys = u64_to_user_ptr(attr->batch.keys); 2056 void *buf, *buf_prevkey, *prev_key, *key, *value; 2057 u32 value_size, cp, max_count; 2058 int err; 2059 2060 err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK | BPF_F_CPU); 2061 if (err) 2062 return err; 2063 2064 value_size = bpf_map_value_size(map, attr->batch.elem_flags); 2065 2066 max_count = attr->batch.count; 2067 if (!max_count) 2068 return 0; 2069 2070 if (put_user(0, &uattr->batch.count)) 2071 return -EFAULT; 2072 2073 buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 2074 if (!buf_prevkey) 2075 return -ENOMEM; 2076 2077 buf = 
kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN); 2078 if (!buf) { 2079 kvfree(buf_prevkey); 2080 return -ENOMEM; 2081 } 2082 2083 err = -EFAULT; 2084 prev_key = NULL; 2085 if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) 2086 goto free_buf; 2087 key = buf; 2088 value = key + map->key_size; 2089 if (ubatch) 2090 prev_key = buf_prevkey; 2091 2092 for (cp = 0; cp < max_count;) { 2093 rcu_read_lock(); 2094 err = map->ops->map_get_next_key(map, prev_key, key); 2095 rcu_read_unlock(); 2096 if (err) 2097 break; 2098 err = bpf_map_copy_value(map, key, value, 2099 attr->batch.elem_flags); 2100 2101 if (err == -ENOENT) 2102 goto next_key; 2103 2104 if (err) 2105 goto free_buf; 2106 2107 if (copy_to_user(keys + cp * map->key_size, key, 2108 map->key_size)) { 2109 err = -EFAULT; 2110 goto free_buf; 2111 } 2112 if (copy_to_user(values + cp * value_size, value, value_size)) { 2113 err = -EFAULT; 2114 goto free_buf; 2115 } 2116 2117 cp++; 2118 next_key: 2119 if (!prev_key) 2120 prev_key = buf_prevkey; 2121 2122 swap(prev_key, key); 2123 cond_resched(); 2124 } 2125 2126 if (err == -EFAULT) 2127 goto free_buf; 2128 2129 if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || 2130 (cp && copy_to_user(uobatch, prev_key, map->key_size)))) 2131 err = -EFAULT; 2132 2133 free_buf: 2134 kvfree(buf_prevkey); 2135 kvfree(buf); 2136 return err; 2137 } 2138 2139 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags 2140 2141 static int map_lookup_and_delete_elem(union bpf_attr *attr) 2142 { 2143 void __user *ukey = u64_to_user_ptr(attr->key); 2144 void __user *uvalue = u64_to_user_ptr(attr->value); 2145 struct bpf_map *map; 2146 void *key, *value; 2147 u32 value_size; 2148 int err; 2149 2150 if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) 2151 return -EINVAL; 2152 2153 if (attr->flags & ~BPF_F_LOCK) 2154 return -EINVAL; 2155 2156 CLASS(fd, f)(attr->map_fd); 2157 map = __bpf_map_get(f); 2158 if (IS_ERR(map)) 2159 return PTR_ERR(map); 2160 bpf_map_write_active_inc(map); 2161 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || 2162 !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 2163 err = -EPERM; 2164 goto err_put; 2165 } 2166 2167 if (attr->flags && 2168 (map->map_type == BPF_MAP_TYPE_QUEUE || 2169 map->map_type == BPF_MAP_TYPE_STACK)) { 2170 err = -EINVAL; 2171 goto err_put; 2172 } 2173 2174 if ((attr->flags & BPF_F_LOCK) && 2175 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 2176 err = -EINVAL; 2177 goto err_put; 2178 } 2179 2180 key = __bpf_copy_key(ukey, map->key_size); 2181 if (IS_ERR(key)) { 2182 err = PTR_ERR(key); 2183 goto err_put; 2184 } 2185 2186 value_size = bpf_map_value_size(map, 0); 2187 2188 err = -ENOMEM; 2189 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 2190 if (!value) 2191 goto free_key; 2192 2193 err = -ENOTSUPP; 2194 if (map->map_type == BPF_MAP_TYPE_QUEUE || 2195 map->map_type == BPF_MAP_TYPE_STACK) { 2196 err = map->ops->map_pop_elem(map, value); 2197 } else if (map->map_type == BPF_MAP_TYPE_HASH || 2198 map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 2199 map->map_type == BPF_MAP_TYPE_LRU_HASH || 2200 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 2201 map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 2202 if (!bpf_map_is_offloaded(map)) { 2203 bpf_disable_instrumentation(); 2204 rcu_read_lock(); 2205 err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags); 2206 rcu_read_unlock(); 2207 bpf_enable_instrumentation(); 2208 } 2209 } 2210 2211 if (err) 2212 goto free_value; 2213 2214 if (copy_to_user(uvalue, value, value_size) != 0) 
{ 2215 err = -EFAULT; 2216 goto free_value; 2217 } 2218 2219 err = 0; 2220 2221 free_value: 2222 kvfree(value); 2223 free_key: 2224 kvfree(key); 2225 err_put: 2226 bpf_map_write_active_dec(map); 2227 return err; 2228 } 2229 2230 #define BPF_MAP_FREEZE_LAST_FIELD map_fd 2231 2232 static int map_freeze(const union bpf_attr *attr) 2233 { 2234 int err = 0; 2235 struct bpf_map *map; 2236 2237 if (CHECK_ATTR(BPF_MAP_FREEZE)) 2238 return -EINVAL; 2239 2240 CLASS(fd, f)(attr->map_fd); 2241 map = __bpf_map_get(f); 2242 if (IS_ERR(map)) 2243 return PTR_ERR(map); 2244 2245 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) 2246 return -ENOTSUPP; 2247 2248 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) 2249 return -EPERM; 2250 2251 mutex_lock(&map->freeze_mutex); 2252 if (bpf_map_write_active(map)) { 2253 err = -EBUSY; 2254 goto err_put; 2255 } 2256 if (READ_ONCE(map->frozen)) { 2257 err = -EBUSY; 2258 goto err_put; 2259 } 2260 2261 WRITE_ONCE(map->frozen, true); 2262 err_put: 2263 mutex_unlock(&map->freeze_mutex); 2264 return err; 2265 } 2266 2267 static const struct bpf_prog_ops * const bpf_prog_types[] = { 2268 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 2269 [_id] = & _name ## _prog_ops, 2270 #define BPF_MAP_TYPE(_id, _ops) 2271 #define BPF_LINK_TYPE(_id, _name) 2272 #include <linux/bpf_types.h> 2273 #undef BPF_PROG_TYPE 2274 #undef BPF_MAP_TYPE 2275 #undef BPF_LINK_TYPE 2276 }; 2277 2278 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 2279 { 2280 const struct bpf_prog_ops *ops; 2281 2282 if (type >= ARRAY_SIZE(bpf_prog_types)) 2283 return -EINVAL; 2284 type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types)); 2285 ops = bpf_prog_types[type]; 2286 if (!ops) 2287 return -EINVAL; 2288 2289 if (!bpf_prog_is_offloaded(prog->aux)) 2290 prog->aux->ops = ops; 2291 else 2292 prog->aux->ops = &bpf_offload_prog_ops; 2293 prog->type = type; 2294 return 0; 2295 } 2296 2297 enum bpf_audit { 2298 BPF_AUDIT_LOAD, 2299 BPF_AUDIT_UNLOAD, 2300 BPF_AUDIT_MAX, 2301 }; 2302 2303 static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { 2304 [BPF_AUDIT_LOAD] = "LOAD", 2305 [BPF_AUDIT_UNLOAD] = "UNLOAD", 2306 }; 2307 2308 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) 2309 { 2310 struct audit_context *ctx = NULL; 2311 struct audit_buffer *ab; 2312 2313 if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) 2314 return; 2315 if (audit_enabled == AUDIT_OFF) 2316 return; 2317 if (!in_hardirq() && !irqs_disabled()) 2318 ctx = audit_context(); 2319 ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); 2320 if (unlikely(!ab)) 2321 return; 2322 audit_log_format(ab, "prog-id=%u op=%s", 2323 prog->aux->id, bpf_audit_str[op]); 2324 audit_log_end(ab); 2325 } 2326 2327 static int bpf_prog_alloc_id(struct bpf_prog *prog) 2328 { 2329 int id; 2330 2331 idr_preload(GFP_KERNEL); 2332 spin_lock_bh(&prog_idr_lock); 2333 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 2334 if (id > 0) 2335 prog->aux->id = id; 2336 spin_unlock_bh(&prog_idr_lock); 2337 idr_preload_end(); 2338 2339 /* id is in [1, INT_MAX) */ 2340 if (WARN_ON_ONCE(!id)) 2341 return -ENOSPC; 2342 2343 return id > 0 ? 0 : id; 2344 } 2345 2346 void bpf_prog_free_id(struct bpf_prog *prog) 2347 { 2348 unsigned long flags; 2349 2350 /* cBPF to eBPF migrations are currently not in the idr store. 
2351 * Offloaded programs are removed from the store when their device 2352 * disappears - even if someone grabs an fd to them they are unusable, 2353 * simply waiting for refcnt to drop to be freed. 2354 */ 2355 if (!prog->aux->id) 2356 return; 2357 2358 spin_lock_irqsave(&prog_idr_lock, flags); 2359 idr_remove(&prog_idr, prog->aux->id); 2360 prog->aux->id = 0; 2361 spin_unlock_irqrestore(&prog_idr_lock, flags); 2362 } 2363 2364 static void __bpf_prog_put_rcu(struct rcu_head *rcu) 2365 { 2366 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 2367 2368 kvfree(aux->func_info); 2369 kfree(aux->func_info_aux); 2370 free_uid(aux->user); 2371 security_bpf_prog_free(aux->prog); 2372 bpf_prog_free(aux->prog); 2373 } 2374 2375 static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) 2376 { 2377 bpf_prog_kallsyms_del_all(prog); 2378 btf_put(prog->aux->btf); 2379 module_put(prog->aux->mod); 2380 kvfree(prog->aux->jited_linfo); 2381 kvfree(prog->aux->linfo); 2382 kfree(prog->aux->kfunc_tab); 2383 kfree(prog->aux->ctx_arg_info); 2384 if (prog->aux->attach_btf) 2385 btf_put(prog->aux->attach_btf); 2386 2387 if (deferred) { 2388 if (prog->sleepable) 2389 call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); 2390 else 2391 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 2392 } else { 2393 __bpf_prog_put_rcu(&prog->aux->rcu); 2394 } 2395 } 2396 2397 static void bpf_prog_put_deferred(struct work_struct *work) 2398 { 2399 struct bpf_prog_aux *aux; 2400 struct bpf_prog *prog; 2401 2402 aux = container_of(work, struct bpf_prog_aux, work); 2403 prog = aux->prog; 2404 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 2405 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); 2406 bpf_prog_free_id(prog); 2407 __bpf_prog_put_noref(prog, true); 2408 } 2409 2410 static void __bpf_prog_put(struct bpf_prog *prog) 2411 { 2412 struct bpf_prog_aux *aux = prog->aux; 2413 2414 if (atomic64_dec_and_test(&aux->refcnt)) { 2415 if (in_hardirq() || irqs_disabled()) { 2416 INIT_WORK(&aux->work, bpf_prog_put_deferred); 2417 schedule_work(&aux->work); 2418 } else { 2419 bpf_prog_put_deferred(&aux->work); 2420 } 2421 } 2422 } 2423 2424 void bpf_prog_put(struct bpf_prog *prog) 2425 { 2426 __bpf_prog_put(prog); 2427 } 2428 EXPORT_SYMBOL_GPL(bpf_prog_put); 2429 2430 static int bpf_prog_release(struct inode *inode, struct file *filp) 2431 { 2432 struct bpf_prog *prog = filp->private_data; 2433 2434 bpf_prog_put(prog); 2435 return 0; 2436 } 2437 2438 struct bpf_prog_kstats { 2439 u64 nsecs; 2440 u64 cnt; 2441 u64 misses; 2442 }; 2443 2444 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog) 2445 { 2446 struct bpf_prog_stats *stats; 2447 unsigned int flags; 2448 2449 if (unlikely(!prog->stats)) 2450 return; 2451 2452 stats = this_cpu_ptr(prog->stats); 2453 flags = u64_stats_update_begin_irqsave(&stats->syncp); 2454 u64_stats_inc(&stats->misses); 2455 u64_stats_update_end_irqrestore(&stats->syncp, flags); 2456 } 2457 2458 static void bpf_prog_get_stats(const struct bpf_prog *prog, 2459 struct bpf_prog_kstats *stats) 2460 { 2461 u64 nsecs = 0, cnt = 0, misses = 0; 2462 int cpu; 2463 2464 for_each_possible_cpu(cpu) { 2465 const struct bpf_prog_stats *st; 2466 unsigned int start; 2467 u64 tnsecs, tcnt, tmisses; 2468 2469 st = per_cpu_ptr(prog->stats, cpu); 2470 do { 2471 start = u64_stats_fetch_begin(&st->syncp); 2472 tnsecs = u64_stats_read(&st->nsecs); 2473 tcnt = u64_stats_read(&st->cnt); 2474 tmisses = u64_stats_read(&st->misses); 2475 } while (u64_stats_fetch_retry(&st->syncp, start)); 2476 
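/*
 * The u64_stats_fetch_begin()/_retry() pair above rereads this CPU's
 * counters until it observes a consistent snapshot, so the sums below
 * never mix a torn nsecs/cnt/misses triple from a concurrent update on
 * that CPU.
 */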
nsecs += tnsecs; 2477 cnt += tcnt; 2478 misses += tmisses; 2479 } 2480 stats->nsecs = nsecs; 2481 stats->cnt = cnt; 2482 stats->misses = misses; 2483 } 2484 2485 #ifdef CONFIG_PROC_FS 2486 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 2487 { 2488 const struct bpf_prog *prog = filp->private_data; 2489 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2490 struct bpf_prog_kstats stats; 2491 2492 bpf_prog_get_stats(prog, &stats); 2493 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2494 seq_printf(m, 2495 "prog_type:\t%u\n" 2496 "prog_jited:\t%u\n" 2497 "prog_tag:\t%s\n" 2498 "memlock:\t%llu\n" 2499 "prog_id:\t%u\n" 2500 "run_time_ns:\t%llu\n" 2501 "run_cnt:\t%llu\n" 2502 "recursion_misses:\t%llu\n" 2503 "verified_insns:\t%u\n", 2504 prog->type, 2505 prog->jited, 2506 prog_tag, 2507 prog->pages * 1ULL << PAGE_SHIFT, 2508 prog->aux->id, 2509 stats.nsecs, 2510 stats.cnt, 2511 stats.misses, 2512 prog->aux->verified_insns); 2513 } 2514 #endif 2515 2516 const struct file_operations bpf_prog_fops = { 2517 #ifdef CONFIG_PROC_FS 2518 .show_fdinfo = bpf_prog_show_fdinfo, 2519 #endif 2520 .release = bpf_prog_release, 2521 .read = bpf_dummy_read, 2522 .write = bpf_dummy_write, 2523 }; 2524 2525 int bpf_prog_new_fd(struct bpf_prog *prog) 2526 { 2527 int ret; 2528 2529 ret = security_bpf_prog(prog); 2530 if (ret < 0) 2531 return ret; 2532 2533 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 2534 O_RDWR | O_CLOEXEC); 2535 } 2536 2537 void bpf_prog_add(struct bpf_prog *prog, int i) 2538 { 2539 atomic64_add(i, &prog->aux->refcnt); 2540 } 2541 EXPORT_SYMBOL_GPL(bpf_prog_add); 2542 2543 void bpf_prog_sub(struct bpf_prog *prog, int i) 2544 { 2545 /* Only to be used for undoing previous bpf_prog_add() in some 2546 * error path. We still know that another entity in our call 2547 * path holds a reference to the program, thus atomic_sub() can 2548 * be safely used in such cases! 
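 * The WARN_ON below fires if the subtraction drops the refcount to
 * zero, i.e. if that assumption of a still-held reference was wrong.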
2549 */ 2550 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 2551 } 2552 EXPORT_SYMBOL_GPL(bpf_prog_sub); 2553 2554 void bpf_prog_inc(struct bpf_prog *prog) 2555 { 2556 atomic64_inc(&prog->aux->refcnt); 2557 } 2558 EXPORT_SYMBOL_GPL(bpf_prog_inc); 2559 2560 /* prog_idr_lock should have been held */ 2561 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 2562 { 2563 int refold; 2564 2565 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 2566 2567 if (!refold) 2568 return ERR_PTR(-ENOENT); 2569 2570 return prog; 2571 } 2572 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 2573 2574 bool bpf_prog_get_ok(struct bpf_prog *prog, 2575 enum bpf_prog_type *attach_type, bool attach_drv) 2576 { 2577 /* not an attachment, just a refcount inc, always allow */ 2578 if (!attach_type) 2579 return true; 2580 2581 if (prog->type != *attach_type) 2582 return false; 2583 if (bpf_prog_is_offloaded(prog->aux) && !attach_drv) 2584 return false; 2585 2586 return true; 2587 } 2588 2589 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, 2590 bool attach_drv) 2591 { 2592 CLASS(fd, f)(ufd); 2593 struct bpf_prog *prog; 2594 2595 if (fd_empty(f)) 2596 return ERR_PTR(-EBADF); 2597 if (fd_file(f)->f_op != &bpf_prog_fops) 2598 return ERR_PTR(-EINVAL); 2599 2600 prog = fd_file(f)->private_data; 2601 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) 2602 return ERR_PTR(-EINVAL); 2603 2604 bpf_prog_inc(prog); 2605 return prog; 2606 } 2607 2608 struct bpf_prog *bpf_prog_get(u32 ufd) 2609 { 2610 return __bpf_prog_get(ufd, NULL, false); 2611 } 2612 2613 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, 2614 bool attach_drv) 2615 { 2616 return __bpf_prog_get(ufd, &type, attach_drv); 2617 } 2618 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 2619 2620 /* Initially all BPF programs could be loaded w/o specifying 2621 * expected_attach_type. Later for some of them specifying expected_attach_type 2622 * at load time became required so that program could be validated properly. 2623 * Programs of types that are allowed to be loaded both w/ and w/o (for 2624 * backward compatibility) expected_attach_type, should have the default attach 2625 * type assigned to expected_attach_type for the latter case, so that it can be 2626 * validated later at attach time. 2627 * 2628 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if 2629 * prog type requires it but has some attach types that have to be backward 2630 * compatible. 2631 */ 2632 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) 2633 { 2634 switch (attr->prog_type) { 2635 case BPF_PROG_TYPE_CGROUP_SOCK: 2636 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't 2637 * exist so checking for non-zero is the way to go here. 
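 * The same zero-means-unset convention is used for
 * BPF_PROG_TYPE_SK_REUSEPORT below, which defaults to
 * BPF_SK_REUSEPORT_SELECT.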
2638 */ 2639 if (!attr->expected_attach_type) 2640 attr->expected_attach_type = 2641 BPF_CGROUP_INET_SOCK_CREATE; 2642 break; 2643 case BPF_PROG_TYPE_SK_REUSEPORT: 2644 if (!attr->expected_attach_type) 2645 attr->expected_attach_type = 2646 BPF_SK_REUSEPORT_SELECT; 2647 break; 2648 } 2649 } 2650 2651 static int 2652 bpf_prog_load_check_attach(enum bpf_prog_type prog_type, 2653 enum bpf_attach_type expected_attach_type, 2654 struct btf *attach_btf, u32 btf_id, 2655 struct bpf_prog *dst_prog) 2656 { 2657 if (btf_id) { 2658 if (btf_id > BTF_MAX_TYPE) 2659 return -EINVAL; 2660 2661 if (!attach_btf && !dst_prog) 2662 return -EINVAL; 2663 2664 switch (prog_type) { 2665 case BPF_PROG_TYPE_TRACING: 2666 case BPF_PROG_TYPE_LSM: 2667 case BPF_PROG_TYPE_STRUCT_OPS: 2668 case BPF_PROG_TYPE_EXT: 2669 break; 2670 default: 2671 return -EINVAL; 2672 } 2673 } 2674 2675 if (attach_btf && (!btf_id || dst_prog)) 2676 return -EINVAL; 2677 2678 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && 2679 prog_type != BPF_PROG_TYPE_EXT) 2680 return -EINVAL; 2681 2682 switch (prog_type) { 2683 case BPF_PROG_TYPE_CGROUP_SOCK: 2684 switch (expected_attach_type) { 2685 case BPF_CGROUP_INET_SOCK_CREATE: 2686 case BPF_CGROUP_INET_SOCK_RELEASE: 2687 case BPF_CGROUP_INET4_POST_BIND: 2688 case BPF_CGROUP_INET6_POST_BIND: 2689 return 0; 2690 default: 2691 return -EINVAL; 2692 } 2693 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2694 switch (expected_attach_type) { 2695 case BPF_CGROUP_INET4_BIND: 2696 case BPF_CGROUP_INET6_BIND: 2697 case BPF_CGROUP_INET4_CONNECT: 2698 case BPF_CGROUP_INET6_CONNECT: 2699 case BPF_CGROUP_UNIX_CONNECT: 2700 case BPF_CGROUP_INET4_GETPEERNAME: 2701 case BPF_CGROUP_INET6_GETPEERNAME: 2702 case BPF_CGROUP_UNIX_GETPEERNAME: 2703 case BPF_CGROUP_INET4_GETSOCKNAME: 2704 case BPF_CGROUP_INET6_GETSOCKNAME: 2705 case BPF_CGROUP_UNIX_GETSOCKNAME: 2706 case BPF_CGROUP_UDP4_SENDMSG: 2707 case BPF_CGROUP_UDP6_SENDMSG: 2708 case BPF_CGROUP_UNIX_SENDMSG: 2709 case BPF_CGROUP_UDP4_RECVMSG: 2710 case BPF_CGROUP_UDP6_RECVMSG: 2711 case BPF_CGROUP_UNIX_RECVMSG: 2712 return 0; 2713 default: 2714 return -EINVAL; 2715 } 2716 case BPF_PROG_TYPE_CGROUP_SKB: 2717 switch (expected_attach_type) { 2718 case BPF_CGROUP_INET_INGRESS: 2719 case BPF_CGROUP_INET_EGRESS: 2720 return 0; 2721 default: 2722 return -EINVAL; 2723 } 2724 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2725 switch (expected_attach_type) { 2726 case BPF_CGROUP_SETSOCKOPT: 2727 case BPF_CGROUP_GETSOCKOPT: 2728 return 0; 2729 default: 2730 return -EINVAL; 2731 } 2732 case BPF_PROG_TYPE_SK_LOOKUP: 2733 if (expected_attach_type == BPF_SK_LOOKUP) 2734 return 0; 2735 return -EINVAL; 2736 case BPF_PROG_TYPE_SK_REUSEPORT: 2737 switch (expected_attach_type) { 2738 case BPF_SK_REUSEPORT_SELECT: 2739 case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: 2740 return 0; 2741 default: 2742 return -EINVAL; 2743 } 2744 case BPF_PROG_TYPE_NETFILTER: 2745 if (expected_attach_type == BPF_NETFILTER) 2746 return 0; 2747 return -EINVAL; 2748 case BPF_PROG_TYPE_SYSCALL: 2749 case BPF_PROG_TYPE_EXT: 2750 if (expected_attach_type) 2751 return -EINVAL; 2752 fallthrough; 2753 default: 2754 return 0; 2755 } 2756 } 2757 2758 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) 2759 { 2760 switch (prog_type) { 2761 case BPF_PROG_TYPE_SCHED_CLS: 2762 case BPF_PROG_TYPE_SCHED_ACT: 2763 case BPF_PROG_TYPE_XDP: 2764 case BPF_PROG_TYPE_LWT_IN: 2765 case BPF_PROG_TYPE_LWT_OUT: 2766 case BPF_PROG_TYPE_LWT_XMIT: 2767 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2768 case BPF_PROG_TYPE_SK_SKB: 2769 case 
BPF_PROG_TYPE_SK_MSG: 2770 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2771 case BPF_PROG_TYPE_CGROUP_DEVICE: 2772 case BPF_PROG_TYPE_CGROUP_SOCK: 2773 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2774 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2775 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2776 case BPF_PROG_TYPE_SOCK_OPS: 2777 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2778 case BPF_PROG_TYPE_NETFILTER: 2779 return true; 2780 case BPF_PROG_TYPE_CGROUP_SKB: 2781 /* always unpriv */ 2782 case BPF_PROG_TYPE_SK_REUSEPORT: 2783 /* equivalent to SOCKET_FILTER. need CAP_BPF only */ 2784 default: 2785 return false; 2786 } 2787 } 2788 2789 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) 2790 { 2791 switch (prog_type) { 2792 case BPF_PROG_TYPE_KPROBE: 2793 case BPF_PROG_TYPE_TRACEPOINT: 2794 case BPF_PROG_TYPE_PERF_EVENT: 2795 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2796 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2797 case BPF_PROG_TYPE_TRACING: 2798 case BPF_PROG_TYPE_LSM: 2799 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ 2800 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2801 return true; 2802 default: 2803 return false; 2804 } 2805 } 2806 2807 static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr, 2808 bool is_kernel) 2809 { 2810 bpfptr_t usig = make_bpfptr(attr->signature, is_kernel); 2811 struct bpf_dynptr_kern sig_ptr, insns_ptr; 2812 struct bpf_key *key = NULL; 2813 void *sig; 2814 int err = 0; 2815 2816 /* 2817 * Don't attempt to use kmalloc_large or vmalloc for signatures. 2818 * Practical signature for BPF program should be below this limit. 2819 */ 2820 if (attr->signature_size > KMALLOC_MAX_CACHE_SIZE) 2821 return -EINVAL; 2822 2823 if (system_keyring_id_check(attr->keyring_id) == 0) 2824 key = bpf_lookup_system_key(attr->keyring_id); 2825 else 2826 key = bpf_lookup_user_key(attr->keyring_id, 0); 2827 2828 if (!key) 2829 return -EINVAL; 2830 2831 sig = kvmemdup_bpfptr(usig, attr->signature_size); 2832 if (IS_ERR(sig)) { 2833 bpf_key_put(key); 2834 return -ENOMEM; 2835 } 2836 2837 bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0, 2838 attr->signature_size); 2839 bpf_dynptr_init(&insns_ptr, prog->insnsi, BPF_DYNPTR_TYPE_LOCAL, 0, 2840 prog->len * sizeof(struct bpf_insn)); 2841 2842 err = bpf_verify_pkcs7_signature((struct bpf_dynptr *)&insns_ptr, 2843 (struct bpf_dynptr *)&sig_ptr, key); 2844 2845 bpf_key_put(key); 2846 kvfree(sig); 2847 return err; 2848 } 2849 2850 static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog) 2851 { 2852 int err; 2853 int i; 2854 2855 for (i = 0; i < prog->aux->used_map_cnt; i++) { 2856 if (prog->aux->used_maps[i]->map_type != BPF_MAP_TYPE_INSN_ARRAY) 2857 continue; 2858 2859 err = bpf_insn_array_ready(prog->aux->used_maps[i]); 2860 if (err) 2861 return err; 2862 } 2863 2864 return 0; 2865 } 2866 2867 /* last field in 'union bpf_attr' used by this command */ 2868 #define BPF_PROG_LOAD_LAST_FIELD keyring_id 2869 2870 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) 2871 { 2872 enum bpf_prog_type type = attr->prog_type; 2873 struct bpf_prog *prog, *dst_prog = NULL; 2874 struct btf *attach_btf = NULL; 2875 struct bpf_token *token = NULL; 2876 bool bpf_cap; 2877 int err; 2878 char license[128]; 2879 2880 if (CHECK_ATTR(BPF_PROG_LOAD)) 2881 return -EINVAL; 2882 2883 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | 2884 BPF_F_ANY_ALIGNMENT | 2885 BPF_F_TEST_STATE_FREQ | 2886 BPF_F_SLEEPABLE | 2887 BPF_F_TEST_RND_HI32 | 2888 BPF_F_XDP_HAS_FRAGS | 2889 BPF_F_XDP_DEV_BOUND_ONLY | 2890 
BPF_F_TEST_REG_INVARIANTS | 2891 BPF_F_TOKEN_FD)) 2892 return -EINVAL; 2893 2894 bpf_prog_load_fixup_attach_type(attr); 2895 2896 if (attr->prog_flags & BPF_F_TOKEN_FD) { 2897 token = bpf_token_get_from_fd(attr->prog_token_fd); 2898 if (IS_ERR(token)) 2899 return PTR_ERR(token); 2900 /* if current token doesn't grant prog loading permissions, 2901 * then we can't use this token, so ignore it and rely on 2902 * system-wide capabilities checks 2903 */ 2904 if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || 2905 !bpf_token_allow_prog_type(token, attr->prog_type, 2906 attr->expected_attach_type)) { 2907 bpf_token_put(token); 2908 token = NULL; 2909 } 2910 } 2911 2912 bpf_cap = bpf_token_capable(token, CAP_BPF); 2913 err = -EPERM; 2914 2915 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 2916 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && 2917 !bpf_cap) 2918 goto put_token; 2919 2920 /* Intent here is for unprivileged_bpf_disabled to block BPF program 2921 * creation for unprivileged users; other actions depend 2922 * on fd availability and access to bpffs, so are dependent on 2923 * object creation success. Even with unprivileged BPF disabled, 2924 * capability checks are still carried out for these 2925 * and other operations. 2926 */ 2927 if (sysctl_unprivileged_bpf_disabled && !bpf_cap) 2928 goto put_token; 2929 2930 if (attr->insn_cnt == 0 || 2931 attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { 2932 err = -E2BIG; 2933 goto put_token; 2934 } 2935 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 2936 type != BPF_PROG_TYPE_CGROUP_SKB && 2937 !bpf_cap) 2938 goto put_token; 2939 2940 if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN)) 2941 goto put_token; 2942 if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) 2943 goto put_token; 2944 2945 /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog 2946 * or btf, we need to check which one it is 2947 */ 2948 if (attr->attach_prog_fd) { 2949 dst_prog = bpf_prog_get(attr->attach_prog_fd); 2950 if (IS_ERR(dst_prog)) { 2951 dst_prog = NULL; 2952 attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); 2953 if (IS_ERR(attach_btf)) { 2954 err = -EINVAL; 2955 goto put_token; 2956 } 2957 if (!btf_is_kernel(attach_btf)) { 2958 /* attaching through specifying bpf_prog's BTF 2959 * objects directly might be supported eventually 2960 */ 2961 btf_put(attach_btf); 2962 err = -ENOTSUPP; 2963 goto put_token; 2964 } 2965 } 2966 } else if (attr->attach_btf_id) { 2967 /* fall back to vmlinux BTF, if BTF type ID is specified */ 2968 attach_btf = bpf_get_btf_vmlinux(); 2969 if (IS_ERR(attach_btf)) { 2970 err = PTR_ERR(attach_btf); 2971 goto put_token; 2972 } 2973 if (!attach_btf) { 2974 err = -EINVAL; 2975 goto put_token; 2976 } 2977 btf_get(attach_btf); 2978 } 2979 2980 if (bpf_prog_load_check_attach(type, attr->expected_attach_type, 2981 attach_btf, attr->attach_btf_id, 2982 dst_prog)) { 2983 if (dst_prog) 2984 bpf_prog_put(dst_prog); 2985 if (attach_btf) 2986 btf_put(attach_btf); 2987 err = -EINVAL; 2988 goto put_token; 2989 } 2990 2991 /* plain bpf_prog allocation */ 2992 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 2993 if (!prog) { 2994 if (dst_prog) 2995 bpf_prog_put(dst_prog); 2996 if (attach_btf) 2997 btf_put(attach_btf); 2998 err = -EINVAL; 2999 goto put_token; 3000 } 3001 3002 prog->expected_attach_type = attr->expected_attach_type; 3003 prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE); 3004 prog->aux->attach_btf = attach_btf; 3005 
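/* prog now owns the attach_btf reference taken above; it is dropped in
 * the free_prog error path below or, once the program is live, in
 * __bpf_prog_put_noref().
 */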
prog->aux->attach_btf_id = attr->attach_btf_id; 3006 prog->aux->dst_prog = dst_prog; 3007 prog->aux->dev_bound = !!attr->prog_ifindex; 3008 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; 3009 3010 /* move token into prog->aux, reuse taken refcnt */ 3011 prog->aux->token = token; 3012 token = NULL; 3013 3014 prog->aux->user = get_current_user(); 3015 prog->len = attr->insn_cnt; 3016 3017 err = -EFAULT; 3018 if (copy_from_bpfptr(prog->insns, 3019 make_bpfptr(attr->insns, uattr.is_kernel), 3020 bpf_prog_insn_size(prog)) != 0) 3021 goto free_prog; 3022 /* copy eBPF program license from user space */ 3023 if (strncpy_from_bpfptr(license, 3024 make_bpfptr(attr->license, uattr.is_kernel), 3025 sizeof(license) - 1) < 0) 3026 goto free_prog; 3027 license[sizeof(license) - 1] = 0; 3028 3029 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 3030 prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0; 3031 3032 if (attr->signature) { 3033 err = bpf_prog_verify_signature(prog, attr, uattr.is_kernel); 3034 if (err) 3035 goto free_prog; 3036 } 3037 3038 prog->orig_prog = NULL; 3039 prog->jited = 0; 3040 3041 atomic64_set(&prog->aux->refcnt, 1); 3042 3043 if (bpf_prog_is_dev_bound(prog->aux)) { 3044 err = bpf_prog_dev_bound_init(prog, attr); 3045 if (err) 3046 goto free_prog; 3047 } 3048 3049 if (type == BPF_PROG_TYPE_EXT && dst_prog && 3050 bpf_prog_is_dev_bound(dst_prog->aux)) { 3051 err = bpf_prog_dev_bound_inherit(prog, dst_prog); 3052 if (err) 3053 goto free_prog; 3054 } 3055 3056 /* 3057 * Bookkeeping for managing the program attachment chain. 3058 * 3059 * It might be tempting to set attach_tracing_prog flag at the attachment 3060 * time, but this will not prevent from loading bunch of tracing prog 3061 * first, then attach them one to another. 3062 * 3063 * The flag attach_tracing_prog is set for the whole program lifecycle, and 3064 * doesn't have to be cleared in bpf_tracing_link_release, since tracing 3065 * programs cannot change attachment target. 3066 */ 3067 if (type == BPF_PROG_TYPE_TRACING && dst_prog && 3068 dst_prog->type == BPF_PROG_TYPE_TRACING) { 3069 prog->aux->attach_tracing_prog = true; 3070 } 3071 3072 /* find program type: socket_filter vs tracing_filter */ 3073 err = find_prog_type(type, prog); 3074 if (err < 0) 3075 goto free_prog; 3076 3077 prog->aux->load_time = ktime_get_boottime_ns(); 3078 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, 3079 sizeof(attr->prog_name)); 3080 if (err < 0) 3081 goto free_prog; 3082 3083 err = security_bpf_prog_load(prog, attr, token, uattr.is_kernel); 3084 if (err) 3085 goto free_prog_sec; 3086 3087 /* run eBPF verifier */ 3088 err = bpf_check(&prog, attr, uattr, uattr_size); 3089 if (err < 0) 3090 goto free_used_maps; 3091 3092 prog = bpf_prog_select_runtime(prog, &err); 3093 if (err < 0) 3094 goto free_used_maps; 3095 3096 err = bpf_prog_mark_insn_arrays_ready(prog); 3097 if (err < 0) 3098 goto free_used_maps; 3099 3100 err = bpf_prog_alloc_id(prog); 3101 if (err) 3102 goto free_used_maps; 3103 3104 /* Upon success of bpf_prog_alloc_id(), the BPF prog is 3105 * effectively publicly exposed. However, retrieving via 3106 * bpf_prog_get_fd_by_id() will take another reference, 3107 * therefore it cannot be gone underneath us. 3108 * 3109 * Only for the time /after/ successful bpf_prog_new_fd() 3110 * and before returning to userspace, we might just hold 3111 * one reference and any parallel close on that fd could 3112 * rip everything out. 
Hence, below notifications must 3113 * happen before bpf_prog_new_fd(). 3114 * 3115 * Also, any failure handling from this point onwards must 3116 * be using bpf_prog_put() given the program is exposed. 3117 */ 3118 bpf_prog_kallsyms_add(prog); 3119 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); 3120 bpf_audit_prog(prog, BPF_AUDIT_LOAD); 3121 3122 err = bpf_prog_new_fd(prog); 3123 if (err < 0) 3124 bpf_prog_put(prog); 3125 return err; 3126 3127 free_used_maps: 3128 /* In case we have subprogs, we need to wait for a grace 3129 * period before we can tear down JIT memory since symbols 3130 * are already exposed under kallsyms. 3131 */ 3132 __bpf_prog_put_noref(prog, prog->aux->real_func_cnt); 3133 return err; 3134 3135 free_prog_sec: 3136 security_bpf_prog_free(prog); 3137 free_prog: 3138 free_uid(prog->aux->user); 3139 if (prog->aux->attach_btf) 3140 btf_put(prog->aux->attach_btf); 3141 bpf_prog_free(prog); 3142 put_token: 3143 bpf_token_put(token); 3144 return err; 3145 } 3146 3147 #define BPF_OBJ_LAST_FIELD path_fd 3148 3149 static int bpf_obj_pin(const union bpf_attr *attr) 3150 { 3151 int path_fd; 3152 3153 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD) 3154 return -EINVAL; 3155 3156 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3157 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3158 return -EINVAL; 3159 3160 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 3161 return bpf_obj_pin_user(attr->bpf_fd, path_fd, 3162 u64_to_user_ptr(attr->pathname)); 3163 } 3164 3165 static int bpf_obj_get(const union bpf_attr *attr) 3166 { 3167 int path_fd; 3168 3169 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 3170 attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD)) 3171 return -EINVAL; 3172 3173 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3174 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3175 return -EINVAL; 3176 3177 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 3178 return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname), 3179 attr->file_flags); 3180 } 3181 3182 /* bpf_link_init_sleepable() allows to specify whether BPF link itself has 3183 * "sleepable" semantics, which normally would mean that BPF link's attach 3184 * hook can dereference link or link's underlying program for some time after 3185 * detachment due to RCU Tasks Trace-based lifetime protection scheme. 3186 * BPF program itself can be non-sleepable, yet, because it's transitively 3187 * reachable through BPF link, its freeing has to be delayed until after RCU 3188 * Tasks Trace GP. 
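 *
 * For example, bpf_raw_tp_link_attach() below passes
 * tracepoint_is_faultable(btp->tp) as the sleepable flag, so links
 * attached to faultable tracepoints get this delayed teardown.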
3189 */ 3190 void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, 3191 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3192 enum bpf_attach_type attach_type, bool sleepable) 3193 { 3194 WARN_ON(ops->dealloc && ops->dealloc_deferred); 3195 atomic64_set(&link->refcnt, 1); 3196 link->type = type; 3197 link->sleepable = sleepable; 3198 link->id = 0; 3199 link->ops = ops; 3200 link->prog = prog; 3201 link->attach_type = attach_type; 3202 } 3203 3204 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, 3205 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3206 enum bpf_attach_type attach_type) 3207 { 3208 bpf_link_init_sleepable(link, type, ops, prog, attach_type, false); 3209 } 3210 3211 static void bpf_link_free_id(int id) 3212 { 3213 if (!id) 3214 return; 3215 3216 spin_lock_bh(&link_idr_lock); 3217 idr_remove(&link_idr, id); 3218 spin_unlock_bh(&link_idr_lock); 3219 } 3220 3221 /* Clean up bpf_link and corresponding anon_inode file and FD. After 3222 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 3223 * anon_inode's release() call. This helper marks bpf_link as 3224 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 3225 * is not decremented, it's the responsibility of a calling code that failed 3226 * to complete bpf_link initialization. 3227 * This helper eventually calls link's dealloc callback, but does not call 3228 * link's release callback. 3229 */ 3230 void bpf_link_cleanup(struct bpf_link_primer *primer) 3231 { 3232 primer->link->prog = NULL; 3233 bpf_link_free_id(primer->id); 3234 fput(primer->file); 3235 put_unused_fd(primer->fd); 3236 } 3237 3238 void bpf_link_inc(struct bpf_link *link) 3239 { 3240 atomic64_inc(&link->refcnt); 3241 } 3242 3243 static void bpf_link_dealloc(struct bpf_link *link) 3244 { 3245 /* now that we know that bpf_link itself can't be reached, put underlying BPF program */ 3246 if (link->prog) 3247 bpf_prog_put(link->prog); 3248 3249 /* free bpf_link and its containing memory */ 3250 if (link->ops->dealloc_deferred) 3251 link->ops->dealloc_deferred(link); 3252 else 3253 link->ops->dealloc(link); 3254 } 3255 3256 static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) 3257 { 3258 struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); 3259 3260 bpf_link_dealloc(link); 3261 } 3262 3263 static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) 3264 { 3265 if (rcu_trace_implies_rcu_gp()) 3266 bpf_link_defer_dealloc_rcu_gp(rcu); 3267 else 3268 call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); 3269 } 3270 3271 /* bpf_link_free is guaranteed to be called from process context */ 3272 static void bpf_link_free(struct bpf_link *link) 3273 { 3274 const struct bpf_link_ops *ops = link->ops; 3275 3276 bpf_link_free_id(link->id); 3277 /* detach BPF program, clean up used resources */ 3278 if (link->prog) 3279 ops->release(link); 3280 if (ops->dealloc_deferred) { 3281 /* Schedule BPF link deallocation, which will only then 3282 * trigger putting BPF program refcount. 
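 * (the put happens in bpf_link_dealloc() once the RCU callback runs).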
3283 * If underlying BPF program is sleepable or BPF link's target 3284 * attach hookpoint is sleepable or otherwise requires RCU GPs 3285 * to ensure link and its underlying BPF program is not 3286 * reachable anymore, we need to first wait for RCU tasks 3287 * trace sync, and then go through "classic" RCU grace period 3288 */ 3289 if (link->sleepable || (link->prog && link->prog->sleepable)) 3290 call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); 3291 else 3292 call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); 3293 } else if (ops->dealloc) { 3294 bpf_link_dealloc(link); 3295 } 3296 } 3297 3298 static void bpf_link_put_deferred(struct work_struct *work) 3299 { 3300 struct bpf_link *link = container_of(work, struct bpf_link, work); 3301 3302 bpf_link_free(link); 3303 } 3304 3305 /* bpf_link_put might be called from atomic context. It needs to be called 3306 * from sleepable context in order to acquire sleeping locks during the process. 3307 */ 3308 void bpf_link_put(struct bpf_link *link) 3309 { 3310 if (!atomic64_dec_and_test(&link->refcnt)) 3311 return; 3312 3313 INIT_WORK(&link->work, bpf_link_put_deferred); 3314 schedule_work(&link->work); 3315 } 3316 EXPORT_SYMBOL(bpf_link_put); 3317 3318 static void bpf_link_put_direct(struct bpf_link *link) 3319 { 3320 if (!atomic64_dec_and_test(&link->refcnt)) 3321 return; 3322 bpf_link_free(link); 3323 } 3324 3325 static int bpf_link_release(struct inode *inode, struct file *filp) 3326 { 3327 struct bpf_link *link = filp->private_data; 3328 3329 bpf_link_put_direct(link); 3330 return 0; 3331 } 3332 3333 #ifdef CONFIG_PROC_FS 3334 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 3335 #define BPF_MAP_TYPE(_id, _ops) 3336 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name, 3337 static const char *bpf_link_type_strs[] = { 3338 [BPF_LINK_TYPE_UNSPEC] = "<invalid>", 3339 #include <linux/bpf_types.h> 3340 }; 3341 #undef BPF_PROG_TYPE 3342 #undef BPF_MAP_TYPE 3343 #undef BPF_LINK_TYPE 3344 3345 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) 3346 { 3347 const struct bpf_link *link = filp->private_data; 3348 const struct bpf_prog *prog = link->prog; 3349 enum bpf_link_type type = link->type; 3350 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 3351 3352 if (type < ARRAY_SIZE(bpf_link_type_strs) && bpf_link_type_strs[type]) { 3353 if (link->type == BPF_LINK_TYPE_KPROBE_MULTI) 3354 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_KPROBE_MULTI_RETURN ? 3355 "kretprobe_multi" : "kprobe_multi"); 3356 else if (link->type == BPF_LINK_TYPE_UPROBE_MULTI) 3357 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_UPROBE_MULTI_RETURN ? 3358 "uretprobe_multi" : "uprobe_multi"); 3359 else 3360 seq_printf(m, "link_type:\t%s\n", bpf_link_type_strs[type]); 3361 } else { 3362 WARN_ONCE(1, "missing BPF_LINK_TYPE(...) 
for link type %u\n", type); 3363 seq_printf(m, "link_type:\t<%u>\n", type); 3364 } 3365 seq_printf(m, "link_id:\t%u\n", link->id); 3366 3367 if (prog) { 3368 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 3369 seq_printf(m, 3370 "prog_tag:\t%s\n" 3371 "prog_id:\t%u\n", 3372 prog_tag, 3373 prog->aux->id); 3374 } 3375 if (link->ops->show_fdinfo) 3376 link->ops->show_fdinfo(link, m); 3377 } 3378 #endif 3379 3380 static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts) 3381 { 3382 struct bpf_link *link = file->private_data; 3383 3384 return link->ops->poll(file, pts); 3385 } 3386 3387 static const struct file_operations bpf_link_fops = { 3388 #ifdef CONFIG_PROC_FS 3389 .show_fdinfo = bpf_link_show_fdinfo, 3390 #endif 3391 .release = bpf_link_release, 3392 .read = bpf_dummy_read, 3393 .write = bpf_dummy_write, 3394 }; 3395 3396 static const struct file_operations bpf_link_fops_poll = { 3397 #ifdef CONFIG_PROC_FS 3398 .show_fdinfo = bpf_link_show_fdinfo, 3399 #endif 3400 .release = bpf_link_release, 3401 .read = bpf_dummy_read, 3402 .write = bpf_dummy_write, 3403 .poll = bpf_link_poll, 3404 }; 3405 3406 static int bpf_link_alloc_id(struct bpf_link *link) 3407 { 3408 int id; 3409 3410 idr_preload(GFP_KERNEL); 3411 spin_lock_bh(&link_idr_lock); 3412 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); 3413 spin_unlock_bh(&link_idr_lock); 3414 idr_preload_end(); 3415 3416 return id; 3417 } 3418 3419 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, 3420 * reserving unused FD and allocating ID from link_idr. This is to be paired 3421 * with bpf_link_settle() to install FD and ID and expose bpf_link to 3422 * user-space, if bpf_link is successfully attached. If not, bpf_link and 3423 * pre-allocated resources are to be freed with bpf_cleanup() call. All the 3424 * transient state is passed around in struct bpf_link_primer. 3425 * This is preferred way to create and initialize bpf_link, especially when 3426 * there are complicated and expensive operations in between creating bpf_link 3427 * itself and attaching it to BPF hook. By using bpf_link_prime() and 3428 * bpf_link_settle() kernel code using bpf_link doesn't have to perform 3429 * expensive (and potentially failing) roll back operations in a rare case 3430 * that file, FD, or ID can't be allocated. 3431 */ 3432 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) 3433 { 3434 struct file *file; 3435 int fd, id; 3436 3437 fd = get_unused_fd_flags(O_CLOEXEC); 3438 if (fd < 0) 3439 return fd; 3440 3441 3442 id = bpf_link_alloc_id(link); 3443 if (id < 0) { 3444 put_unused_fd(fd); 3445 return id; 3446 } 3447 3448 file = anon_inode_getfile("bpf_link", 3449 link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops, 3450 link, O_CLOEXEC); 3451 if (IS_ERR(file)) { 3452 bpf_link_free_id(id); 3453 put_unused_fd(fd); 3454 return PTR_ERR(file); 3455 } 3456 3457 primer->link = link; 3458 primer->file = file; 3459 primer->fd = fd; 3460 primer->id = id; 3461 return 0; 3462 } 3463 3464 int bpf_link_settle(struct bpf_link_primer *primer) 3465 { 3466 /* make bpf_link fetchable by ID */ 3467 spin_lock_bh(&link_idr_lock); 3468 primer->link->id = primer->id; 3469 spin_unlock_bh(&link_idr_lock); 3470 /* make bpf_link fetchable by FD */ 3471 fd_install(primer->fd, primer->file); 3472 /* pass through installed FD */ 3473 return primer->fd; 3474 } 3475 3476 int bpf_link_new_fd(struct bpf_link *link) 3477 { 3478 return anon_inode_getfd("bpf-link", 3479 link->ops->poll ? 
&bpf_link_fops_poll : &bpf_link_fops, 3480 link, O_CLOEXEC); 3481 } 3482 3483 struct bpf_link *bpf_link_get_from_fd(u32 ufd) 3484 { 3485 CLASS(fd, f)(ufd); 3486 struct bpf_link *link; 3487 3488 if (fd_empty(f)) 3489 return ERR_PTR(-EBADF); 3490 if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) 3491 return ERR_PTR(-EINVAL); 3492 3493 link = fd_file(f)->private_data; 3494 bpf_link_inc(link); 3495 return link; 3496 } 3497 EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL"); 3498 3499 static void bpf_tracing_link_release(struct bpf_link *link) 3500 { 3501 struct bpf_tracing_link *tr_link = 3502 container_of(link, struct bpf_tracing_link, link.link); 3503 3504 WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, 3505 tr_link->trampoline, 3506 tr_link->tgt_prog)); 3507 3508 bpf_trampoline_put(tr_link->trampoline); 3509 3510 /* tgt_prog is NULL if target is a kernel function */ 3511 if (tr_link->tgt_prog) 3512 bpf_prog_put(tr_link->tgt_prog); 3513 } 3514 3515 static void bpf_tracing_link_dealloc(struct bpf_link *link) 3516 { 3517 struct bpf_tracing_link *tr_link = 3518 container_of(link, struct bpf_tracing_link, link.link); 3519 3520 kfree(tr_link); 3521 } 3522 3523 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, 3524 struct seq_file *seq) 3525 { 3526 struct bpf_tracing_link *tr_link = 3527 container_of(link, struct bpf_tracing_link, link.link); 3528 u32 target_btf_id, target_obj_id; 3529 3530 bpf_trampoline_unpack_key(tr_link->trampoline->key, 3531 &target_obj_id, &target_btf_id); 3532 seq_printf(seq, 3533 "attach_type:\t%d\n" 3534 "target_obj_id:\t%u\n" 3535 "target_btf_id:\t%u\n" 3536 "cookie:\t%llu\n", 3537 link->attach_type, 3538 target_obj_id, 3539 target_btf_id, 3540 tr_link->link.cookie); 3541 } 3542 3543 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, 3544 struct bpf_link_info *info) 3545 { 3546 struct bpf_tracing_link *tr_link = 3547 container_of(link, struct bpf_tracing_link, link.link); 3548 3549 info->tracing.attach_type = link->attach_type; 3550 info->tracing.cookie = tr_link->link.cookie; 3551 bpf_trampoline_unpack_key(tr_link->trampoline->key, 3552 &info->tracing.target_obj_id, 3553 &info->tracing.target_btf_id); 3554 3555 return 0; 3556 } 3557 3558 static const struct bpf_link_ops bpf_tracing_link_lops = { 3559 .release = bpf_tracing_link_release, 3560 .dealloc = bpf_tracing_link_dealloc, 3561 .show_fdinfo = bpf_tracing_link_show_fdinfo, 3562 .fill_link_info = bpf_tracing_link_fill_link_info, 3563 }; 3564 3565 static int bpf_tracing_prog_attach(struct bpf_prog *prog, 3566 int tgt_prog_fd, 3567 u32 btf_id, 3568 u64 bpf_cookie, 3569 enum bpf_attach_type attach_type) 3570 { 3571 struct bpf_link_primer link_primer; 3572 struct bpf_prog *tgt_prog = NULL; 3573 struct bpf_trampoline *tr = NULL; 3574 struct bpf_tracing_link *link; 3575 u64 key = 0; 3576 int err; 3577 3578 switch (prog->type) { 3579 case BPF_PROG_TYPE_TRACING: 3580 if (prog->expected_attach_type != BPF_TRACE_FENTRY && 3581 prog->expected_attach_type != BPF_TRACE_FEXIT && 3582 prog->expected_attach_type != BPF_TRACE_FSESSION && 3583 prog->expected_attach_type != BPF_MODIFY_RETURN) { 3584 err = -EINVAL; 3585 goto out_put_prog; 3586 } 3587 break; 3588 case BPF_PROG_TYPE_EXT: 3589 if (prog->expected_attach_type != 0) { 3590 err = -EINVAL; 3591 goto out_put_prog; 3592 } 3593 break; 3594 case BPF_PROG_TYPE_LSM: 3595 if (prog->expected_attach_type != BPF_LSM_MAC) { 3596 err = -EINVAL; 3597 goto out_put_prog; 3598 } 3599 break; 3600 default: 3601 
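/* Only TRACING, EXT and LSM programs can be attached through a
 * tracing link; all other program types are rejected here.
 */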
err = -EINVAL; 3602 goto out_put_prog; 3603 } 3604 3605 if (!!tgt_prog_fd != !!btf_id) { 3606 err = -EINVAL; 3607 goto out_put_prog; 3608 } 3609 3610 if (tgt_prog_fd) { 3611 /* 3612 * For now we only allow new targets for BPF_PROG_TYPE_EXT. If this 3613 * part would be changed to implement the same for 3614 * BPF_PROG_TYPE_TRACING, do not forget to update the way how 3615 * attach_tracing_prog flag is set. 3616 */ 3617 if (prog->type != BPF_PROG_TYPE_EXT) { 3618 err = -EINVAL; 3619 goto out_put_prog; 3620 } 3621 3622 tgt_prog = bpf_prog_get(tgt_prog_fd); 3623 if (IS_ERR(tgt_prog)) { 3624 err = PTR_ERR(tgt_prog); 3625 tgt_prog = NULL; 3626 goto out_put_prog; 3627 } 3628 3629 key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id); 3630 } 3631 3632 if (prog->expected_attach_type == BPF_TRACE_FSESSION) { 3633 struct bpf_fsession_link *fslink; 3634 3635 fslink = kzalloc(sizeof(*fslink), GFP_USER); 3636 if (fslink) { 3637 bpf_link_init(&fslink->fexit.link, BPF_LINK_TYPE_TRACING, 3638 &bpf_tracing_link_lops, prog, attach_type); 3639 fslink->fexit.cookie = bpf_cookie; 3640 link = &fslink->link; 3641 } else { 3642 link = NULL; 3643 } 3644 } else { 3645 link = kzalloc(sizeof(*link), GFP_USER); 3646 } 3647 if (!link) { 3648 err = -ENOMEM; 3649 goto out_put_prog; 3650 } 3651 bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, 3652 &bpf_tracing_link_lops, prog, attach_type); 3653 3654 link->link.cookie = bpf_cookie; 3655 3656 mutex_lock(&prog->aux->dst_mutex); 3657 3658 /* There are a few possible cases here: 3659 * 3660 * - if prog->aux->dst_trampoline is set, the program was just loaded 3661 * and not yet attached to anything, so we can use the values stored 3662 * in prog->aux 3663 * 3664 * - if prog->aux->dst_trampoline is NULL, the program has already been 3665 * attached to a target and its initial target was cleared (below) 3666 * 3667 * - if tgt_prog != NULL, the caller specified tgt_prog_fd + 3668 * target_btf_id using the link_create API. 3669 * 3670 * - if tgt_prog == NULL when this function was called using the old 3671 * raw_tracepoint_open API, and we need a target from prog->aux 3672 * 3673 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program 3674 * was detached and is going for re-attachment. 3675 * 3676 * - if prog->aux->dst_trampoline is NULL and tgt_prog and prog->aux->attach_btf 3677 * are NULL, then program was already attached and user did not provide 3678 * tgt_prog_fd so we have no way to find out or create trampoline 3679 */ 3680 if (!prog->aux->dst_trampoline && !tgt_prog) { 3681 /* 3682 * Allow re-attach for TRACING and LSM programs. If it's 3683 * currently linked, bpf_trampoline_link_prog will fail. 3684 * EXT programs need to specify tgt_prog_fd, so they 3685 * re-attach in separate code path. 3686 */ 3687 if (prog->type != BPF_PROG_TYPE_TRACING && 3688 prog->type != BPF_PROG_TYPE_LSM) { 3689 err = -EINVAL; 3690 goto out_unlock; 3691 } 3692 /* We can allow re-attach only if we have valid attach_btf. 
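 * Programs loaded against another program's BTF (via attach_prog_fd)
 * have no attach_btf, so for them a target must be given explicitly.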
*/ 3693 if (!prog->aux->attach_btf) { 3694 err = -EINVAL; 3695 goto out_unlock; 3696 } 3697 btf_id = prog->aux->attach_btf_id; 3698 key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id); 3699 } 3700 3701 if (!prog->aux->dst_trampoline || 3702 (key && key != prog->aux->dst_trampoline->key)) { 3703 /* If there is no saved target, or the specified target is 3704 * different from the destination specified at load time, we 3705 * need a new trampoline and a check for compatibility 3706 */ 3707 struct bpf_attach_target_info tgt_info = {}; 3708 3709 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, 3710 &tgt_info); 3711 if (err) 3712 goto out_unlock; 3713 3714 if (tgt_info.tgt_mod) { 3715 module_put(prog->aux->mod); 3716 prog->aux->mod = tgt_info.tgt_mod; 3717 } 3718 3719 tr = bpf_trampoline_get(key, &tgt_info); 3720 if (!tr) { 3721 err = -ENOMEM; 3722 goto out_unlock; 3723 } 3724 } else { 3725 /* The caller didn't specify a target, or the target was the 3726 * same as the destination supplied during program load. This 3727 * means we can reuse the trampoline and reference from program 3728 * load time, and there is no need to allocate a new one. This 3729 * can only happen once for any program, as the saved values in 3730 * prog->aux are cleared below. 3731 */ 3732 tr = prog->aux->dst_trampoline; 3733 tgt_prog = prog->aux->dst_prog; 3734 } 3735 3736 err = bpf_link_prime(&link->link.link, &link_primer); 3737 if (err) 3738 goto out_unlock; 3739 3740 err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog); 3741 if (err) { 3742 bpf_link_cleanup(&link_primer); 3743 link = NULL; 3744 goto out_unlock; 3745 } 3746 3747 link->tgt_prog = tgt_prog; 3748 link->trampoline = tr; 3749 3750 /* Always clear the trampoline and target prog from prog->aux to make 3751 * sure the original attach destination is not kept alive after a 3752 * program is (re-)attached to another target. 
3753 */ 3754 if (prog->aux->dst_prog && 3755 (tgt_prog_fd || tr != prog->aux->dst_trampoline)) 3756 /* got extra prog ref from syscall, or attaching to different prog */ 3757 bpf_prog_put(prog->aux->dst_prog); 3758 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) 3759 /* we allocated a new trampoline, so free the old one */ 3760 bpf_trampoline_put(prog->aux->dst_trampoline); 3761 3762 prog->aux->dst_prog = NULL; 3763 prog->aux->dst_trampoline = NULL; 3764 mutex_unlock(&prog->aux->dst_mutex); 3765 3766 return bpf_link_settle(&link_primer); 3767 out_unlock: 3768 if (tr && tr != prog->aux->dst_trampoline) 3769 bpf_trampoline_put(tr); 3770 mutex_unlock(&prog->aux->dst_mutex); 3771 kfree(link); 3772 out_put_prog: 3773 if (tgt_prog_fd && tgt_prog) 3774 bpf_prog_put(tgt_prog); 3775 return err; 3776 } 3777 3778 static void bpf_raw_tp_link_release(struct bpf_link *link) 3779 { 3780 struct bpf_raw_tp_link *raw_tp = 3781 container_of(link, struct bpf_raw_tp_link, link); 3782 3783 bpf_probe_unregister(raw_tp->btp, raw_tp); 3784 bpf_put_raw_tracepoint(raw_tp->btp); 3785 } 3786 3787 static void bpf_raw_tp_link_dealloc(struct bpf_link *link) 3788 { 3789 struct bpf_raw_tp_link *raw_tp = 3790 container_of(link, struct bpf_raw_tp_link, link); 3791 3792 kfree(raw_tp); 3793 } 3794 3795 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, 3796 struct seq_file *seq) 3797 { 3798 struct bpf_raw_tp_link *raw_tp_link = 3799 container_of(link, struct bpf_raw_tp_link, link); 3800 3801 seq_printf(seq, 3802 "tp_name:\t%s\n" 3803 "cookie:\t%llu\n", 3804 raw_tp_link->btp->tp->name, 3805 raw_tp_link->cookie); 3806 } 3807 3808 static int bpf_copy_to_user(char __user *ubuf, const char *buf, u32 ulen, 3809 u32 len) 3810 { 3811 if (ulen >= len + 1) { 3812 if (copy_to_user(ubuf, buf, len + 1)) 3813 return -EFAULT; 3814 } else { 3815 char zero = '\0'; 3816 3817 if (copy_to_user(ubuf, buf, ulen - 1)) 3818 return -EFAULT; 3819 if (put_user(zero, ubuf + ulen - 1)) 3820 return -EFAULT; 3821 return -ENOSPC; 3822 } 3823 3824 return 0; 3825 } 3826 3827 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, 3828 struct bpf_link_info *info) 3829 { 3830 struct bpf_raw_tp_link *raw_tp_link = 3831 container_of(link, struct bpf_raw_tp_link, link); 3832 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); 3833 const char *tp_name = raw_tp_link->btp->tp->name; 3834 u32 ulen = info->raw_tracepoint.tp_name_len; 3835 size_t tp_len = strlen(tp_name); 3836 3837 if (!ulen ^ !ubuf) 3838 return -EINVAL; 3839 3840 info->raw_tracepoint.tp_name_len = tp_len + 1; 3841 info->raw_tracepoint.cookie = raw_tp_link->cookie; 3842 3843 if (!ubuf) 3844 return 0; 3845 3846 return bpf_copy_to_user(ubuf, tp_name, ulen, tp_len); 3847 } 3848 3849 static const struct bpf_link_ops bpf_raw_tp_link_lops = { 3850 .release = bpf_raw_tp_link_release, 3851 .dealloc_deferred = bpf_raw_tp_link_dealloc, 3852 .show_fdinfo = bpf_raw_tp_link_show_fdinfo, 3853 .fill_link_info = bpf_raw_tp_link_fill_link_info, 3854 }; 3855 3856 #ifdef CONFIG_PERF_EVENTS 3857 struct bpf_perf_link { 3858 struct bpf_link link; 3859 struct file *perf_file; 3860 }; 3861 3862 static void bpf_perf_link_release(struct bpf_link *link) 3863 { 3864 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3865 struct perf_event *event = perf_link->perf_file->private_data; 3866 3867 perf_event_free_bpf_prog(event); 3868 fput(perf_link->perf_file); 3869 } 3870 3871 static void bpf_perf_link_dealloc(struct bpf_link *link) 3872 { 
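/* ->release() has already dropped the perf event's program and put the
 * perf file; by the time dealloc runs, only the container is left to
 * free.
 */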
3873 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3874 3875 kfree(perf_link); 3876 } 3877 3878 static int bpf_perf_link_fill_common(const struct perf_event *event, 3879 char __user *uname, u32 *ulenp, 3880 u64 *probe_offset, u64 *probe_addr, 3881 u32 *fd_type, unsigned long *missed) 3882 { 3883 const char *buf; 3884 u32 prog_id, ulen; 3885 size_t len; 3886 int err; 3887 3888 ulen = *ulenp; 3889 if (!ulen ^ !uname) 3890 return -EINVAL; 3891 3892 err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, 3893 probe_offset, probe_addr, missed); 3894 if (err) 3895 return err; 3896 3897 if (buf) { 3898 len = strlen(buf); 3899 *ulenp = len + 1; 3900 } else { 3901 *ulenp = 1; 3902 } 3903 if (!uname) 3904 return 0; 3905 3906 if (buf) { 3907 err = bpf_copy_to_user(uname, buf, ulen, len); 3908 if (err) 3909 return err; 3910 } else { 3911 char zero = '\0'; 3912 3913 if (put_user(zero, uname)) 3914 return -EFAULT; 3915 } 3916 return 0; 3917 } 3918 3919 #ifdef CONFIG_KPROBE_EVENTS 3920 static int bpf_perf_link_fill_kprobe(const struct perf_event *event, 3921 struct bpf_link_info *info) 3922 { 3923 unsigned long missed; 3924 char __user *uname; 3925 u64 addr, offset; 3926 u32 ulen, type; 3927 int err; 3928 3929 uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); 3930 ulen = info->perf_event.kprobe.name_len; 3931 err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr, 3932 &type, &missed); 3933 if (err) 3934 return err; 3935 if (type == BPF_FD_TYPE_KRETPROBE) 3936 info->perf_event.type = BPF_PERF_EVENT_KRETPROBE; 3937 else 3938 info->perf_event.type = BPF_PERF_EVENT_KPROBE; 3939 info->perf_event.kprobe.name_len = ulen; 3940 info->perf_event.kprobe.offset = offset; 3941 info->perf_event.kprobe.missed = missed; 3942 if (!kallsyms_show_value(current_cred())) 3943 addr = 0; 3944 info->perf_event.kprobe.addr = addr; 3945 info->perf_event.kprobe.cookie = event->bpf_cookie; 3946 return 0; 3947 } 3948 3949 static void bpf_perf_link_fdinfo_kprobe(const struct perf_event *event, 3950 struct seq_file *seq) 3951 { 3952 const char *name; 3953 int err; 3954 u32 prog_id, type; 3955 u64 offset, addr; 3956 unsigned long missed; 3957 3958 err = bpf_get_perf_event_info(event, &prog_id, &type, &name, 3959 &offset, &addr, &missed); 3960 if (err) 3961 return; 3962 3963 seq_printf(seq, 3964 "name:\t%s\n" 3965 "offset:\t%#llx\n" 3966 "missed:\t%lu\n" 3967 "addr:\t%#llx\n" 3968 "event_type:\t%s\n" 3969 "cookie:\t%llu\n", 3970 name, offset, missed, addr, 3971 type == BPF_FD_TYPE_KRETPROBE ? 
"kretprobe" : "kprobe", 3972 event->bpf_cookie); 3973 } 3974 #endif 3975 3976 #ifdef CONFIG_UPROBE_EVENTS 3977 static int bpf_perf_link_fill_uprobe(const struct perf_event *event, 3978 struct bpf_link_info *info) 3979 { 3980 u64 ref_ctr_offset, offset; 3981 char __user *uname; 3982 u32 ulen, type; 3983 int err; 3984 3985 uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); 3986 ulen = info->perf_event.uprobe.name_len; 3987 err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &ref_ctr_offset, 3988 &type, NULL); 3989 if (err) 3990 return err; 3991 3992 if (type == BPF_FD_TYPE_URETPROBE) 3993 info->perf_event.type = BPF_PERF_EVENT_URETPROBE; 3994 else 3995 info->perf_event.type = BPF_PERF_EVENT_UPROBE; 3996 info->perf_event.uprobe.name_len = ulen; 3997 info->perf_event.uprobe.offset = offset; 3998 info->perf_event.uprobe.cookie = event->bpf_cookie; 3999 info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset; 4000 return 0; 4001 } 4002 4003 static void bpf_perf_link_fdinfo_uprobe(const struct perf_event *event, 4004 struct seq_file *seq) 4005 { 4006 const char *name; 4007 int err; 4008 u32 prog_id, type; 4009 u64 offset, ref_ctr_offset; 4010 unsigned long missed; 4011 4012 err = bpf_get_perf_event_info(event, &prog_id, &type, &name, 4013 &offset, &ref_ctr_offset, &missed); 4014 if (err) 4015 return; 4016 4017 seq_printf(seq, 4018 "name:\t%s\n" 4019 "offset:\t%#llx\n" 4020 "ref_ctr_offset:\t%#llx\n" 4021 "event_type:\t%s\n" 4022 "cookie:\t%llu\n", 4023 name, offset, ref_ctr_offset, 4024 type == BPF_FD_TYPE_URETPROBE ? "uretprobe" : "uprobe", 4025 event->bpf_cookie); 4026 } 4027 #endif 4028 4029 static int bpf_perf_link_fill_probe(const struct perf_event *event, 4030 struct bpf_link_info *info) 4031 { 4032 #ifdef CONFIG_KPROBE_EVENTS 4033 if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) 4034 return bpf_perf_link_fill_kprobe(event, info); 4035 #endif 4036 #ifdef CONFIG_UPROBE_EVENTS 4037 if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE) 4038 return bpf_perf_link_fill_uprobe(event, info); 4039 #endif 4040 return -EOPNOTSUPP; 4041 } 4042 4043 static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, 4044 struct bpf_link_info *info) 4045 { 4046 char __user *uname; 4047 u32 ulen; 4048 int err; 4049 4050 uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); 4051 ulen = info->perf_event.tracepoint.name_len; 4052 err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, NULL); 4053 if (err) 4054 return err; 4055 4056 info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; 4057 info->perf_event.tracepoint.name_len = ulen; 4058 info->perf_event.tracepoint.cookie = event->bpf_cookie; 4059 return 0; 4060 } 4061 4062 static int bpf_perf_link_fill_perf_event(const struct perf_event *event, 4063 struct bpf_link_info *info) 4064 { 4065 info->perf_event.event.type = event->attr.type; 4066 info->perf_event.event.config = event->attr.config; 4067 info->perf_event.event.cookie = event->bpf_cookie; 4068 info->perf_event.type = BPF_PERF_EVENT_EVENT; 4069 return 0; 4070 } 4071 4072 static int bpf_perf_link_fill_link_info(const struct bpf_link *link, 4073 struct bpf_link_info *info) 4074 { 4075 struct bpf_perf_link *perf_link; 4076 const struct perf_event *event; 4077 4078 perf_link = container_of(link, struct bpf_perf_link, link); 4079 event = perf_get_event(perf_link->perf_file); 4080 if (IS_ERR(event)) 4081 return PTR_ERR(event); 4082 4083 switch (event->prog->type) { 4084 case BPF_PROG_TYPE_PERF_EVENT: 4085 return bpf_perf_link_fill_perf_event(event, info); 4086 
case BPF_PROG_TYPE_TRACEPOINT: 4087 return bpf_perf_link_fill_tracepoint(event, info); 4088 case BPF_PROG_TYPE_KPROBE: 4089 return bpf_perf_link_fill_probe(event, info); 4090 default: 4091 return -EOPNOTSUPP; 4092 } 4093 } 4094 4095 static void bpf_perf_event_link_show_fdinfo(const struct perf_event *event, 4096 struct seq_file *seq) 4097 { 4098 seq_printf(seq, 4099 "type:\t%u\n" 4100 "config:\t%llu\n" 4101 "event_type:\t%s\n" 4102 "cookie:\t%llu\n", 4103 event->attr.type, event->attr.config, 4104 "event", event->bpf_cookie); 4105 } 4106 4107 static void bpf_tracepoint_link_show_fdinfo(const struct perf_event *event, 4108 struct seq_file *seq) 4109 { 4110 int err; 4111 const char *name; 4112 u32 prog_id; 4113 4114 err = bpf_get_perf_event_info(event, &prog_id, NULL, &name, NULL, 4115 NULL, NULL); 4116 if (err) 4117 return; 4118 4119 seq_printf(seq, 4120 "tp_name:\t%s\n" 4121 "event_type:\t%s\n" 4122 "cookie:\t%llu\n", 4123 name, "tracepoint", event->bpf_cookie); 4124 } 4125 4126 static void bpf_probe_link_show_fdinfo(const struct perf_event *event, 4127 struct seq_file *seq) 4128 { 4129 #ifdef CONFIG_KPROBE_EVENTS 4130 if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) 4131 return bpf_perf_link_fdinfo_kprobe(event, seq); 4132 #endif 4133 4134 #ifdef CONFIG_UPROBE_EVENTS 4135 if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE) 4136 return bpf_perf_link_fdinfo_uprobe(event, seq); 4137 #endif 4138 } 4139 4140 static void bpf_perf_link_show_fdinfo(const struct bpf_link *link, 4141 struct seq_file *seq) 4142 { 4143 struct bpf_perf_link *perf_link; 4144 const struct perf_event *event; 4145 4146 perf_link = container_of(link, struct bpf_perf_link, link); 4147 event = perf_get_event(perf_link->perf_file); 4148 if (IS_ERR(event)) 4149 return; 4150 4151 switch (event->prog->type) { 4152 case BPF_PROG_TYPE_PERF_EVENT: 4153 return bpf_perf_event_link_show_fdinfo(event, seq); 4154 case BPF_PROG_TYPE_TRACEPOINT: 4155 return bpf_tracepoint_link_show_fdinfo(event, seq); 4156 case BPF_PROG_TYPE_KPROBE: 4157 return bpf_probe_link_show_fdinfo(event, seq); 4158 default: 4159 return; 4160 } 4161 } 4162 4163 static const struct bpf_link_ops bpf_perf_link_lops = { 4164 .release = bpf_perf_link_release, 4165 .dealloc = bpf_perf_link_dealloc, 4166 .fill_link_info = bpf_perf_link_fill_link_info, 4167 .show_fdinfo = bpf_perf_link_show_fdinfo, 4168 }; 4169 4170 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 4171 { 4172 struct bpf_link_primer link_primer; 4173 struct bpf_perf_link *link; 4174 struct perf_event *event; 4175 struct file *perf_file; 4176 int err; 4177 4178 if (attr->link_create.flags) 4179 return -EINVAL; 4180 4181 perf_file = perf_event_get(attr->link_create.target_fd); 4182 if (IS_ERR(perf_file)) 4183 return PTR_ERR(perf_file); 4184 4185 link = kzalloc(sizeof(*link), GFP_USER); 4186 if (!link) { 4187 err = -ENOMEM; 4188 goto out_put_file; 4189 } 4190 bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog, 4191 attr->link_create.attach_type); 4192 link->perf_file = perf_file; 4193 4194 err = bpf_link_prime(&link->link, &link_primer); 4195 if (err) { 4196 kfree(link); 4197 goto out_put_file; 4198 } 4199 4200 event = perf_file->private_data; 4201 err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie); 4202 if (err) { 4203 bpf_link_cleanup(&link_primer); 4204 goto out_put_file; 4205 } 4206 /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */ 4207 bpf_prog_inc(prog); 4208 4209 return 
bpf_link_settle(&link_primer); 4210 4211 out_put_file: 4212 fput(perf_file); 4213 return err; 4214 } 4215 #else 4216 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 4217 { 4218 return -EOPNOTSUPP; 4219 } 4220 #endif /* CONFIG_PERF_EVENTS */ 4221 4222 static int bpf_raw_tp_link_attach(struct bpf_prog *prog, 4223 const char __user *user_tp_name, u64 cookie, 4224 enum bpf_attach_type attach_type) 4225 { 4226 struct bpf_link_primer link_primer; 4227 struct bpf_raw_tp_link *link; 4228 struct bpf_raw_event_map *btp; 4229 const char *tp_name; 4230 char buf[128]; 4231 int err; 4232 4233 switch (prog->type) { 4234 case BPF_PROG_TYPE_TRACING: 4235 case BPF_PROG_TYPE_EXT: 4236 case BPF_PROG_TYPE_LSM: 4237 if (user_tp_name) 4238 /* The attach point for this category of programs 4239 * should be specified via btf_id during program load. 4240 */ 4241 return -EINVAL; 4242 if (prog->type == BPF_PROG_TYPE_TRACING && 4243 prog->expected_attach_type == BPF_TRACE_RAW_TP) { 4244 tp_name = prog->aux->attach_func_name; 4245 break; 4246 } 4247 return bpf_tracing_prog_attach(prog, 0, 0, 0, attach_type); 4248 case BPF_PROG_TYPE_RAW_TRACEPOINT: 4249 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 4250 if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) 4251 return -EFAULT; 4252 buf[sizeof(buf) - 1] = 0; 4253 tp_name = buf; 4254 break; 4255 default: 4256 return -EINVAL; 4257 } 4258 4259 btp = bpf_get_raw_tracepoint(tp_name); 4260 if (!btp) 4261 return -ENOENT; 4262 4263 link = kzalloc(sizeof(*link), GFP_USER); 4264 if (!link) { 4265 err = -ENOMEM; 4266 goto out_put_btp; 4267 } 4268 bpf_link_init_sleepable(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, 4269 &bpf_raw_tp_link_lops, prog, attach_type, 4270 tracepoint_is_faultable(btp->tp)); 4271 link->btp = btp; 4272 link->cookie = cookie; 4273 4274 err = bpf_link_prime(&link->link, &link_primer); 4275 if (err) { 4276 kfree(link); 4277 goto out_put_btp; 4278 } 4279 4280 err = bpf_probe_register(link->btp, link); 4281 if (err) { 4282 bpf_link_cleanup(&link_primer); 4283 goto out_put_btp; 4284 } 4285 4286 return bpf_link_settle(&link_primer); 4287 4288 out_put_btp: 4289 bpf_put_raw_tracepoint(btp); 4290 return err; 4291 } 4292 4293 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.cookie 4294 4295 static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 4296 { 4297 struct bpf_prog *prog; 4298 void __user *tp_name; 4299 __u64 cookie; 4300 int fd; 4301 4302 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) 4303 return -EINVAL; 4304 4305 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 4306 if (IS_ERR(prog)) 4307 return PTR_ERR(prog); 4308 4309 tp_name = u64_to_user_ptr(attr->raw_tracepoint.name); 4310 cookie = attr->raw_tracepoint.cookie; 4311 fd = bpf_raw_tp_link_attach(prog, tp_name, cookie, prog->expected_attach_type); 4312 if (fd < 0) 4313 bpf_prog_put(prog); 4314 return fd; 4315 } 4316 4317 static enum bpf_prog_type 4318 attach_type_to_prog_type(enum bpf_attach_type attach_type) 4319 { 4320 switch (attach_type) { 4321 case BPF_CGROUP_INET_INGRESS: 4322 case BPF_CGROUP_INET_EGRESS: 4323 return BPF_PROG_TYPE_CGROUP_SKB; 4324 case BPF_CGROUP_INET_SOCK_CREATE: 4325 case BPF_CGROUP_INET_SOCK_RELEASE: 4326 case BPF_CGROUP_INET4_POST_BIND: 4327 case BPF_CGROUP_INET6_POST_BIND: 4328 return BPF_PROG_TYPE_CGROUP_SOCK; 4329 case BPF_CGROUP_INET4_BIND: 4330 case BPF_CGROUP_INET6_BIND: 4331 case BPF_CGROUP_INET4_CONNECT: 4332 case BPF_CGROUP_INET6_CONNECT: 4333 case BPF_CGROUP_UNIX_CONNECT: 4334 case BPF_CGROUP_INET4_GETPEERNAME: 4335 
case BPF_CGROUP_INET6_GETPEERNAME: 4336 case BPF_CGROUP_UNIX_GETPEERNAME: 4337 case BPF_CGROUP_INET4_GETSOCKNAME: 4338 case BPF_CGROUP_INET6_GETSOCKNAME: 4339 case BPF_CGROUP_UNIX_GETSOCKNAME: 4340 case BPF_CGROUP_UDP4_SENDMSG: 4341 case BPF_CGROUP_UDP6_SENDMSG: 4342 case BPF_CGROUP_UNIX_SENDMSG: 4343 case BPF_CGROUP_UDP4_RECVMSG: 4344 case BPF_CGROUP_UDP6_RECVMSG: 4345 case BPF_CGROUP_UNIX_RECVMSG: 4346 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 4347 case BPF_CGROUP_SOCK_OPS: 4348 return BPF_PROG_TYPE_SOCK_OPS; 4349 case BPF_CGROUP_DEVICE: 4350 return BPF_PROG_TYPE_CGROUP_DEVICE; 4351 case BPF_SK_MSG_VERDICT: 4352 return BPF_PROG_TYPE_SK_MSG; 4353 case BPF_SK_SKB_STREAM_PARSER: 4354 case BPF_SK_SKB_STREAM_VERDICT: 4355 case BPF_SK_SKB_VERDICT: 4356 return BPF_PROG_TYPE_SK_SKB; 4357 case BPF_LIRC_MODE2: 4358 return BPF_PROG_TYPE_LIRC_MODE2; 4359 case BPF_FLOW_DISSECTOR: 4360 return BPF_PROG_TYPE_FLOW_DISSECTOR; 4361 case BPF_CGROUP_SYSCTL: 4362 return BPF_PROG_TYPE_CGROUP_SYSCTL; 4363 case BPF_CGROUP_GETSOCKOPT: 4364 case BPF_CGROUP_SETSOCKOPT: 4365 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 4366 case BPF_TRACE_ITER: 4367 case BPF_TRACE_RAW_TP: 4368 case BPF_TRACE_FENTRY: 4369 case BPF_TRACE_FEXIT: 4370 case BPF_TRACE_FSESSION: 4371 case BPF_MODIFY_RETURN: 4372 return BPF_PROG_TYPE_TRACING; 4373 case BPF_LSM_MAC: 4374 return BPF_PROG_TYPE_LSM; 4375 case BPF_SK_LOOKUP: 4376 return BPF_PROG_TYPE_SK_LOOKUP; 4377 case BPF_XDP: 4378 return BPF_PROG_TYPE_XDP; 4379 case BPF_LSM_CGROUP: 4380 return BPF_PROG_TYPE_LSM; 4381 case BPF_TCX_INGRESS: 4382 case BPF_TCX_EGRESS: 4383 case BPF_NETKIT_PRIMARY: 4384 case BPF_NETKIT_PEER: 4385 return BPF_PROG_TYPE_SCHED_CLS; 4386 default: 4387 return BPF_PROG_TYPE_UNSPEC; 4388 } 4389 } 4390 4391 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 4392 enum bpf_attach_type attach_type) 4393 { 4394 enum bpf_prog_type ptype; 4395 4396 switch (prog->type) { 4397 case BPF_PROG_TYPE_CGROUP_SOCK: 4398 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4399 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4400 case BPF_PROG_TYPE_SK_LOOKUP: 4401 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 4402 case BPF_PROG_TYPE_CGROUP_SKB: 4403 if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN)) 4404 /* cg-skb progs can be loaded by unpriv user. 4405 * check permissions at attach time. 4406 */ 4407 return -EPERM; 4408 4409 ptype = attach_type_to_prog_type(attach_type); 4410 if (prog->type != ptype) 4411 return -EINVAL; 4412 4413 return prog->enforce_expected_attach_type && 4414 prog->expected_attach_type != attach_type ? 
4415 -EINVAL : 0; 4416 case BPF_PROG_TYPE_EXT: 4417 return 0; 4418 case BPF_PROG_TYPE_NETFILTER: 4419 if (attach_type != BPF_NETFILTER) 4420 return -EINVAL; 4421 return 0; 4422 case BPF_PROG_TYPE_PERF_EVENT: 4423 case BPF_PROG_TYPE_TRACEPOINT: 4424 if (attach_type != BPF_PERF_EVENT) 4425 return -EINVAL; 4426 return 0; 4427 case BPF_PROG_TYPE_KPROBE: 4428 if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && 4429 attach_type != BPF_TRACE_KPROBE_MULTI) 4430 return -EINVAL; 4431 if (prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION && 4432 attach_type != BPF_TRACE_KPROBE_SESSION) 4433 return -EINVAL; 4434 if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI && 4435 attach_type != BPF_TRACE_UPROBE_MULTI) 4436 return -EINVAL; 4437 if (prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION && 4438 attach_type != BPF_TRACE_UPROBE_SESSION) 4439 return -EINVAL; 4440 if (attach_type != BPF_PERF_EVENT && 4441 attach_type != BPF_TRACE_KPROBE_MULTI && 4442 attach_type != BPF_TRACE_KPROBE_SESSION && 4443 attach_type != BPF_TRACE_UPROBE_MULTI && 4444 attach_type != BPF_TRACE_UPROBE_SESSION) 4445 return -EINVAL; 4446 return 0; 4447 case BPF_PROG_TYPE_SCHED_CLS: 4448 if (attach_type != BPF_TCX_INGRESS && 4449 attach_type != BPF_TCX_EGRESS && 4450 attach_type != BPF_NETKIT_PRIMARY && 4451 attach_type != BPF_NETKIT_PEER) 4452 return -EINVAL; 4453 return 0; 4454 default: 4455 ptype = attach_type_to_prog_type(attach_type); 4456 if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) 4457 return -EINVAL; 4458 return 0; 4459 } 4460 } 4461 4462 static bool is_cgroup_prog_type(enum bpf_prog_type ptype, enum bpf_attach_type atype, 4463 bool check_atype) 4464 { 4465 switch (ptype) { 4466 case BPF_PROG_TYPE_CGROUP_DEVICE: 4467 case BPF_PROG_TYPE_CGROUP_SKB: 4468 case BPF_PROG_TYPE_CGROUP_SOCK: 4469 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4470 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4471 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4472 case BPF_PROG_TYPE_SOCK_OPS: 4473 return true; 4474 case BPF_PROG_TYPE_LSM: 4475 return check_atype ? 
atype == BPF_LSM_CGROUP : true; 4476 default: 4477 return false; 4478 } 4479 } 4480 4481 #define BPF_PROG_ATTACH_LAST_FIELD expected_revision 4482 4483 #define BPF_F_ATTACH_MASK_BASE \ 4484 (BPF_F_ALLOW_OVERRIDE | \ 4485 BPF_F_ALLOW_MULTI | \ 4486 BPF_F_REPLACE | \ 4487 BPF_F_PREORDER) 4488 4489 #define BPF_F_ATTACH_MASK_MPROG \ 4490 (BPF_F_REPLACE | \ 4491 BPF_F_BEFORE | \ 4492 BPF_F_AFTER | \ 4493 BPF_F_ID | \ 4494 BPF_F_LINK) 4495 4496 static int bpf_prog_attach(const union bpf_attr *attr) 4497 { 4498 enum bpf_prog_type ptype; 4499 struct bpf_prog *prog; 4500 int ret; 4501 4502 if (CHECK_ATTR(BPF_PROG_ATTACH)) 4503 return -EINVAL; 4504 4505 ptype = attach_type_to_prog_type(attr->attach_type); 4506 if (ptype == BPF_PROG_TYPE_UNSPEC) 4507 return -EINVAL; 4508 if (bpf_mprog_supported(ptype)) { 4509 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) 4510 return -EINVAL; 4511 } else if (is_cgroup_prog_type(ptype, 0, false)) { 4512 if (attr->attach_flags & ~(BPF_F_ATTACH_MASK_BASE | BPF_F_ATTACH_MASK_MPROG)) 4513 return -EINVAL; 4514 } else { 4515 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE) 4516 return -EINVAL; 4517 if (attr->relative_fd || 4518 attr->expected_revision) 4519 return -EINVAL; 4520 } 4521 4522 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 4523 if (IS_ERR(prog)) 4524 return PTR_ERR(prog); 4525 4526 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) { 4527 bpf_prog_put(prog); 4528 return -EINVAL; 4529 } 4530 4531 if (is_cgroup_prog_type(ptype, prog->expected_attach_type, true)) { 4532 ret = cgroup_bpf_prog_attach(attr, ptype, prog); 4533 goto out; 4534 } 4535 4536 switch (ptype) { 4537 case BPF_PROG_TYPE_SK_SKB: 4538 case BPF_PROG_TYPE_SK_MSG: 4539 ret = sock_map_get_from_fd(attr, prog); 4540 break; 4541 case BPF_PROG_TYPE_LIRC_MODE2: 4542 ret = lirc_prog_attach(attr, prog); 4543 break; 4544 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4545 ret = netns_bpf_prog_attach(attr, prog); 4546 break; 4547 case BPF_PROG_TYPE_SCHED_CLS: 4548 if (attr->attach_type == BPF_TCX_INGRESS || 4549 attr->attach_type == BPF_TCX_EGRESS) 4550 ret = tcx_prog_attach(attr, prog); 4551 else 4552 ret = netkit_prog_attach(attr, prog); 4553 break; 4554 default: 4555 ret = -EINVAL; 4556 } 4557 out: 4558 if (ret) 4559 bpf_prog_put(prog); 4560 return ret; 4561 } 4562 4563 #define BPF_PROG_DETACH_LAST_FIELD expected_revision 4564 4565 static int bpf_prog_detach(const union bpf_attr *attr) 4566 { 4567 struct bpf_prog *prog = NULL; 4568 enum bpf_prog_type ptype; 4569 int ret; 4570 4571 if (CHECK_ATTR(BPF_PROG_DETACH)) 4572 return -EINVAL; 4573 4574 ptype = attach_type_to_prog_type(attr->attach_type); 4575 if (bpf_mprog_supported(ptype)) { 4576 if (ptype == BPF_PROG_TYPE_UNSPEC) 4577 return -EINVAL; 4578 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) 4579 return -EINVAL; 4580 if (attr->attach_bpf_fd) { 4581 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 4582 if (IS_ERR(prog)) 4583 return PTR_ERR(prog); 4584 } else if (!bpf_mprog_detach_empty(ptype)) { 4585 return -EPERM; 4586 } 4587 } else if (is_cgroup_prog_type(ptype, 0, false)) { 4588 if (attr->attach_flags || attr->relative_fd) 4589 return -EINVAL; 4590 } else if (attr->attach_flags || 4591 attr->relative_fd || 4592 attr->expected_revision) { 4593 return -EINVAL; 4594 } 4595 4596 switch (ptype) { 4597 case BPF_PROG_TYPE_SK_MSG: 4598 case BPF_PROG_TYPE_SK_SKB: 4599 ret = sock_map_prog_detach(attr, ptype); 4600 break; 4601 case BPF_PROG_TYPE_LIRC_MODE2: 4602 ret = lirc_prog_detach(attr); 4603 break; 4604 case 
BPF_PROG_TYPE_FLOW_DISSECTOR: 4605 ret = netns_bpf_prog_detach(attr, ptype); 4606 break; 4607 case BPF_PROG_TYPE_CGROUP_DEVICE: 4608 case BPF_PROG_TYPE_CGROUP_SKB: 4609 case BPF_PROG_TYPE_CGROUP_SOCK: 4610 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4611 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4612 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4613 case BPF_PROG_TYPE_SOCK_OPS: 4614 case BPF_PROG_TYPE_LSM: 4615 ret = cgroup_bpf_prog_detach(attr, ptype); 4616 break; 4617 case BPF_PROG_TYPE_SCHED_CLS: 4618 if (attr->attach_type == BPF_TCX_INGRESS || 4619 attr->attach_type == BPF_TCX_EGRESS) 4620 ret = tcx_prog_detach(attr, prog); 4621 else 4622 ret = netkit_prog_detach(attr, prog); 4623 break; 4624 default: 4625 ret = -EINVAL; 4626 } 4627 4628 if (prog) 4629 bpf_prog_put(prog); 4630 return ret; 4631 } 4632 4633 #define BPF_PROG_QUERY_LAST_FIELD query.revision 4634 4635 static int bpf_prog_query(const union bpf_attr *attr, 4636 union bpf_attr __user *uattr) 4637 { 4638 if (!bpf_net_capable()) 4639 return -EPERM; 4640 if (CHECK_ATTR(BPF_PROG_QUERY)) 4641 return -EINVAL; 4642 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) 4643 return -EINVAL; 4644 4645 switch (attr->query.attach_type) { 4646 case BPF_CGROUP_INET_INGRESS: 4647 case BPF_CGROUP_INET_EGRESS: 4648 case BPF_CGROUP_INET_SOCK_CREATE: 4649 case BPF_CGROUP_INET_SOCK_RELEASE: 4650 case BPF_CGROUP_INET4_BIND: 4651 case BPF_CGROUP_INET6_BIND: 4652 case BPF_CGROUP_INET4_POST_BIND: 4653 case BPF_CGROUP_INET6_POST_BIND: 4654 case BPF_CGROUP_INET4_CONNECT: 4655 case BPF_CGROUP_INET6_CONNECT: 4656 case BPF_CGROUP_UNIX_CONNECT: 4657 case BPF_CGROUP_INET4_GETPEERNAME: 4658 case BPF_CGROUP_INET6_GETPEERNAME: 4659 case BPF_CGROUP_UNIX_GETPEERNAME: 4660 case BPF_CGROUP_INET4_GETSOCKNAME: 4661 case BPF_CGROUP_INET6_GETSOCKNAME: 4662 case BPF_CGROUP_UNIX_GETSOCKNAME: 4663 case BPF_CGROUP_UDP4_SENDMSG: 4664 case BPF_CGROUP_UDP6_SENDMSG: 4665 case BPF_CGROUP_UNIX_SENDMSG: 4666 case BPF_CGROUP_UDP4_RECVMSG: 4667 case BPF_CGROUP_UDP6_RECVMSG: 4668 case BPF_CGROUP_UNIX_RECVMSG: 4669 case BPF_CGROUP_SOCK_OPS: 4670 case BPF_CGROUP_DEVICE: 4671 case BPF_CGROUP_SYSCTL: 4672 case BPF_CGROUP_GETSOCKOPT: 4673 case BPF_CGROUP_SETSOCKOPT: 4674 case BPF_LSM_CGROUP: 4675 return cgroup_bpf_prog_query(attr, uattr); 4676 case BPF_LIRC_MODE2: 4677 return lirc_prog_query(attr, uattr); 4678 case BPF_FLOW_DISSECTOR: 4679 case BPF_SK_LOOKUP: 4680 return netns_bpf_prog_query(attr, uattr); 4681 case BPF_SK_SKB_STREAM_PARSER: 4682 case BPF_SK_SKB_STREAM_VERDICT: 4683 case BPF_SK_MSG_VERDICT: 4684 case BPF_SK_SKB_VERDICT: 4685 return sock_map_bpf_prog_query(attr, uattr); 4686 case BPF_TCX_INGRESS: 4687 case BPF_TCX_EGRESS: 4688 return tcx_prog_query(attr, uattr); 4689 case BPF_NETKIT_PRIMARY: 4690 case BPF_NETKIT_PEER: 4691 return netkit_prog_query(attr, uattr); 4692 default: 4693 return -EINVAL; 4694 } 4695 } 4696 4697 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size 4698 4699 static int bpf_prog_test_run(const union bpf_attr *attr, 4700 union bpf_attr __user *uattr) 4701 { 4702 struct bpf_prog *prog; 4703 int ret = -ENOTSUPP; 4704 4705 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 4706 return -EINVAL; 4707 4708 if ((attr->test.ctx_size_in && !attr->test.ctx_in) || 4709 (!attr->test.ctx_size_in && attr->test.ctx_in)) 4710 return -EINVAL; 4711 4712 if ((attr->test.ctx_size_out && !attr->test.ctx_out) || 4713 (!attr->test.ctx_size_out && attr->test.ctx_out)) 4714 return -EINVAL; 4715 4716 prog = bpf_prog_get(attr->test.prog_fd); 4717 if (IS_ERR(prog)) 4718 return PTR_ERR(prog); 4719 4720 if 
(prog->aux->ops->test_run) 4721 ret = prog->aux->ops->test_run(prog, attr, uattr); 4722 4723 bpf_prog_put(prog); 4724 return ret; 4725 } 4726 4727 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 4728 4729 static int bpf_obj_get_next_id(const union bpf_attr *attr, 4730 union bpf_attr __user *uattr, 4731 struct idr *idr, 4732 spinlock_t *lock) 4733 { 4734 u32 next_id = attr->start_id; 4735 int err = 0; 4736 4737 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 4738 return -EINVAL; 4739 4740 if (!capable(CAP_SYS_ADMIN)) 4741 return -EPERM; 4742 4743 next_id++; 4744 spin_lock_bh(lock); 4745 if (!idr_get_next(idr, &next_id)) 4746 err = -ENOENT; 4747 spin_unlock_bh(lock); 4748 4749 if (!err) 4750 err = put_user(next_id, &uattr->next_id); 4751 4752 return err; 4753 } 4754 4755 struct bpf_map *bpf_map_get_curr_or_next(u32 *id) 4756 { 4757 struct bpf_map *map; 4758 4759 spin_lock_bh(&map_idr_lock); 4760 again: 4761 map = idr_get_next(&map_idr, id); 4762 if (map) { 4763 map = __bpf_map_inc_not_zero(map, false); 4764 if (IS_ERR(map)) { 4765 (*id)++; 4766 goto again; 4767 } 4768 } 4769 spin_unlock_bh(&map_idr_lock); 4770 4771 return map; 4772 } 4773 4774 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) 4775 { 4776 struct bpf_prog *prog; 4777 4778 spin_lock_bh(&prog_idr_lock); 4779 again: 4780 prog = idr_get_next(&prog_idr, id); 4781 if (prog) { 4782 prog = bpf_prog_inc_not_zero(prog); 4783 if (IS_ERR(prog)) { 4784 (*id)++; 4785 goto again; 4786 } 4787 } 4788 spin_unlock_bh(&prog_idr_lock); 4789 4790 return prog; 4791 } 4792 4793 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 4794 4795 struct bpf_prog *bpf_prog_by_id(u32 id) 4796 { 4797 struct bpf_prog *prog; 4798 4799 if (!id) 4800 return ERR_PTR(-ENOENT); 4801 4802 spin_lock_bh(&prog_idr_lock); 4803 prog = idr_find(&prog_idr, id); 4804 if (prog) 4805 prog = bpf_prog_inc_not_zero(prog); 4806 else 4807 prog = ERR_PTR(-ENOENT); 4808 spin_unlock_bh(&prog_idr_lock); 4809 return prog; 4810 } 4811 4812 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 4813 { 4814 struct bpf_prog *prog; 4815 u32 id = attr->prog_id; 4816 int fd; 4817 4818 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 4819 return -EINVAL; 4820 4821 if (!capable(CAP_SYS_ADMIN)) 4822 return -EPERM; 4823 4824 prog = bpf_prog_by_id(id); 4825 if (IS_ERR(prog)) 4826 return PTR_ERR(prog); 4827 4828 fd = bpf_prog_new_fd(prog); 4829 if (fd < 0) 4830 bpf_prog_put(prog); 4831 4832 return fd; 4833 } 4834 4835 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 4836 4837 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 4838 { 4839 struct bpf_map *map; 4840 u32 id = attr->map_id; 4841 int f_flags; 4842 int fd; 4843 4844 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || 4845 attr->open_flags & ~BPF_OBJ_FLAG_MASK) 4846 return -EINVAL; 4847 4848 if (!capable(CAP_SYS_ADMIN)) 4849 return -EPERM; 4850 4851 f_flags = bpf_get_file_flag(attr->open_flags); 4852 if (f_flags < 0) 4853 return f_flags; 4854 4855 spin_lock_bh(&map_idr_lock); 4856 map = idr_find(&map_idr, id); 4857 if (map) 4858 map = __bpf_map_inc_not_zero(map, true); 4859 else 4860 map = ERR_PTR(-ENOENT); 4861 spin_unlock_bh(&map_idr_lock); 4862 4863 if (IS_ERR(map)) 4864 return PTR_ERR(map); 4865 4866 fd = bpf_map_new_fd(map, f_flags); 4867 if (fd < 0) 4868 bpf_map_put_with_uref(map); 4869 4870 return fd; 4871 } 4872 4873 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 4874 unsigned long addr, u32 *off, 4875 u32 *type) 4876 { 4877 const struct bpf_map *map; 4878 int i; 4879 4880 
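	/*
	 * Resolve a map pointer that was embedded in a BPF_LD_IMM64
	 * instruction back to something that is safe to show to userspace:
	 * either the map itself (BPF_PSEUDO_MAP_FD) or a direct value access
	 * within it (BPF_PSEUDO_MAP_VALUE plus an offset).
	 * bpf_insn_prepare_dump() below relies on this so that xlated
	 * instruction dumps carry map IDs instead of raw kernel addresses,
	 * e.g. a dumped ld_imm64 comes back as
	 * insn[0].src_reg = BPF_PSEUDO_MAP_FD, insn[0].imm = map_id,
	 * insn[1].imm = 0 (or the value offset for BPF_PSEUDO_MAP_VALUE).
	 */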
mutex_lock(&prog->aux->used_maps_mutex); 4881 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { 4882 map = prog->aux->used_maps[i]; 4883 if (map == (void *)addr) { 4884 *type = BPF_PSEUDO_MAP_FD; 4885 goto out; 4886 } 4887 if (!map->ops->map_direct_value_meta) 4888 continue; 4889 if (!map->ops->map_direct_value_meta(map, addr, off)) { 4890 *type = BPF_PSEUDO_MAP_VALUE; 4891 goto out; 4892 } 4893 } 4894 map = NULL; 4895 4896 out: 4897 mutex_unlock(&prog->aux->used_maps_mutex); 4898 return map; 4899 } 4900 4901 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, 4902 const struct cred *f_cred) 4903 { 4904 const struct bpf_map *map; 4905 struct bpf_insn *insns; 4906 u32 off, type; 4907 u64 imm; 4908 u8 code; 4909 int i; 4910 4911 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), 4912 GFP_USER); 4913 if (!insns) 4914 return insns; 4915 4916 for (i = 0; i < prog->len; i++) { 4917 code = insns[i].code; 4918 4919 if (code == (BPF_JMP | BPF_TAIL_CALL)) { 4920 insns[i].code = BPF_JMP | BPF_CALL; 4921 insns[i].imm = BPF_FUNC_tail_call; 4922 /* fall-through */ 4923 } 4924 if (code == (BPF_JMP | BPF_CALL) || 4925 code == (BPF_JMP | BPF_CALL_ARGS)) { 4926 if (code == (BPF_JMP | BPF_CALL_ARGS)) 4927 insns[i].code = BPF_JMP | BPF_CALL; 4928 if (!bpf_dump_raw_ok(f_cred)) 4929 insns[i].imm = 0; 4930 continue; 4931 } 4932 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { 4933 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; 4934 continue; 4935 } 4936 4937 if ((BPF_CLASS(code) == BPF_LDX || BPF_CLASS(code) == BPF_STX || 4938 BPF_CLASS(code) == BPF_ST) && BPF_MODE(code) == BPF_PROBE_MEM32) { 4939 insns[i].code = BPF_CLASS(code) | BPF_SIZE(code) | BPF_MEM; 4940 continue; 4941 } 4942 4943 if (code != (BPF_LD | BPF_IMM | BPF_DW)) 4944 continue; 4945 4946 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 4947 map = bpf_map_from_imm(prog, imm, &off, &type); 4948 if (map) { 4949 insns[i].src_reg = type; 4950 insns[i].imm = map->id; 4951 insns[i + 1].imm = off; 4952 continue; 4953 } 4954 } 4955 4956 return insns; 4957 } 4958 4959 static int set_info_rec_size(struct bpf_prog_info *info) 4960 { 4961 /* 4962 * Ensure info.*_rec_size is the same as kernel expected size 4963 * 4964 * or 4965 * 4966 * Only allow zero *_rec_size if both _rec_size and _cnt are 4967 * zero. In this case, the kernel will set the expected 4968 * _rec_size back to the info. 
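 *
 * For example, a tool built against an older UAPI can pass
 * func_info_rec_size == 0 together with nr_func_info == 0; the kernel
 * then reports sizeof(struct bpf_func_info) back in the info struct so
 * the tool can size its buffers for a second BPF_OBJ_GET_INFO_BY_FD
 * call. A hedged sketch of that two-call pattern from userspace, using
 * libbpf's bpf_obj_get_info_by_fd() helper:
 *
 *	struct bpf_prog_info info = {};	/\* rec_sizes and cnts all zero *\/
 *	__u32 len = sizeof(info);
 *
 *	bpf_obj_get_info_by_fd(prog_fd, &info, &len);
 *	/\* info.func_info_rec_size is now sizeof(struct bpf_func_info) and
 *	 * info.nr_func_info says how many records to allocate before the
 *	 * second call that actually copies the data out.
 *	 *\/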
4969 */ 4970 4971 if ((info->nr_func_info || info->func_info_rec_size) && 4972 info->func_info_rec_size != sizeof(struct bpf_func_info)) 4973 return -EINVAL; 4974 4975 if ((info->nr_line_info || info->line_info_rec_size) && 4976 info->line_info_rec_size != sizeof(struct bpf_line_info)) 4977 return -EINVAL; 4978 4979 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && 4980 info->jited_line_info_rec_size != sizeof(__u64)) 4981 return -EINVAL; 4982 4983 info->func_info_rec_size = sizeof(struct bpf_func_info); 4984 info->line_info_rec_size = sizeof(struct bpf_line_info); 4985 info->jited_line_info_rec_size = sizeof(__u64); 4986 4987 return 0; 4988 } 4989 4990 static int bpf_prog_get_info_by_fd(struct file *file, 4991 struct bpf_prog *prog, 4992 const union bpf_attr *attr, 4993 union bpf_attr __user *uattr) 4994 { 4995 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 4996 struct btf *attach_btf = bpf_prog_get_target_btf(prog); 4997 struct bpf_prog_info info; 4998 u32 info_len = attr->info.info_len; 4999 struct bpf_prog_kstats stats; 5000 char __user *uinsns; 5001 u32 ulen; 5002 int err; 5003 5004 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 5005 if (err) 5006 return err; 5007 info_len = min_t(u32, sizeof(info), info_len); 5008 5009 memset(&info, 0, sizeof(info)); 5010 if (copy_from_user(&info, uinfo, info_len)) 5011 return -EFAULT; 5012 5013 info.type = prog->type; 5014 info.id = prog->aux->id; 5015 info.load_time = prog->aux->load_time; 5016 info.created_by_uid = from_kuid_munged(current_user_ns(), 5017 prog->aux->user->uid); 5018 info.gpl_compatible = prog->gpl_compatible; 5019 5020 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 5021 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 5022 5023 mutex_lock(&prog->aux->used_maps_mutex); 5024 ulen = info.nr_map_ids; 5025 info.nr_map_ids = prog->aux->used_map_cnt; 5026 ulen = min_t(u32, info.nr_map_ids, ulen); 5027 if (ulen) { 5028 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 5029 u32 i; 5030 5031 for (i = 0; i < ulen; i++) 5032 if (put_user(prog->aux->used_maps[i]->id, 5033 &user_map_ids[i])) { 5034 mutex_unlock(&prog->aux->used_maps_mutex); 5035 return -EFAULT; 5036 } 5037 } 5038 mutex_unlock(&prog->aux->used_maps_mutex); 5039 5040 err = set_info_rec_size(&info); 5041 if (err) 5042 return err; 5043 5044 bpf_prog_get_stats(prog, &stats); 5045 info.run_time_ns = stats.nsecs; 5046 info.run_cnt = stats.cnt; 5047 info.recursion_misses = stats.misses; 5048 5049 info.verified_insns = prog->aux->verified_insns; 5050 if (prog->aux->btf) 5051 info.btf_id = btf_obj_id(prog->aux->btf); 5052 5053 if (!bpf_capable()) { 5054 info.jited_prog_len = 0; 5055 info.xlated_prog_len = 0; 5056 info.nr_jited_ksyms = 0; 5057 info.nr_jited_func_lens = 0; 5058 info.nr_func_info = 0; 5059 info.nr_line_info = 0; 5060 info.nr_jited_line_info = 0; 5061 goto done; 5062 } 5063 5064 ulen = info.xlated_prog_len; 5065 info.xlated_prog_len = bpf_prog_insn_size(prog); 5066 if (info.xlated_prog_len && ulen) { 5067 struct bpf_insn *insns_sanitized; 5068 bool fault; 5069 5070 if (!prog->blinded || bpf_dump_raw_ok(file->f_cred)) { 5071 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); 5072 if (!insns_sanitized) 5073 return -ENOMEM; 5074 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 5075 ulen = min_t(u32, info.xlated_prog_len, ulen); 5076 fault = copy_to_user(uinsns, insns_sanitized, ulen); 5077 kfree(insns_sanitized); 5078 if (fault) 5079 return -EFAULT; 5080 } else { 5081 
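			/*
			 * The program image was constant blinded and the
			 * caller is not allowed a raw dump, so clear the
			 * pointer to signal that no instructions were copied
			 * rather than exposing the blinded image.
			 */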
info.xlated_prog_insns = 0; 5082 } 5083 } 5084 5085 if (bpf_prog_is_offloaded(prog->aux)) { 5086 err = bpf_prog_offload_info_fill(&info, prog); 5087 if (err) 5088 return err; 5089 goto done; 5090 } 5091 5092 /* NOTE: the following code is supposed to be skipped for offload. 5093 * bpf_prog_offload_info_fill() is the place to fill similar fields 5094 * for offload. 5095 */ 5096 ulen = info.jited_prog_len; 5097 if (prog->aux->func_cnt) { 5098 u32 i; 5099 5100 info.jited_prog_len = 0; 5101 for (i = 0; i < prog->aux->func_cnt; i++) 5102 info.jited_prog_len += prog->aux->func[i]->jited_len; 5103 } else { 5104 info.jited_prog_len = prog->jited_len; 5105 } 5106 5107 if (info.jited_prog_len && ulen) { 5108 if (bpf_dump_raw_ok(file->f_cred)) { 5109 uinsns = u64_to_user_ptr(info.jited_prog_insns); 5110 ulen = min_t(u32, info.jited_prog_len, ulen); 5111 5112 /* for multi-function programs, copy the JITed 5113 * instructions for all the functions 5114 */ 5115 if (prog->aux->func_cnt) { 5116 u32 len, free, i; 5117 u8 *img; 5118 5119 free = ulen; 5120 for (i = 0; i < prog->aux->func_cnt; i++) { 5121 len = prog->aux->func[i]->jited_len; 5122 len = min_t(u32, len, free); 5123 img = (u8 *) prog->aux->func[i]->bpf_func; 5124 if (copy_to_user(uinsns, img, len)) 5125 return -EFAULT; 5126 uinsns += len; 5127 free -= len; 5128 if (!free) 5129 break; 5130 } 5131 } else { 5132 if (copy_to_user(uinsns, prog->bpf_func, ulen)) 5133 return -EFAULT; 5134 } 5135 } else { 5136 info.jited_prog_insns = 0; 5137 } 5138 } 5139 5140 ulen = info.nr_jited_ksyms; 5141 info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; 5142 if (ulen) { 5143 if (bpf_dump_raw_ok(file->f_cred)) { 5144 unsigned long ksym_addr; 5145 u64 __user *user_ksyms; 5146 u32 i; 5147 5148 /* copy the address of the kernel symbol 5149 * corresponding to each function 5150 */ 5151 ulen = min_t(u32, info.nr_jited_ksyms, ulen); 5152 user_ksyms = u64_to_user_ptr(info.jited_ksyms); 5153 if (prog->aux->func_cnt) { 5154 for (i = 0; i < ulen; i++) { 5155 ksym_addr = (unsigned long) 5156 prog->aux->func[i]->bpf_func; 5157 if (put_user((u64) ksym_addr, 5158 &user_ksyms[i])) 5159 return -EFAULT; 5160 } 5161 } else { 5162 ksym_addr = (unsigned long) prog->bpf_func; 5163 if (put_user((u64) ksym_addr, &user_ksyms[0])) 5164 return -EFAULT; 5165 } 5166 } else { 5167 info.jited_ksyms = 0; 5168 } 5169 } 5170 5171 ulen = info.nr_jited_func_lens; 5172 info.nr_jited_func_lens = prog->aux->func_cnt ? 
: 1; 5173 if (ulen) { 5174 if (bpf_dump_raw_ok(file->f_cred)) { 5175 u32 __user *user_lens; 5176 u32 func_len, i; 5177 5178 /* copy the JITed image lengths for each function */ 5179 ulen = min_t(u32, info.nr_jited_func_lens, ulen); 5180 user_lens = u64_to_user_ptr(info.jited_func_lens); 5181 if (prog->aux->func_cnt) { 5182 for (i = 0; i < ulen; i++) { 5183 func_len = 5184 prog->aux->func[i]->jited_len; 5185 if (put_user(func_len, &user_lens[i])) 5186 return -EFAULT; 5187 } 5188 } else { 5189 func_len = prog->jited_len; 5190 if (put_user(func_len, &user_lens[0])) 5191 return -EFAULT; 5192 } 5193 } else { 5194 info.jited_func_lens = 0; 5195 } 5196 } 5197 5198 info.attach_btf_id = prog->aux->attach_btf_id; 5199 if (attach_btf) 5200 info.attach_btf_obj_id = btf_obj_id(attach_btf); 5201 5202 ulen = info.nr_func_info; 5203 info.nr_func_info = prog->aux->func_info_cnt; 5204 if (info.nr_func_info && ulen) { 5205 char __user *user_finfo; 5206 5207 user_finfo = u64_to_user_ptr(info.func_info); 5208 ulen = min_t(u32, info.nr_func_info, ulen); 5209 if (copy_to_user(user_finfo, prog->aux->func_info, 5210 info.func_info_rec_size * ulen)) 5211 return -EFAULT; 5212 } 5213 5214 ulen = info.nr_line_info; 5215 info.nr_line_info = prog->aux->nr_linfo; 5216 if (info.nr_line_info && ulen) { 5217 __u8 __user *user_linfo; 5218 5219 user_linfo = u64_to_user_ptr(info.line_info); 5220 ulen = min_t(u32, info.nr_line_info, ulen); 5221 if (copy_to_user(user_linfo, prog->aux->linfo, 5222 info.line_info_rec_size * ulen)) 5223 return -EFAULT; 5224 } 5225 5226 ulen = info.nr_jited_line_info; 5227 if (prog->aux->jited_linfo) 5228 info.nr_jited_line_info = prog->aux->nr_linfo; 5229 else 5230 info.nr_jited_line_info = 0; 5231 if (info.nr_jited_line_info && ulen) { 5232 if (bpf_dump_raw_ok(file->f_cred)) { 5233 unsigned long line_addr; 5234 __u64 __user *user_linfo; 5235 u32 i; 5236 5237 user_linfo = u64_to_user_ptr(info.jited_line_info); 5238 ulen = min_t(u32, info.nr_jited_line_info, ulen); 5239 for (i = 0; i < ulen; i++) { 5240 line_addr = (unsigned long)prog->aux->jited_linfo[i]; 5241 if (put_user((__u64)line_addr, &user_linfo[i])) 5242 return -EFAULT; 5243 } 5244 } else { 5245 info.jited_line_info = 0; 5246 } 5247 } 5248 5249 ulen = info.nr_prog_tags; 5250 info.nr_prog_tags = prog->aux->func_cnt ? 
: 1; 5251 if (ulen) { 5252 __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; 5253 u32 i; 5254 5255 user_prog_tags = u64_to_user_ptr(info.prog_tags); 5256 ulen = min_t(u32, info.nr_prog_tags, ulen); 5257 if (prog->aux->func_cnt) { 5258 for (i = 0; i < ulen; i++) { 5259 if (copy_to_user(user_prog_tags[i], 5260 prog->aux->func[i]->tag, 5261 BPF_TAG_SIZE)) 5262 return -EFAULT; 5263 } 5264 } else { 5265 if (copy_to_user(user_prog_tags[0], 5266 prog->tag, BPF_TAG_SIZE)) 5267 return -EFAULT; 5268 } 5269 } 5270 5271 done: 5272 if (copy_to_user(uinfo, &info, info_len) || 5273 put_user(info_len, &uattr->info.info_len)) 5274 return -EFAULT; 5275 5276 return 0; 5277 } 5278 5279 static int bpf_map_get_info_by_fd(struct file *file, 5280 struct bpf_map *map, 5281 const union bpf_attr *attr, 5282 union bpf_attr __user *uattr) 5283 { 5284 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5285 struct bpf_map_info info; 5286 u32 info_len = attr->info.info_len; 5287 int err; 5288 5289 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 5290 if (err) 5291 return err; 5292 info_len = min_t(u32, sizeof(info), info_len); 5293 5294 memset(&info, 0, sizeof(info)); 5295 if (copy_from_user(&info, uinfo, info_len)) 5296 return -EFAULT; 5297 5298 info.type = map->map_type; 5299 info.id = map->id; 5300 info.key_size = map->key_size; 5301 info.value_size = map->value_size; 5302 info.max_entries = map->max_entries; 5303 info.map_flags = map->map_flags; 5304 info.map_extra = map->map_extra; 5305 memcpy(info.name, map->name, sizeof(map->name)); 5306 5307 if (map->btf) { 5308 info.btf_id = btf_obj_id(map->btf); 5309 info.btf_key_type_id = map->btf_key_type_id; 5310 info.btf_value_type_id = map->btf_value_type_id; 5311 } 5312 info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 5313 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) 5314 bpf_map_struct_ops_info_fill(&info, map); 5315 5316 if (bpf_map_is_offloaded(map)) { 5317 err = bpf_map_offload_info_fill(&info, map); 5318 if (err) 5319 return err; 5320 } 5321 5322 if (info.hash) { 5323 char __user *uhash = u64_to_user_ptr(info.hash); 5324 5325 if (!map->ops->map_get_hash) 5326 return -EINVAL; 5327 5328 if (info.hash_size != SHA256_DIGEST_SIZE) 5329 return -EINVAL; 5330 5331 if (!READ_ONCE(map->frozen)) 5332 return -EPERM; 5333 5334 err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha); 5335 if (err != 0) 5336 return err; 5337 5338 if (copy_to_user(uhash, map->sha, SHA256_DIGEST_SIZE) != 0) 5339 return -EFAULT; 5340 } else if (info.hash_size) { 5341 return -EINVAL; 5342 } 5343 5344 if (copy_to_user(uinfo, &info, info_len) || 5345 put_user(info_len, &uattr->info.info_len)) 5346 return -EFAULT; 5347 5348 return 0; 5349 } 5350 5351 static int bpf_btf_get_info_by_fd(struct file *file, 5352 struct btf *btf, 5353 const union bpf_attr *attr, 5354 union bpf_attr __user *uattr) 5355 { 5356 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5357 u32 info_len = attr->info.info_len; 5358 int err; 5359 5360 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5361 if (err) 5362 return err; 5363 5364 return btf_get_info_by_fd(btf, attr, uattr); 5365 } 5366 5367 static int bpf_link_get_info_by_fd(struct file *file, 5368 struct bpf_link *link, 5369 const union bpf_attr *attr, 5370 union bpf_attr __user *uattr) 5371 { 5372 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5373 struct bpf_link_info info; 5374 u32 info_len = attr->info.info_len; 5375 int err; 5376 5377 err 
= bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 5378 if (err) 5379 return err; 5380 info_len = min_t(u32, sizeof(info), info_len); 5381 5382 memset(&info, 0, sizeof(info)); 5383 if (copy_from_user(&info, uinfo, info_len)) 5384 return -EFAULT; 5385 5386 info.type = link->type; 5387 info.id = link->id; 5388 if (link->prog) 5389 info.prog_id = link->prog->aux->id; 5390 5391 if (link->ops->fill_link_info) { 5392 err = link->ops->fill_link_info(link, &info); 5393 if (err) 5394 return err; 5395 } 5396 5397 if (copy_to_user(uinfo, &info, info_len) || 5398 put_user(info_len, &uattr->info.info_len)) 5399 return -EFAULT; 5400 5401 return 0; 5402 } 5403 5404 5405 static int token_get_info_by_fd(struct file *file, 5406 struct bpf_token *token, 5407 const union bpf_attr *attr, 5408 union bpf_attr __user *uattr) 5409 { 5410 struct bpf_token_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5411 u32 info_len = attr->info.info_len; 5412 int err; 5413 5414 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5415 if (err) 5416 return err; 5417 return bpf_token_get_info_by_fd(token, attr, uattr); 5418 } 5419 5420 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info 5421 5422 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, 5423 union bpf_attr __user *uattr) 5424 { 5425 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) 5426 return -EINVAL; 5427 5428 CLASS(fd, f)(attr->info.bpf_fd); 5429 if (fd_empty(f)) 5430 return -EBADFD; 5431 5432 if (fd_file(f)->f_op == &bpf_prog_fops) 5433 return bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5434 uattr); 5435 else if (fd_file(f)->f_op == &bpf_map_fops) 5436 return bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5437 uattr); 5438 else if (fd_file(f)->f_op == &btf_fops) 5439 return bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); 5440 else if (fd_file(f)->f_op == &bpf_link_fops || fd_file(f)->f_op == &bpf_link_fops_poll) 5441 return bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5442 attr, uattr); 5443 else if (fd_file(f)->f_op == &bpf_token_fops) 5444 return token_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5445 attr, uattr); 5446 return -EINVAL; 5447 } 5448 5449 #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd 5450 5451 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) 5452 { 5453 struct bpf_token *token = NULL; 5454 5455 if (CHECK_ATTR(BPF_BTF_LOAD)) 5456 return -EINVAL; 5457 5458 if (attr->btf_flags & ~BPF_F_TOKEN_FD) 5459 return -EINVAL; 5460 5461 if (attr->btf_flags & BPF_F_TOKEN_FD) { 5462 token = bpf_token_get_from_fd(attr->btf_token_fd); 5463 if (IS_ERR(token)) 5464 return PTR_ERR(token); 5465 if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { 5466 bpf_token_put(token); 5467 token = NULL; 5468 } 5469 } 5470 5471 if (!bpf_token_capable(token, CAP_BPF)) { 5472 bpf_token_put(token); 5473 return -EPERM; 5474 } 5475 5476 bpf_token_put(token); 5477 5478 return btf_new_fd(attr, uattr, uattr_size); 5479 } 5480 5481 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd 5482 5483 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) 5484 { 5485 struct bpf_token *token = NULL; 5486 5487 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) 5488 return -EINVAL; 5489 5490 if (attr->open_flags & ~BPF_F_TOKEN_FD) 5491 return -EINVAL; 5492 5493 if (attr->open_flags & BPF_F_TOKEN_FD) { 5494 token = bpf_token_get_from_fd(attr->fd_by_id_token_fd); 5495 if (IS_ERR(token)) 5496 return PTR_ERR(token); 5497 if 
(!bpf_token_allow_cmd(token, BPF_BTF_GET_FD_BY_ID)) { 5498 bpf_token_put(token); 5499 token = NULL; 5500 } 5501 } 5502 5503 if (!bpf_token_capable(token, CAP_SYS_ADMIN)) { 5504 bpf_token_put(token); 5505 return -EPERM; 5506 } 5507 5508 bpf_token_put(token); 5509 5510 return btf_get_fd_by_id(attr->btf_id); 5511 } 5512 5513 static int bpf_task_fd_query_copy(const union bpf_attr *attr, 5514 union bpf_attr __user *uattr, 5515 u32 prog_id, u32 fd_type, 5516 const char *buf, u64 probe_offset, 5517 u64 probe_addr) 5518 { 5519 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf); 5520 u32 len = buf ? strlen(buf) : 0, input_len; 5521 int err = 0; 5522 5523 if (put_user(len, &uattr->task_fd_query.buf_len)) 5524 return -EFAULT; 5525 input_len = attr->task_fd_query.buf_len; 5526 if (input_len && ubuf) { 5527 if (!len) { 5528 /* nothing to copy, just make ubuf NULL terminated */ 5529 char zero = '\0'; 5530 5531 if (put_user(zero, ubuf)) 5532 return -EFAULT; 5533 } else { 5534 err = bpf_copy_to_user(ubuf, buf, input_len, len); 5535 if (err == -EFAULT) 5536 return err; 5537 } 5538 } 5539 5540 if (put_user(prog_id, &uattr->task_fd_query.prog_id) || 5541 put_user(fd_type, &uattr->task_fd_query.fd_type) || 5542 put_user(probe_offset, &uattr->task_fd_query.probe_offset) || 5543 put_user(probe_addr, &uattr->task_fd_query.probe_addr)) 5544 return -EFAULT; 5545 5546 return err; 5547 } 5548 5549 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr 5550 5551 static int bpf_task_fd_query(const union bpf_attr *attr, 5552 union bpf_attr __user *uattr) 5553 { 5554 pid_t pid = attr->task_fd_query.pid; 5555 u32 fd = attr->task_fd_query.fd; 5556 const struct perf_event *event; 5557 struct task_struct *task; 5558 struct file *file; 5559 int err; 5560 5561 if (CHECK_ATTR(BPF_TASK_FD_QUERY)) 5562 return -EINVAL; 5563 5564 if (!capable(CAP_SYS_ADMIN)) 5565 return -EPERM; 5566 5567 if (attr->task_fd_query.flags != 0) 5568 return -EINVAL; 5569 5570 rcu_read_lock(); 5571 task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 5572 rcu_read_unlock(); 5573 if (!task) 5574 return -ENOENT; 5575 5576 err = 0; 5577 file = fget_task(task, fd); 5578 put_task_struct(task); 5579 if (!file) 5580 return -EBADF; 5581 5582 if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) { 5583 struct bpf_link *link = file->private_data; 5584 5585 if (link->ops == &bpf_raw_tp_link_lops) { 5586 struct bpf_raw_tp_link *raw_tp = 5587 container_of(link, struct bpf_raw_tp_link, link); 5588 struct bpf_raw_event_map *btp = raw_tp->btp; 5589 5590 err = bpf_task_fd_query_copy(attr, uattr, 5591 raw_tp->link.prog->aux->id, 5592 BPF_FD_TYPE_RAW_TRACEPOINT, 5593 btp->tp->name, 0, 0); 5594 goto put_file; 5595 } 5596 goto out_not_supp; 5597 } 5598 5599 event = perf_get_event(file); 5600 if (!IS_ERR(event)) { 5601 u64 probe_offset, probe_addr; 5602 u32 prog_id, fd_type; 5603 const char *buf; 5604 5605 err = bpf_get_perf_event_info(event, &prog_id, &fd_type, 5606 &buf, &probe_offset, 5607 &probe_addr, NULL); 5608 if (!err) 5609 err = bpf_task_fd_query_copy(attr, uattr, prog_id, 5610 fd_type, buf, 5611 probe_offset, 5612 probe_addr); 5613 goto put_file; 5614 } 5615 5616 out_not_supp: 5617 err = -ENOTSUPP; 5618 put_file: 5619 fput(file); 5620 return err; 5621 } 5622 5623 #define BPF_MAP_BATCH_LAST_FIELD batch.flags 5624 5625 #define BPF_DO_BATCH(fn, ...) 
\ 5626 do { \ 5627 if (!fn) { \ 5628 err = -ENOTSUPP; \ 5629 goto err_put; \ 5630 } \ 5631 err = fn(__VA_ARGS__); \ 5632 } while (0) 5633 5634 static int bpf_map_do_batch(const union bpf_attr *attr, 5635 union bpf_attr __user *uattr, 5636 int cmd) 5637 { 5638 bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || 5639 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; 5640 bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; 5641 struct bpf_map *map; 5642 int err; 5643 5644 if (CHECK_ATTR(BPF_MAP_BATCH)) 5645 return -EINVAL; 5646 5647 CLASS(fd, f)(attr->batch.map_fd); 5648 5649 map = __bpf_map_get(f); 5650 if (IS_ERR(map)) 5651 return PTR_ERR(map); 5652 if (has_write) 5653 bpf_map_write_active_inc(map); 5654 if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 5655 err = -EPERM; 5656 goto err_put; 5657 } 5658 if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 5659 err = -EPERM; 5660 goto err_put; 5661 } 5662 5663 if (cmd == BPF_MAP_LOOKUP_BATCH) 5664 BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr); 5665 else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) 5666 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr); 5667 else if (cmd == BPF_MAP_UPDATE_BATCH) 5668 BPF_DO_BATCH(map->ops->map_update_batch, map, fd_file(f), attr, uattr); 5669 else 5670 BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr); 5671 err_put: 5672 if (has_write) { 5673 maybe_wait_bpf_programs(map); 5674 bpf_map_write_active_dec(map); 5675 } 5676 return err; 5677 } 5678 5679 #define BPF_LINK_CREATE_LAST_FIELD link_create.uprobe_multi.pid 5680 static int link_create(union bpf_attr *attr, bpfptr_t uattr) 5681 { 5682 struct bpf_prog *prog; 5683 int ret; 5684 5685 if (CHECK_ATTR(BPF_LINK_CREATE)) 5686 return -EINVAL; 5687 5688 if (attr->link_create.attach_type == BPF_STRUCT_OPS) 5689 return bpf_struct_ops_link_create(attr); 5690 5691 prog = bpf_prog_get(attr->link_create.prog_fd); 5692 if (IS_ERR(prog)) 5693 return PTR_ERR(prog); 5694 5695 ret = bpf_prog_attach_check_attach_type(prog, 5696 attr->link_create.attach_type); 5697 if (ret) 5698 goto out; 5699 5700 switch (prog->type) { 5701 case BPF_PROG_TYPE_CGROUP_SKB: 5702 case BPF_PROG_TYPE_CGROUP_SOCK: 5703 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 5704 case BPF_PROG_TYPE_SOCK_OPS: 5705 case BPF_PROG_TYPE_CGROUP_DEVICE: 5706 case BPF_PROG_TYPE_CGROUP_SYSCTL: 5707 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 5708 ret = cgroup_bpf_link_attach(attr, prog); 5709 break; 5710 case BPF_PROG_TYPE_EXT: 5711 ret = bpf_tracing_prog_attach(prog, 5712 attr->link_create.target_fd, 5713 attr->link_create.target_btf_id, 5714 attr->link_create.tracing.cookie, 5715 attr->link_create.attach_type); 5716 break; 5717 case BPF_PROG_TYPE_LSM: 5718 case BPF_PROG_TYPE_TRACING: 5719 if (attr->link_create.attach_type != prog->expected_attach_type) { 5720 ret = -EINVAL; 5721 goto out; 5722 } 5723 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) 5724 ret = bpf_raw_tp_link_attach(prog, NULL, attr->link_create.tracing.cookie, 5725 attr->link_create.attach_type); 5726 else if (prog->expected_attach_type == BPF_TRACE_ITER) 5727 ret = bpf_iter_link_attach(attr, uattr, prog); 5728 else if (prog->expected_attach_type == BPF_LSM_CGROUP) 5729 ret = cgroup_bpf_link_attach(attr, prog); 5730 else 5731 ret = bpf_tracing_prog_attach(prog, 5732 attr->link_create.target_fd, 5733 attr->link_create.target_btf_id, 5734 attr->link_create.tracing.cookie, 5735 attr->link_create.attach_type); 5736 break; 5737 case BPF_PROG_TYPE_FLOW_DISSECTOR: 5738 case BPF_PROG_TYPE_SK_LOOKUP: 5739 ret = 
netns_bpf_link_create(attr, prog); 5740 break; 5741 case BPF_PROG_TYPE_SK_MSG: 5742 case BPF_PROG_TYPE_SK_SKB: 5743 ret = sock_map_link_create(attr, prog); 5744 break; 5745 #ifdef CONFIG_NET 5746 case BPF_PROG_TYPE_XDP: 5747 ret = bpf_xdp_link_attach(attr, prog); 5748 break; 5749 case BPF_PROG_TYPE_SCHED_CLS: 5750 if (attr->link_create.attach_type == BPF_TCX_INGRESS || 5751 attr->link_create.attach_type == BPF_TCX_EGRESS) 5752 ret = tcx_link_attach(attr, prog); 5753 else 5754 ret = netkit_link_attach(attr, prog); 5755 break; 5756 case BPF_PROG_TYPE_NETFILTER: 5757 ret = bpf_nf_link_attach(attr, prog); 5758 break; 5759 #endif 5760 case BPF_PROG_TYPE_PERF_EVENT: 5761 case BPF_PROG_TYPE_TRACEPOINT: 5762 ret = bpf_perf_link_attach(attr, prog); 5763 break; 5764 case BPF_PROG_TYPE_KPROBE: 5765 if (attr->link_create.attach_type == BPF_PERF_EVENT) 5766 ret = bpf_perf_link_attach(attr, prog); 5767 else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI || 5768 attr->link_create.attach_type == BPF_TRACE_KPROBE_SESSION) 5769 ret = bpf_kprobe_multi_link_attach(attr, prog); 5770 else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI || 5771 attr->link_create.attach_type == BPF_TRACE_UPROBE_SESSION) 5772 ret = bpf_uprobe_multi_link_attach(attr, prog); 5773 break; 5774 default: 5775 ret = -EINVAL; 5776 } 5777 5778 out: 5779 if (ret < 0) 5780 bpf_prog_put(prog); 5781 return ret; 5782 } 5783 5784 static int link_update_map(struct bpf_link *link, union bpf_attr *attr) 5785 { 5786 struct bpf_map *new_map, *old_map = NULL; 5787 int ret; 5788 5789 new_map = bpf_map_get(attr->link_update.new_map_fd); 5790 if (IS_ERR(new_map)) 5791 return PTR_ERR(new_map); 5792 5793 if (attr->link_update.flags & BPF_F_REPLACE) { 5794 old_map = bpf_map_get(attr->link_update.old_map_fd); 5795 if (IS_ERR(old_map)) { 5796 ret = PTR_ERR(old_map); 5797 goto out_put; 5798 } 5799 } else if (attr->link_update.old_map_fd) { 5800 ret = -EINVAL; 5801 goto out_put; 5802 } 5803 5804 ret = link->ops->update_map(link, new_map, old_map); 5805 5806 if (old_map) 5807 bpf_map_put(old_map); 5808 out_put: 5809 bpf_map_put(new_map); 5810 return ret; 5811 } 5812 5813 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd 5814 5815 static int link_update(union bpf_attr *attr) 5816 { 5817 struct bpf_prog *old_prog = NULL, *new_prog; 5818 struct bpf_link *link; 5819 u32 flags; 5820 int ret; 5821 5822 if (CHECK_ATTR(BPF_LINK_UPDATE)) 5823 return -EINVAL; 5824 5825 flags = attr->link_update.flags; 5826 if (flags & ~BPF_F_REPLACE) 5827 return -EINVAL; 5828 5829 link = bpf_link_get_from_fd(attr->link_update.link_fd); 5830 if (IS_ERR(link)) 5831 return PTR_ERR(link); 5832 5833 if (link->ops->update_map) { 5834 ret = link_update_map(link, attr); 5835 goto out_put_link; 5836 } 5837 5838 new_prog = bpf_prog_get(attr->link_update.new_prog_fd); 5839 if (IS_ERR(new_prog)) { 5840 ret = PTR_ERR(new_prog); 5841 goto out_put_link; 5842 } 5843 5844 if (flags & BPF_F_REPLACE) { 5845 old_prog = bpf_prog_get(attr->link_update.old_prog_fd); 5846 if (IS_ERR(old_prog)) { 5847 ret = PTR_ERR(old_prog); 5848 old_prog = NULL; 5849 goto out_put_progs; 5850 } 5851 } else if (attr->link_update.old_prog_fd) { 5852 ret = -EINVAL; 5853 goto out_put_progs; 5854 } 5855 5856 if (link->ops->update_prog) 5857 ret = link->ops->update_prog(link, new_prog, old_prog); 5858 else 5859 ret = -EINVAL; 5860 5861 out_put_progs: 5862 if (old_prog) 5863 bpf_prog_put(old_prog); 5864 if (ret) 5865 bpf_prog_put(new_prog); 5866 out_put_link: 5867 bpf_link_put_direct(link); 5868 return 
ret; 5869 } 5870 5871 #define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd 5872 5873 static int link_detach(union bpf_attr *attr) 5874 { 5875 struct bpf_link *link; 5876 int ret; 5877 5878 if (CHECK_ATTR(BPF_LINK_DETACH)) 5879 return -EINVAL; 5880 5881 link = bpf_link_get_from_fd(attr->link_detach.link_fd); 5882 if (IS_ERR(link)) 5883 return PTR_ERR(link); 5884 5885 if (link->ops->detach) 5886 ret = link->ops->detach(link); 5887 else 5888 ret = -EOPNOTSUPP; 5889 5890 bpf_link_put_direct(link); 5891 return ret; 5892 } 5893 5894 struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) 5895 { 5896 return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT); 5897 } 5898 EXPORT_SYMBOL(bpf_link_inc_not_zero); 5899 5900 struct bpf_link *bpf_link_by_id(u32 id) 5901 { 5902 struct bpf_link *link; 5903 5904 if (!id) 5905 return ERR_PTR(-ENOENT); 5906 5907 spin_lock_bh(&link_idr_lock); 5908 /* before link is "settled", ID is 0, pretend it doesn't exist yet */ 5909 link = idr_find(&link_idr, id); 5910 if (link) { 5911 if (link->id) 5912 link = bpf_link_inc_not_zero(link); 5913 else 5914 link = ERR_PTR(-EAGAIN); 5915 } else { 5916 link = ERR_PTR(-ENOENT); 5917 } 5918 spin_unlock_bh(&link_idr_lock); 5919 return link; 5920 } 5921 5922 struct bpf_link *bpf_link_get_curr_or_next(u32 *id) 5923 { 5924 struct bpf_link *link; 5925 5926 spin_lock_bh(&link_idr_lock); 5927 again: 5928 link = idr_get_next(&link_idr, id); 5929 if (link) { 5930 link = bpf_link_inc_not_zero(link); 5931 if (IS_ERR(link)) { 5932 (*id)++; 5933 goto again; 5934 } 5935 } 5936 spin_unlock_bh(&link_idr_lock); 5937 5938 return link; 5939 } 5940 5941 #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id 5942 5943 static int bpf_link_get_fd_by_id(const union bpf_attr *attr) 5944 { 5945 struct bpf_link *link; 5946 u32 id = attr->link_id; 5947 int fd; 5948 5949 if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) 5950 return -EINVAL; 5951 5952 if (!capable(CAP_SYS_ADMIN)) 5953 return -EPERM; 5954 5955 link = bpf_link_by_id(id); 5956 if (IS_ERR(link)) 5957 return PTR_ERR(link); 5958 5959 fd = bpf_link_new_fd(link); 5960 if (fd < 0) 5961 bpf_link_put_direct(link); 5962 5963 return fd; 5964 } 5965 5966 DEFINE_MUTEX(bpf_stats_enabled_mutex); 5967 5968 static int bpf_stats_release(struct inode *inode, struct file *file) 5969 { 5970 mutex_lock(&bpf_stats_enabled_mutex); 5971 static_key_slow_dec(&bpf_stats_enabled_key.key); 5972 mutex_unlock(&bpf_stats_enabled_mutex); 5973 return 0; 5974 } 5975 5976 static const struct file_operations bpf_stats_fops = { 5977 .release = bpf_stats_release, 5978 }; 5979 5980 static int bpf_enable_runtime_stats(void) 5981 { 5982 int fd; 5983 5984 mutex_lock(&bpf_stats_enabled_mutex); 5985 5986 /* Set a very high limit to avoid overflow */ 5987 if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) { 5988 mutex_unlock(&bpf_stats_enabled_mutex); 5989 return -EBUSY; 5990 } 5991 5992 fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); 5993 if (fd >= 0) 5994 static_key_slow_inc(&bpf_stats_enabled_key.key); 5995 5996 mutex_unlock(&bpf_stats_enabled_mutex); 5997 return fd; 5998 } 5999 6000 #define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type 6001 6002 static int bpf_enable_stats(union bpf_attr *attr) 6003 { 6004 6005 if (CHECK_ATTR(BPF_ENABLE_STATS)) 6006 return -EINVAL; 6007 6008 if (!capable(CAP_SYS_ADMIN)) 6009 return -EPERM; 6010 6011 switch (attr->enable_stats.type) { 6012 case BPF_STATS_RUN_TIME: 6013 return bpf_enable_runtime_stats(); 6014 default: 6015 break; 6016 } 6017 return -EINVAL; 
6018 } 6019 6020 #define BPF_ITER_CREATE_LAST_FIELD iter_create.flags 6021 6022 static int bpf_iter_create(union bpf_attr *attr) 6023 { 6024 struct bpf_link *link; 6025 int err; 6026 6027 if (CHECK_ATTR(BPF_ITER_CREATE)) 6028 return -EINVAL; 6029 6030 if (attr->iter_create.flags) 6031 return -EINVAL; 6032 6033 link = bpf_link_get_from_fd(attr->iter_create.link_fd); 6034 if (IS_ERR(link)) 6035 return PTR_ERR(link); 6036 6037 err = bpf_iter_new_fd(link); 6038 bpf_link_put_direct(link); 6039 6040 return err; 6041 } 6042 6043 #define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags 6044 6045 static int bpf_prog_bind_map(union bpf_attr *attr) 6046 { 6047 struct bpf_prog *prog; 6048 struct bpf_map *map; 6049 struct bpf_map **used_maps_old, **used_maps_new; 6050 int i, ret = 0; 6051 6052 if (CHECK_ATTR(BPF_PROG_BIND_MAP)) 6053 return -EINVAL; 6054 6055 if (attr->prog_bind_map.flags) 6056 return -EINVAL; 6057 6058 prog = bpf_prog_get(attr->prog_bind_map.prog_fd); 6059 if (IS_ERR(prog)) 6060 return PTR_ERR(prog); 6061 6062 map = bpf_map_get(attr->prog_bind_map.map_fd); 6063 if (IS_ERR(map)) { 6064 ret = PTR_ERR(map); 6065 goto out_prog_put; 6066 } 6067 6068 mutex_lock(&prog->aux->used_maps_mutex); 6069 6070 used_maps_old = prog->aux->used_maps; 6071 6072 for (i = 0; i < prog->aux->used_map_cnt; i++) 6073 if (used_maps_old[i] == map) { 6074 bpf_map_put(map); 6075 goto out_unlock; 6076 } 6077 6078 used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, 6079 sizeof(used_maps_new[0]), 6080 GFP_KERNEL); 6081 if (!used_maps_new) { 6082 ret = -ENOMEM; 6083 goto out_unlock; 6084 } 6085 6086 /* The bpf program will not access the bpf map, but for the sake of 6087 * simplicity, increase sleepable_refcnt for sleepable program as well. 6088 */ 6089 if (prog->sleepable) 6090 atomic64_inc(&map->sleepable_refcnt); 6091 memcpy(used_maps_new, used_maps_old, 6092 sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); 6093 used_maps_new[prog->aux->used_map_cnt] = map; 6094 6095 prog->aux->used_map_cnt++; 6096 prog->aux->used_maps = used_maps_new; 6097 6098 kfree(used_maps_old); 6099 6100 out_unlock: 6101 mutex_unlock(&prog->aux->used_maps_mutex); 6102 6103 if (ret) 6104 bpf_map_put(map); 6105 out_prog_put: 6106 bpf_prog_put(prog); 6107 return ret; 6108 } 6109 6110 #define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd 6111 6112 static int token_create(union bpf_attr *attr) 6113 { 6114 if (CHECK_ATTR(BPF_TOKEN_CREATE)) 6115 return -EINVAL; 6116 6117 /* no flags are supported yet */ 6118 if (attr->token_create.flags) 6119 return -EINVAL; 6120 6121 return bpf_token_create(attr); 6122 } 6123 6124 #define BPF_PROG_STREAM_READ_BY_FD_LAST_FIELD prog_stream_read.prog_fd 6125 6126 static int prog_stream_read(union bpf_attr *attr) 6127 { 6128 char __user *buf = u64_to_user_ptr(attr->prog_stream_read.stream_buf); 6129 u32 len = attr->prog_stream_read.stream_buf_len; 6130 struct bpf_prog *prog; 6131 int ret; 6132 6133 if (CHECK_ATTR(BPF_PROG_STREAM_READ_BY_FD)) 6134 return -EINVAL; 6135 6136 prog = bpf_prog_get(attr->prog_stream_read.prog_fd); 6137 if (IS_ERR(prog)) 6138 return PTR_ERR(prog); 6139 6140 ret = bpf_prog_stream_read(prog, attr->prog_stream_read.stream_id, buf, len); 6141 bpf_prog_put(prog); 6142 6143 return ret; 6144 } 6145 6146 #define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.prog_fd 6147 6148 static int prog_assoc_struct_ops(union bpf_attr *attr) 6149 { 6150 struct bpf_prog *prog; 6151 struct bpf_map *map; 6152 int ret; 6153 6154 if (CHECK_ATTR(BPF_PROG_ASSOC_STRUCT_OPS)) 6155 return -EINVAL; 
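	/*
	 * No flags are defined for this command yet; the program must not
	 * itself be a struct_ops program and the target map must be a
	 * BPF_MAP_TYPE_STRUCT_OPS map before the association is handed off
	 * to bpf_prog_assoc_struct_ops().
	 */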
#define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.prog_fd

static int prog_assoc_struct_ops(union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct bpf_map *map;
	int ret;

	if (CHECK_ATTR(BPF_PROG_ASSOC_STRUCT_OPS))
		return -EINVAL;

	if (attr->prog_assoc_struct_ops.flags)
		return -EINVAL;

	prog = bpf_prog_get(attr->prog_assoc_struct_ops.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
		ret = -EINVAL;
		goto put_prog;
	}

	map = bpf_map_get(attr->prog_assoc_struct_ops.map_fd);
	if (IS_ERR(map)) {
		ret = PTR_ERR(map);
		goto put_prog;
	}

	if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS) {
		ret = -EINVAL;
		goto put_map;
	}

	ret = bpf_prog_assoc_struct_ops(prog, map);

put_map:
	bpf_map_put(map);
put_prog:
	bpf_prog_put(prog);
	return ret;
}

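/* Top-level dispatcher shared by the bpf(2) syscall, kern_sys_bpf() and the
 * bpf_sys_bpf() helper below.  The attribute blob is size-checked, copied
 * into a zeroed kernel-side union bpf_attr (so a short userspace struct is
 * implicitly zero-extended), run past the security_bpf() LSM hook, and then
 * handed to the per-command handler.  Userspace reaches this through
 * syscall(__NR_bpf, cmd, &attr, sizeof(attr)); see SYSCALL_DEFINE3() below.
 */
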
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
	union bpf_attr attr;
	int err;

	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	memset(&attr, 0, sizeof(attr));
	if (copy_from_bpfptr(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size, uattr.is_kernel);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr, uattr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr, uattr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr, uattr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_MAP_FREEZE:
		err = map_freeze(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr, uattr, size);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr.user);
		break;
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr.user);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_BTF_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &btf_idr, &btf_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr.user);
		break;
	case BPF_RAW_TRACEPOINT_OPEN:
		err = bpf_raw_tracepoint_open(&attr);
		break;
	case BPF_BTF_LOAD:
		err = bpf_btf_load(&attr, uattr, size);
		break;
	case BPF_BTF_GET_FD_BY_ID:
		err = bpf_btf_get_fd_by_id(&attr);
		break;
	case BPF_TASK_FD_QUERY:
		err = bpf_task_fd_query(&attr, uattr.user);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
		err = map_lookup_and_delete_elem(&attr);
		break;
	case BPF_MAP_LOOKUP_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user,
				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
		break;
	case BPF_MAP_UPDATE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
		break;
	case BPF_MAP_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
		break;
	case BPF_LINK_CREATE:
		err = link_create(&attr, uattr);
		break;
	case BPF_LINK_UPDATE:
		err = link_update(&attr);
		break;
	case BPF_LINK_GET_FD_BY_ID:
		err = bpf_link_get_fd_by_id(&attr);
		break;
	case BPF_LINK_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &link_idr, &link_idr_lock);
		break;
	case BPF_ENABLE_STATS:
		err = bpf_enable_stats(&attr);
		break;
	case BPF_ITER_CREATE:
		err = bpf_iter_create(&attr);
		break;
	case BPF_LINK_DETACH:
		err = link_detach(&attr);
		break;
	case BPF_PROG_BIND_MAP:
		err = bpf_prog_bind_map(&attr);
		break;
	case BPF_TOKEN_CREATE:
		err = token_create(&attr);
		break;
	case BPF_PROG_STREAM_READ_BY_FD:
		err = prog_stream_read(&attr);
		break;
	case BPF_PROG_ASSOC_STRUCT_OPS:
		err = prog_assoc_struct_ops(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
}

static bool syscall_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= U16_MAX)
		return false;
	if (off % size != 0)
		return false;
	return true;
}

BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
{
	switch (cmd) {
	case BPF_MAP_CREATE:
	case BPF_MAP_DELETE_ELEM:
	case BPF_MAP_UPDATE_ELEM:
	case BPF_MAP_FREEZE:
	case BPF_MAP_GET_FD_BY_ID:
	case BPF_PROG_LOAD:
	case BPF_BTF_LOAD:
	case BPF_LINK_CREATE:
	case BPF_RAW_TRACEPOINT_OPEN:
		break;
	default:
		return -EINVAL;
	}
	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
}

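/*
 * Rough sketch of how the helper above is typically used, for illustration
 * only (this is BPF program code, not kernel code).  It assumes a libbpf
 * build environment providing vmlinux.h, bpf_helpers.h and the bpf_sys_bpf()
 * helper declaration; the program name is made up.
 *
 *	SEC("syscall")
 *	int create_array_map(void *ctx)
 *	{
 *		union bpf_attr attr = {
 *			.map_type    = BPF_MAP_TYPE_ARRAY,
 *			.key_size    = sizeof(__u32),
 *			.value_size  = sizeof(__u64),
 *			.max_entries = 1,
 *		};
 *
 *		// Only the commands allow-listed in bpf_sys_bpf() above are
 *		// accepted; a new map fd is returned on success.
 *		return bpf_sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 *	}
 */
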
/* To shut up -Wmissing-prototypes.
 * This function is used by the kernel light skeleton
 * to load bpf programs when modules are loaded or during kernel boot.
 * See tools/lib/bpf/skel_internal.h
 */
int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);

int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	struct bpf_prog * __maybe_unused prog;
	struct bpf_tramp_run_ctx __maybe_unused run_ctx;

	switch (cmd) {
#ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
	case BPF_PROG_TEST_RUN:
		if (attr->test.data_in || attr->test.data_out ||
		    attr->test.ctx_out || attr->test.duration ||
		    attr->test.repeat || attr->test.flags)
			return -EINVAL;

		prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
		if (IS_ERR(prog))
			return PTR_ERR(prog);

		if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
		    attr->test.ctx_size_in > U16_MAX) {
			bpf_prog_put(prog);
			return -EINVAL;
		}

		run_ctx.bpf_cookie = 0;
		if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
			/* recursion detected */
			__bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
			bpf_prog_put(prog);
			return -EBUSY;
		}
		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
		__bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
						&run_ctx);
		bpf_prog_put(prog);
		return 0;
#endif
	default:
		return ____bpf_sys_bpf(cmd, attr, size);
	}
}
EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL");

static const struct bpf_func_proto bpf_sys_bpf_proto = {
	.func = bpf_sys_bpf,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
	.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type = ARG_CONST_SIZE,
};

const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return bpf_base_func_proto(func_id, prog);
}

BPF_CALL_1(bpf_sys_close, u32, fd)
{
	/* When a bpf program calls this helper, there must not be an fdget()
	 * without a matching, completed fdput().
	 * This helper is allowed in the following callchain only:
	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
	 */
	return close_fd(fd);
}

static const struct bpf_func_proto bpf_sys_close_proto = {
	.func = bpf_sys_close,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
};

BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
	*res = 0;
	if (flags)
		return -EINVAL;

	if (name_sz <= 1 || name[name_sz - 1])
		return -EINVAL;

	if (!bpf_dump_raw_ok(current_cred()))
		return -EPERM;

	*res = kallsyms_lookup_name(name);
	return *res ? 0 : -ENOENT;
}

static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
	.func = bpf_kallsyms_lookup_name,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
	.arg3_type = ARG_ANYTHING,
	.arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
	.arg4_size = sizeof(u64),
};

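/* Helpers exposed to BPF_PROG_TYPE_SYSCALL programs.  bpf_sys_bpf() is
 * additionally gated on CAP_PERFMON (possibly granted through the program's
 * BPF token); everything else falls back to the tracing helper set.  As a
 * BPF-program-side illustration (not kernel code), bpf_kallsyms_lookup_name()
 * expects the terminating NUL to be counted in name_sz, e.g.:
 *
 *	__u64 addr;
 *	bpf_kallsyms_lookup_name("bpf_prog_put", sizeof("bpf_prog_put"),
 *				 0, &addr);
 */
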
static const struct bpf_func_proto *
syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_sys_bpf:
		return !bpf_token_capable(prog->aux->token, CAP_PERFMON)
		       ? NULL : &bpf_sys_bpf_proto;
	case BPF_FUNC_btf_find_by_name_kind:
		return &bpf_btf_find_by_name_kind_proto;
	case BPF_FUNC_sys_close:
		return &bpf_sys_close_proto;
	case BPF_FUNC_kallsyms_lookup_name:
		return &bpf_kallsyms_lookup_name_proto;
	default:
		return tracing_prog_func_proto(func_id, prog);
	}
}

const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
	.get_func_proto = syscall_prog_func_proto,
	.is_valid_access = syscall_prog_is_valid_access,
};

const struct bpf_prog_ops bpf_syscall_prog_ops = {
	.test_run = bpf_prog_test_run_syscall,
};

#ifdef CONFIG_SYSCTL
static int bpf_stats_handler(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct static_key *key = (struct static_key *)table->data;
	static int saved_val;
	int val, ret;
	struct ctl_table tmp = {
		.data = &val,
		.maxlen = sizeof(val),
		.mode = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	val = saved_val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret && val != saved_val) {
		if (val)
			static_key_slow_inc(key);
		else
			static_key_slow_dec(key);
		saved_val = val;
	}
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}

void __weak unpriv_ebpf_notify(int new_state)
{
}

static int bpf_unpriv_handler(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, unpriv_enable = *(int *)table->data;
	bool locked_state = unpriv_enable == 1;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &unpriv_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (locked_state && unpriv_enable != 1)
			return -EPERM;
		*(int *)table->data = unpriv_enable;
	}

	if (write)
		unpriv_ebpf_notify(unpriv_enable);

	return ret;
}

static const struct ctl_table bpf_syscall_table[] = {
	{
		.procname = "unprivileged_bpf_disabled",
		.data = &sysctl_unprivileged_bpf_disabled,
		.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
		.mode = 0644,
		.proc_handler = bpf_unpriv_handler,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
	},
	{
		.procname = "bpf_stats_enabled",
		.data = &bpf_stats_enabled_key.key,
		.mode = 0644,
		.proc_handler = bpf_stats_handler,
	},
};

static int __init bpf_syscall_sysctl_init(void)
{
	register_sysctl_init("kernel", bpf_syscall_table);
	return 0;
}
late_initcall(bpf_syscall_sysctl_init);
#endif /* CONFIG_SYSCTL */

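/*
 * Informal summary of the sysctls registered above, as implemented by the
 * handlers in this file (the admin-guide sysctl documentation remains the
 * canonical reference):
 *
 *   kernel.unprivileged_bpf_disabled:
 *     0 - unprivileged callers may use bpf(2), subject to the usual checks
 *     1 - unprivileged bpf() is disabled and the value can no longer be
 *         changed; bpf_unpriv_handler() rejects any write other than 1
 *     2 - unprivileged bpf() is disabled, but a CAP_SYS_ADMIN writer may
 *         still change the setting
 *
 *   kernel.bpf_stats_enabled:
 *     0/1 - toggles the bpf_stats_enabled_key static key that drives
 *           run-time statistics collection; writes require CAP_SYS_ADMIN.
 */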