// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <crypto/sha2.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
#include <linux/bsearch.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
#include <linux/pgtable.h>
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/sort.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
#include <linux/tracepoint.h>
#include <linux/overflow.h>
#include <linux/cookie.h>
#include <linux/verification.h>

#include <net/netfilter/nf_bpf_link.h>
#include <net/netkit.h>
#include <net/tcx.h>

/* Map-type predicates. Each one only compares (map)->map_type against a
 * fixed set of types; IS_FD_MAP() is the union of the other three.
 * For these types the syscall-side value is a u32 (see
 * bpf_map_value_size()).
 */
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
			IS_FD_HASH(map))

/* The two access-mode flags; they are mutually exclusive, see
 * bpf_get_file_flag().
 */
#define BPF_OBJ_FLAG_MASK   (BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
DEFINE_COOKIE(bpf_map_cookie);
/* IDR for programs; the prog_idr_lock spinlock is declared right after it */
static DEFINE_IDR(prog_idr);
static
DEFINE_SPINLOCK(prog_idr_lock); 62 static DEFINE_IDR(map_idr); 63 static DEFINE_SPINLOCK(map_idr_lock); 64 static DEFINE_IDR(link_idr); 65 static DEFINE_SPINLOCK(link_idr_lock); 66 67 int sysctl_unprivileged_bpf_disabled __read_mostly = 68 IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; 69 70 static const struct bpf_map_ops * const bpf_map_types[] = { 71 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 72 #define BPF_MAP_TYPE(_id, _ops) \ 73 [_id] = &_ops, 74 #define BPF_LINK_TYPE(_id, _name) 75 #include <linux/bpf_types.h> 76 #undef BPF_PROG_TYPE 77 #undef BPF_MAP_TYPE 78 #undef BPF_LINK_TYPE 79 }; 80 81 /* 82 * If we're handed a bigger struct than we know of, ensure all the unknown bits 83 * are 0 - i.e. new user-space does not rely on any kernel feature extensions 84 * we don't know about yet. 85 * 86 * There is a ToCToU between this function call and the following 87 * copy_from_user() call. However, this is not a concern since this function is 88 * meant to be a future-proofing of bits. 89 */ 90 int bpf_check_uarg_tail_zero(bpfptr_t uaddr, 91 size_t expected_size, 92 size_t actual_size) 93 { 94 int res; 95 96 if (unlikely(actual_size > PAGE_SIZE)) /* silly large */ 97 return -E2BIG; 98 99 if (actual_size <= expected_size) 100 return 0; 101 102 if (uaddr.is_kernel) 103 res = memchr_inv(uaddr.kernel + expected_size, 0, 104 actual_size - expected_size) == NULL; 105 else 106 res = check_zeroed_user(uaddr.user + expected_size, 107 actual_size - expected_size); 108 if (res < 0) 109 return res; 110 return res ? 
0 : -E2BIG; 111 } 112 113 const struct bpf_map_ops bpf_map_offload_ops = { 114 .map_meta_equal = bpf_map_meta_equal, 115 .map_alloc = bpf_map_offload_map_alloc, 116 .map_free = bpf_map_offload_map_free, 117 .map_check_btf = map_check_no_btf, 118 .map_mem_usage = bpf_map_offload_map_mem_usage, 119 }; 120 121 static void bpf_map_write_active_inc(struct bpf_map *map) 122 { 123 atomic64_inc(&map->writecnt); 124 } 125 126 static void bpf_map_write_active_dec(struct bpf_map *map) 127 { 128 atomic64_dec(&map->writecnt); 129 } 130 131 bool bpf_map_write_active(const struct bpf_map *map) 132 { 133 return atomic64_read(&map->writecnt) != 0; 134 } 135 136 static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags) 137 { 138 if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) 139 return map->value_size; 140 else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 141 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 142 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || 143 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 144 return round_up(map->value_size, 8) * num_possible_cpus(); 145 else if (IS_FD_MAP(map)) 146 return sizeof(u32); 147 else 148 return map->value_size; 149 } 150 151 static void maybe_wait_bpf_programs(struct bpf_map *map) 152 { 153 /* Wait for any running non-sleepable BPF programs to complete so that 154 * userspace, when we return to it, knows that all non-sleepable 155 * programs that could be running use the new map value. For sleepable 156 * BPF programs, synchronize_rcu_tasks_trace() should be used to wait 157 * for the completions of these programs, but considering the waiting 158 * time can be very long and userspace may think it will hang forever, 159 * so don't handle sleepable BPF programs now. 
160 */ 161 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || 162 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 163 synchronize_rcu_expedited(); 164 } 165 166 static void unpin_uptr_kaddr(void *kaddr) 167 { 168 if (kaddr) 169 unpin_user_page(virt_to_page(kaddr)); 170 } 171 172 static void __bpf_obj_unpin_uptrs(struct btf_record *rec, u32 cnt, void *obj) 173 { 174 const struct btf_field *field; 175 void **uptr_addr; 176 int i; 177 178 for (i = 0, field = rec->fields; i < cnt; i++, field++) { 179 if (field->type != BPF_UPTR) 180 continue; 181 182 uptr_addr = obj + field->offset; 183 unpin_uptr_kaddr(*uptr_addr); 184 } 185 } 186 187 static void bpf_obj_unpin_uptrs(struct btf_record *rec, void *obj) 188 { 189 if (!btf_record_has_field(rec, BPF_UPTR)) 190 return; 191 192 __bpf_obj_unpin_uptrs(rec, rec->cnt, obj); 193 } 194 195 static int bpf_obj_pin_uptrs(struct btf_record *rec, void *obj) 196 { 197 const struct btf_field *field; 198 const struct btf_type *t; 199 unsigned long start, end; 200 struct page *page; 201 void **uptr_addr; 202 int i, err; 203 204 if (!btf_record_has_field(rec, BPF_UPTR)) 205 return 0; 206 207 for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) { 208 if (field->type != BPF_UPTR) 209 continue; 210 211 uptr_addr = obj + field->offset; 212 start = *(unsigned long *)uptr_addr; 213 if (!start) 214 continue; 215 216 t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id); 217 /* t->size was checked for zero before */ 218 if (check_add_overflow(start, t->size - 1, &end)) { 219 err = -EFAULT; 220 goto unpin_all; 221 } 222 223 /* The uptr's struct cannot span across two pages */ 224 if ((start & PAGE_MASK) != (end & PAGE_MASK)) { 225 err = -EOPNOTSUPP; 226 goto unpin_all; 227 } 228 229 err = pin_user_pages_fast(start, 1, FOLL_LONGTERM | FOLL_WRITE, &page); 230 if (err != 1) 231 goto unpin_all; 232 233 if (PageHighMem(page)) { 234 err = -EOPNOTSUPP; 235 unpin_user_page(page); 236 goto unpin_all; 237 } 238 239 *uptr_addr = page_address(page) + 
offset_in_page(start); 240 } 241 242 return 0; 243 244 unpin_all: 245 __bpf_obj_unpin_uptrs(rec, i, obj); 246 return err; 247 } 248 249 static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, 250 void *key, void *value, __u64 flags) 251 { 252 int err; 253 254 /* Need to create a kthread, thus must support schedule */ 255 if (bpf_map_is_offloaded(map)) { 256 return bpf_map_offload_update_elem(map, key, value, flags); 257 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || 258 map->map_type == BPF_MAP_TYPE_ARENA || 259 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 260 return map->ops->map_update_elem(map, key, value, flags); 261 } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || 262 map->map_type == BPF_MAP_TYPE_SOCKMAP) { 263 return sock_map_update_elem_sys(map, key, value, flags); 264 } else if (IS_FD_PROG_ARRAY(map)) { 265 return bpf_fd_array_map_update_elem(map, map_file, key, value, 266 flags); 267 } 268 269 bpf_disable_instrumentation(); 270 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 271 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 272 err = bpf_percpu_hash_update(map, key, value, flags); 273 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 274 err = bpf_percpu_array_update(map, key, value, flags); 275 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 276 err = bpf_percpu_cgroup_storage_update(map, key, value, 277 flags); 278 } else if (IS_FD_ARRAY(map)) { 279 err = bpf_fd_array_map_update_elem(map, map_file, key, value, 280 flags); 281 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 282 err = bpf_fd_htab_map_update_elem(map, map_file, key, value, 283 flags); 284 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 285 /* rcu_read_lock() is not needed */ 286 err = bpf_fd_reuseport_array_update_elem(map, key, value, 287 flags); 288 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 289 map->map_type == BPF_MAP_TYPE_STACK || 290 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 291 err = 
map->ops->map_push_elem(map, value, flags); 292 } else { 293 err = bpf_obj_pin_uptrs(map->record, value); 294 if (!err) { 295 rcu_read_lock(); 296 err = map->ops->map_update_elem(map, key, value, flags); 297 rcu_read_unlock(); 298 if (err) 299 bpf_obj_unpin_uptrs(map->record, value); 300 } 301 } 302 bpf_enable_instrumentation(); 303 304 return err; 305 } 306 307 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, 308 __u64 flags) 309 { 310 void *ptr; 311 int err; 312 313 if (bpf_map_is_offloaded(map)) 314 return bpf_map_offload_lookup_elem(map, key, value); 315 316 bpf_disable_instrumentation(); 317 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 318 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 319 err = bpf_percpu_hash_copy(map, key, value, flags); 320 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 321 err = bpf_percpu_array_copy(map, key, value, flags); 322 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 323 err = bpf_percpu_cgroup_storage_copy(map, key, value, flags); 324 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 325 err = bpf_stackmap_extract(map, key, value, false); 326 } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { 327 err = bpf_fd_array_map_lookup_elem(map, key, value); 328 } else if (IS_FD_HASH(map)) { 329 err = bpf_fd_htab_map_lookup_elem(map, key, value); 330 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 331 err = bpf_fd_reuseport_array_lookup_elem(map, key, value); 332 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 333 map->map_type == BPF_MAP_TYPE_STACK || 334 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 335 err = map->ops->map_peek_elem(map, value); 336 } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 337 /* struct_ops map requires directly updating "value" */ 338 err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); 339 } else { 340 rcu_read_lock(); 341 if (map->ops->map_lookup_elem_sys_only) 342 ptr = map->ops->map_lookup_elem_sys_only(map, 
key); 343 else 344 ptr = map->ops->map_lookup_elem(map, key); 345 if (IS_ERR(ptr)) { 346 err = PTR_ERR(ptr); 347 } else if (!ptr) { 348 err = -ENOENT; 349 } else { 350 err = 0; 351 if (flags & BPF_F_LOCK) 352 /* lock 'ptr' and copy everything but lock */ 353 copy_map_value_locked(map, value, ptr, true); 354 else 355 copy_map_value(map, value, ptr); 356 /* mask lock and timer, since value wasn't zero inited */ 357 check_and_init_map_value(map, value); 358 } 359 rcu_read_unlock(); 360 } 361 362 bpf_enable_instrumentation(); 363 364 return err; 365 } 366 367 /* Please, do not use this function outside from the map creation path 368 * (e.g. in map update path) without taking care of setting the active 369 * memory cgroup (see at bpf_map_kmalloc_node() for example). 370 */ 371 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) 372 { 373 /* We really just want to fail instead of triggering OOM killer 374 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, 375 * which is used for lower order allocation requests. 376 * 377 * It has been observed that higher order allocation requests done by 378 * vmalloc with __GFP_NORETRY being set might fail due to not trying 379 * to reclaim memory from the page cache, thus we set 380 * __GFP_RETRY_MAYFAIL to avoid such situations. 
381 */ 382 383 gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO); 384 unsigned int flags = 0; 385 unsigned long align = 1; 386 void *area; 387 388 if (size >= SIZE_MAX) 389 return NULL; 390 391 /* kmalloc()'ed memory can't be mmap()'ed */ 392 if (mmapable) { 393 BUG_ON(!PAGE_ALIGNED(size)); 394 align = SHMLBA; 395 flags = VM_USERMAP; 396 } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 397 area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, 398 numa_node); 399 if (area != NULL) 400 return area; 401 } 402 403 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, 404 gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL, 405 flags, numa_node, __builtin_return_address(0)); 406 } 407 408 void *bpf_map_area_alloc(u64 size, int numa_node) 409 { 410 return __bpf_map_area_alloc(size, numa_node, false); 411 } 412 413 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node) 414 { 415 return __bpf_map_area_alloc(size, numa_node, true); 416 } 417 418 void bpf_map_area_free(void *area) 419 { 420 kvfree(area); 421 } 422 423 static u32 bpf_map_flags_retain_permanent(u32 flags) 424 { 425 /* Some map creation flags are not tied to the map object but 426 * rather to the map fd instead, so they have no meaning upon 427 * map object inspection since multiple file descriptors with 428 * different (access) properties can exist here. Thus, given 429 * this has zero meaning for the map itself, lets clear these 430 * from here. 
431 */ 432 return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); 433 } 434 435 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) 436 { 437 map->map_type = attr->map_type; 438 map->key_size = attr->key_size; 439 map->value_size = attr->value_size; 440 map->max_entries = attr->max_entries; 441 map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); 442 map->numa_node = bpf_map_attr_numa_node(attr); 443 map->map_extra = attr->map_extra; 444 } 445 446 static int bpf_map_alloc_id(struct bpf_map *map) 447 { 448 int id; 449 450 idr_preload(GFP_KERNEL); 451 spin_lock_bh(&map_idr_lock); 452 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); 453 if (id > 0) 454 map->id = id; 455 spin_unlock_bh(&map_idr_lock); 456 idr_preload_end(); 457 458 if (WARN_ON_ONCE(!id)) 459 return -ENOSPC; 460 461 return id > 0 ? 0 : id; 462 } 463 464 void bpf_map_free_id(struct bpf_map *map) 465 { 466 unsigned long flags; 467 468 /* Offloaded maps are removed from the IDR store when their device 469 * disappears - even if someone holds an fd to them they are unusable, 470 * the memory is gone, all ops will fail; they are simply waiting for 471 * refcnt to drop to be freed. 472 */ 473 if (!map->id) 474 return; 475 476 spin_lock_irqsave(&map_idr_lock, flags); 477 478 idr_remove(&map_idr, map->id); 479 map->id = 0; 480 481 spin_unlock_irqrestore(&map_idr_lock, flags); 482 } 483 484 #ifdef CONFIG_MEMCG 485 static void bpf_map_save_memcg(struct bpf_map *map) 486 { 487 /* Currently if a map is created by a process belonging to the root 488 * memory cgroup, get_obj_cgroup_from_current() will return NULL. 489 * So we have to check map->objcg for being NULL each time it's 490 * being used. 
491 */ 492 if (memcg_bpf_enabled()) 493 map->objcg = get_obj_cgroup_from_current(); 494 } 495 496 static void bpf_map_release_memcg(struct bpf_map *map) 497 { 498 if (map->objcg) 499 obj_cgroup_put(map->objcg); 500 } 501 502 static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map) 503 { 504 if (map->objcg) 505 return get_mem_cgroup_from_objcg(map->objcg); 506 507 return root_mem_cgroup; 508 } 509 510 void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg, 511 struct mem_cgroup **new_memcg) 512 { 513 *new_memcg = bpf_map_get_memcg(map); 514 *old_memcg = set_active_memcg(*new_memcg); 515 } 516 517 void bpf_map_memcg_exit(struct mem_cgroup *old_memcg, 518 struct mem_cgroup *new_memcg) 519 { 520 set_active_memcg(old_memcg); 521 mem_cgroup_put(new_memcg); 522 } 523 524 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, 525 int node) 526 { 527 struct mem_cgroup *memcg, *old_memcg; 528 void *ptr; 529 530 bpf_map_memcg_enter(map, &old_memcg, &memcg); 531 ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); 532 bpf_map_memcg_exit(old_memcg, memcg); 533 534 return ptr; 535 } 536 537 void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, 538 int node) 539 { 540 struct mem_cgroup *memcg, *old_memcg; 541 void *ptr; 542 543 bpf_map_memcg_enter(map, &old_memcg, &memcg); 544 ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node); 545 bpf_map_memcg_exit(old_memcg, memcg); 546 547 return ptr; 548 } 549 550 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) 551 { 552 struct mem_cgroup *memcg, *old_memcg; 553 void *ptr; 554 555 bpf_map_memcg_enter(map, &old_memcg, &memcg); 556 ptr = kzalloc(size, flags | __GFP_ACCOUNT); 557 bpf_map_memcg_exit(old_memcg, memcg); 558 559 return ptr; 560 } 561 562 void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, 563 gfp_t flags) 564 { 565 struct mem_cgroup *memcg, *old_memcg; 566 void *ptr; 567 568 
bpf_map_memcg_enter(map, &old_memcg, &memcg); 569 ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT); 570 bpf_map_memcg_exit(old_memcg, memcg); 571 572 return ptr; 573 } 574 575 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, 576 size_t align, gfp_t flags) 577 { 578 struct mem_cgroup *memcg, *old_memcg; 579 void __percpu *ptr; 580 581 bpf_map_memcg_enter(map, &old_memcg, &memcg); 582 ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT); 583 bpf_map_memcg_exit(old_memcg, memcg); 584 585 return ptr; 586 } 587 588 #else 589 static void bpf_map_save_memcg(struct bpf_map *map) 590 { 591 } 592 593 static void bpf_map_release_memcg(struct bpf_map *map) 594 { 595 } 596 #endif 597 598 static bool can_alloc_pages(void) 599 { 600 return preempt_count() == 0 && !irqs_disabled() && 601 !IS_ENABLED(CONFIG_PREEMPT_RT); 602 } 603 604 static struct page *__bpf_alloc_page(int nid) 605 { 606 if (!can_alloc_pages()) 607 return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0); 608 609 return alloc_pages_node(nid, 610 GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT 611 | __GFP_NOWARN, 612 0); 613 } 614 615 int bpf_map_alloc_pages(const struct bpf_map *map, int nid, 616 unsigned long nr_pages, struct page **pages) 617 { 618 unsigned long i, j; 619 struct page *pg; 620 int ret = 0; 621 622 for (i = 0; i < nr_pages; i++) { 623 pg = __bpf_alloc_page(nid); 624 625 if (pg) { 626 pages[i] = pg; 627 continue; 628 } 629 for (j = 0; j < i; j++) 630 free_pages_nolock(pages[j], 0); 631 ret = -ENOMEM; 632 break; 633 } 634 635 return ret; 636 } 637 638 639 static int btf_field_cmp(const void *a, const void *b) 640 { 641 const struct btf_field *f1 = a, *f2 = b; 642 643 if (f1->offset < f2->offset) 644 return -1; 645 else if (f1->offset > f2->offset) 646 return 1; 647 return 0; 648 } 649 650 struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset, 651 u32 field_mask) 652 { 653 struct btf_field *field; 654 655 if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & 
field_mask)) 656 return NULL; 657 field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp); 658 if (!field || !(field->type & field_mask)) 659 return NULL; 660 return field; 661 } 662 663 void btf_record_free(struct btf_record *rec) 664 { 665 int i; 666 667 if (IS_ERR_OR_NULL(rec)) 668 return; 669 for (i = 0; i < rec->cnt; i++) { 670 switch (rec->fields[i].type) { 671 case BPF_KPTR_UNREF: 672 case BPF_KPTR_REF: 673 case BPF_KPTR_PERCPU: 674 case BPF_UPTR: 675 if (rec->fields[i].kptr.module) 676 module_put(rec->fields[i].kptr.module); 677 if (btf_is_kernel(rec->fields[i].kptr.btf)) 678 btf_put(rec->fields[i].kptr.btf); 679 break; 680 case BPF_LIST_HEAD: 681 case BPF_LIST_NODE: 682 case BPF_RB_ROOT: 683 case BPF_RB_NODE: 684 case BPF_SPIN_LOCK: 685 case BPF_RES_SPIN_LOCK: 686 case BPF_TIMER: 687 case BPF_REFCOUNT: 688 case BPF_WORKQUEUE: 689 case BPF_TASK_WORK: 690 /* Nothing to release */ 691 break; 692 default: 693 WARN_ON_ONCE(1); 694 continue; 695 } 696 } 697 kfree(rec); 698 } 699 700 void bpf_map_free_record(struct bpf_map *map) 701 { 702 btf_record_free(map->record); 703 map->record = NULL; 704 } 705 706 struct btf_record *btf_record_dup(const struct btf_record *rec) 707 { 708 const struct btf_field *fields; 709 struct btf_record *new_rec; 710 int ret, size, i; 711 712 if (IS_ERR_OR_NULL(rec)) 713 return NULL; 714 size = struct_size(rec, fields, rec->cnt); 715 new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN); 716 if (!new_rec) 717 return ERR_PTR(-ENOMEM); 718 /* Do a deep copy of the btf_record */ 719 fields = rec->fields; 720 new_rec->cnt = 0; 721 for (i = 0; i < rec->cnt; i++) { 722 switch (fields[i].type) { 723 case BPF_KPTR_UNREF: 724 case BPF_KPTR_REF: 725 case BPF_KPTR_PERCPU: 726 case BPF_UPTR: 727 if (btf_is_kernel(fields[i].kptr.btf)) 728 btf_get(fields[i].kptr.btf); 729 if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { 730 ret = -ENXIO; 731 goto free; 732 } 733 break; 734 case BPF_LIST_HEAD: 
735 case BPF_LIST_NODE: 736 case BPF_RB_ROOT: 737 case BPF_RB_NODE: 738 case BPF_SPIN_LOCK: 739 case BPF_RES_SPIN_LOCK: 740 case BPF_TIMER: 741 case BPF_REFCOUNT: 742 case BPF_WORKQUEUE: 743 case BPF_TASK_WORK: 744 /* Nothing to acquire */ 745 break; 746 default: 747 ret = -EFAULT; 748 WARN_ON_ONCE(1); 749 goto free; 750 } 751 new_rec->cnt++; 752 } 753 return new_rec; 754 free: 755 btf_record_free(new_rec); 756 return ERR_PTR(ret); 757 } 758 759 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b) 760 { 761 bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b); 762 int size; 763 764 if (!a_has_fields && !b_has_fields) 765 return true; 766 if (a_has_fields != b_has_fields) 767 return false; 768 if (rec_a->cnt != rec_b->cnt) 769 return false; 770 size = struct_size(rec_a, fields, rec_a->cnt); 771 /* btf_parse_fields uses kzalloc to allocate a btf_record, so unused 772 * members are zeroed out. So memcmp is safe to do without worrying 773 * about padding/unused fields. 774 * 775 * While spin_lock, timer, and kptr have no relation to map BTF, 776 * list_head metadata is specific to map BTF, the btf and value_rec 777 * members in particular. btf is the map BTF, while value_rec points to 778 * btf_record in that map BTF. 779 * 780 * So while by default, we don't rely on the map BTF (which the records 781 * were parsed from) matching for both records, which is not backwards 782 * compatible, in case list_head is part of it, we implicitly rely on 783 * that by way of depending on memcmp succeeding for it. 
784 */ 785 return !memcmp(rec_a, rec_b, size); 786 } 787 788 void bpf_obj_free_timer(const struct btf_record *rec, void *obj) 789 { 790 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER))) 791 return; 792 bpf_timer_cancel_and_free(obj + rec->timer_off); 793 } 794 795 void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj) 796 { 797 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_WORKQUEUE))) 798 return; 799 bpf_wq_cancel_and_free(obj + rec->wq_off); 800 } 801 802 void bpf_obj_free_task_work(const struct btf_record *rec, void *obj) 803 { 804 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK))) 805 return; 806 bpf_task_work_cancel_and_free(obj + rec->task_work_off); 807 } 808 809 void bpf_obj_free_fields(const struct btf_record *rec, void *obj) 810 { 811 const struct btf_field *fields; 812 int i; 813 814 if (IS_ERR_OR_NULL(rec)) 815 return; 816 fields = rec->fields; 817 for (i = 0; i < rec->cnt; i++) { 818 struct btf_struct_meta *pointee_struct_meta; 819 const struct btf_field *field = &fields[i]; 820 void *field_ptr = obj + field->offset; 821 void *xchgd_field; 822 823 switch (fields[i].type) { 824 case BPF_SPIN_LOCK: 825 case BPF_RES_SPIN_LOCK: 826 break; 827 case BPF_TIMER: 828 bpf_timer_cancel_and_free(field_ptr); 829 break; 830 case BPF_WORKQUEUE: 831 bpf_wq_cancel_and_free(field_ptr); 832 break; 833 case BPF_TASK_WORK: 834 bpf_task_work_cancel_and_free(field_ptr); 835 break; 836 case BPF_KPTR_UNREF: 837 WRITE_ONCE(*(u64 *)field_ptr, 0); 838 break; 839 case BPF_KPTR_REF: 840 case BPF_KPTR_PERCPU: 841 xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0); 842 if (!xchgd_field) 843 break; 844 845 if (!btf_is_kernel(field->kptr.btf)) { 846 pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, 847 field->kptr.btf_id); 848 __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? 
849 pointee_struct_meta->record : NULL, 850 fields[i].type == BPF_KPTR_PERCPU); 851 } else { 852 field->kptr.dtor(xchgd_field); 853 } 854 break; 855 case BPF_UPTR: 856 /* The caller ensured that no one is using the uptr */ 857 unpin_uptr_kaddr(*(void **)field_ptr); 858 break; 859 case BPF_LIST_HEAD: 860 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 861 continue; 862 bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off); 863 break; 864 case BPF_RB_ROOT: 865 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 866 continue; 867 bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off); 868 break; 869 case BPF_LIST_NODE: 870 case BPF_RB_NODE: 871 case BPF_REFCOUNT: 872 break; 873 default: 874 WARN_ON_ONCE(1); 875 continue; 876 } 877 } 878 } 879 880 static void bpf_map_free(struct bpf_map *map) 881 { 882 struct btf_record *rec = map->record; 883 struct btf *btf = map->btf; 884 885 /* implementation dependent freeing. Disabling migration to simplify 886 * the free of values or special fields allocated from bpf memory 887 * allocator. 888 */ 889 kfree(map->excl_prog_sha); 890 migrate_disable(); 891 map->ops->map_free(map); 892 migrate_enable(); 893 894 /* Delay freeing of btf_record for maps, as map_free 895 * callback usually needs access to them. It is better to do it here 896 * than require each callback to do the free itself manually. 897 * 898 * Note that the btf_record stashed in map->inner_map_meta->record was 899 * already freed using the map_free callback for map in map case which 900 * eventually calls bpf_map_free_meta, since inner_map_meta is only a 901 * template bpf_map struct used during verification. 902 */ 903 btf_record_free(rec); 904 /* Delay freeing of btf for maps, as map_free callback may need 905 * struct_meta info which will be freed with btf_put(). 
906 */ 907 btf_put(btf); 908 } 909 910 /* called from workqueue */ 911 static void bpf_map_free_deferred(struct work_struct *work) 912 { 913 struct bpf_map *map = container_of(work, struct bpf_map, work); 914 915 security_bpf_map_free(map); 916 bpf_map_release_memcg(map); 917 bpf_map_owner_free(map); 918 bpf_map_free(map); 919 } 920 921 static void bpf_map_put_uref(struct bpf_map *map) 922 { 923 if (atomic64_dec_and_test(&map->usercnt)) { 924 if (map->ops->map_release_uref) 925 map->ops->map_release_uref(map); 926 } 927 } 928 929 static void bpf_map_free_in_work(struct bpf_map *map) 930 { 931 INIT_WORK(&map->work, bpf_map_free_deferred); 932 /* Avoid spawning kworkers, since they all might contend 933 * for the same mutex like slab_mutex. 934 */ 935 queue_work(system_dfl_wq, &map->work); 936 } 937 938 static void bpf_map_free_rcu_gp(struct rcu_head *rcu) 939 { 940 bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu)); 941 } 942 943 static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu) 944 { 945 if (rcu_trace_implies_rcu_gp()) 946 bpf_map_free_rcu_gp(rcu); 947 else 948 call_rcu(rcu, bpf_map_free_rcu_gp); 949 } 950 951 /* decrement map refcnt and schedule it for freeing via workqueue 952 * (underlying map implementation ops->map_free() might sleep) 953 */ 954 void bpf_map_put(struct bpf_map *map) 955 { 956 if (atomic64_dec_and_test(&map->refcnt)) { 957 /* bpf_map_free_id() must be called first */ 958 bpf_map_free_id(map); 959 960 WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt)); 961 if (READ_ONCE(map->free_after_mult_rcu_gp)) 962 call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp); 963 else if (READ_ONCE(map->free_after_rcu_gp)) 964 call_rcu(&map->rcu, bpf_map_free_rcu_gp); 965 else 966 bpf_map_free_in_work(map); 967 } 968 } 969 EXPORT_SYMBOL_GPL(bpf_map_put); 970 971 void bpf_map_put_with_uref(struct bpf_map *map) 972 { 973 bpf_map_put_uref(map); 974 bpf_map_put(map); 975 } 976 977 static int bpf_map_release(struct inode *inode, struct file 
*filp) 978 { 979 struct bpf_map *map = filp->private_data; 980 981 if (map->ops->map_release) 982 map->ops->map_release(map, filp); 983 984 bpf_map_put_with_uref(map); 985 return 0; 986 } 987 988 static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) 989 { 990 fmode_t mode = fd_file(f)->f_mode; 991 992 /* Our file permissions may have been overridden by global 993 * map permissions facing syscall side. 994 */ 995 if (READ_ONCE(map->frozen)) 996 mode &= ~FMODE_CAN_WRITE; 997 return mode; 998 } 999 1000 #ifdef CONFIG_PROC_FS 1001 /* Show the memory usage of a bpf map */ 1002 static u64 bpf_map_memory_usage(const struct bpf_map *map) 1003 { 1004 return map->ops->map_mem_usage(map); 1005 } 1006 1007 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 1008 { 1009 struct bpf_map *map = filp->private_data; 1010 u32 type = 0, jited = 0; 1011 1012 spin_lock(&map->owner_lock); 1013 if (map->owner) { 1014 type = map->owner->type; 1015 jited = map->owner->jited; 1016 } 1017 spin_unlock(&map->owner_lock); 1018 1019 seq_printf(m, 1020 "map_type:\t%u\n" 1021 "key_size:\t%u\n" 1022 "value_size:\t%u\n" 1023 "max_entries:\t%u\n" 1024 "map_flags:\t%#x\n" 1025 "map_extra:\t%#llx\n" 1026 "memlock:\t%llu\n" 1027 "map_id:\t%u\n" 1028 "frozen:\t%u\n", 1029 map->map_type, 1030 map->key_size, 1031 map->value_size, 1032 map->max_entries, 1033 map->map_flags, 1034 (unsigned long long)map->map_extra, 1035 bpf_map_memory_usage(map), 1036 map->id, 1037 READ_ONCE(map->frozen)); 1038 if (type) { 1039 seq_printf(m, "owner_prog_type:\t%u\n", type); 1040 seq_printf(m, "owner_jited:\t%u\n", jited); 1041 } 1042 } 1043 #endif 1044 1045 static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, 1046 loff_t *ppos) 1047 { 1048 /* We need this handler such that alloc_file() enables 1049 * f_mode with FMODE_CAN_READ. 
1050 */ 1051 return -EINVAL; 1052 } 1053 1054 static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, 1055 size_t siz, loff_t *ppos) 1056 { 1057 /* We need this handler such that alloc_file() enables 1058 * f_mode with FMODE_CAN_WRITE. 1059 */ 1060 return -EINVAL; 1061 } 1062 1063 /* called for any extra memory-mapped regions (except initial) */ 1064 static void bpf_map_mmap_open(struct vm_area_struct *vma) 1065 { 1066 struct bpf_map *map = vma->vm_file->private_data; 1067 1068 if (vma->vm_flags & VM_MAYWRITE) 1069 bpf_map_write_active_inc(map); 1070 } 1071 1072 /* called for all unmapped memory region (including initial) */ 1073 static void bpf_map_mmap_close(struct vm_area_struct *vma) 1074 { 1075 struct bpf_map *map = vma->vm_file->private_data; 1076 1077 if (vma->vm_flags & VM_MAYWRITE) 1078 bpf_map_write_active_dec(map); 1079 } 1080 1081 static const struct vm_operations_struct bpf_map_default_vmops = { 1082 .open = bpf_map_mmap_open, 1083 .close = bpf_map_mmap_close, 1084 }; 1085 1086 static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) 1087 { 1088 struct bpf_map *map = filp->private_data; 1089 int err = 0; 1090 1091 if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record)) 1092 return -ENOTSUPP; 1093 1094 if (!(vma->vm_flags & VM_SHARED)) 1095 return -EINVAL; 1096 1097 mutex_lock(&map->freeze_mutex); 1098 1099 if (vma->vm_flags & VM_WRITE) { 1100 if (map->frozen) { 1101 err = -EPERM; 1102 goto out; 1103 } 1104 /* map is meant to be read-only, so do not allow mapping as 1105 * writable, because it's possible to leak a writable page 1106 * reference and allows user-space to still modify it after 1107 * freezing, while verifier will assume contents do not change 1108 */ 1109 if (map->map_flags & BPF_F_RDONLY_PROG) { 1110 err = -EACCES; 1111 goto out; 1112 } 1113 bpf_map_write_active_inc(map); 1114 } 1115 out: 1116 mutex_unlock(&map->freeze_mutex); 1117 if (err) 1118 return err; 1119 1120 /* set default open/close callbacks */ 
1121 vma->vm_ops = &bpf_map_default_vmops; 1122 vma->vm_private_data = map; 1123 vm_flags_clear(vma, VM_MAYEXEC); 1124 /* If mapping is read-only, then disallow potentially re-mapping with 1125 * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing 1126 * means that as far as BPF map's memory-mapped VMAs are concerned, 1127 * VM_WRITE and VM_MAYWRITE and equivalent, if one of them is set, 1128 * both should be set, so we can forget about VM_MAYWRITE and always 1129 * check just VM_WRITE 1130 */ 1131 if (!(vma->vm_flags & VM_WRITE)) 1132 vm_flags_clear(vma, VM_MAYWRITE); 1133 1134 err = map->ops->map_mmap(map, vma); 1135 if (err) { 1136 if (vma->vm_flags & VM_WRITE) 1137 bpf_map_write_active_dec(map); 1138 } 1139 1140 return err; 1141 } 1142 1143 static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts) 1144 { 1145 struct bpf_map *map = filp->private_data; 1146 1147 if (map->ops->map_poll) 1148 return map->ops->map_poll(map, filp, pts); 1149 1150 return EPOLLERR; 1151 } 1152 1153 static unsigned long bpf_get_unmapped_area(struct file *filp, unsigned long addr, 1154 unsigned long len, unsigned long pgoff, 1155 unsigned long flags) 1156 { 1157 struct bpf_map *map = filp->private_data; 1158 1159 if (map->ops->map_get_unmapped_area) 1160 return map->ops->map_get_unmapped_area(filp, addr, len, pgoff, flags); 1161 #ifdef CONFIG_MMU 1162 return mm_get_unmapped_area(filp, addr, len, pgoff, flags); 1163 #else 1164 return addr; 1165 #endif 1166 } 1167 1168 const struct file_operations bpf_map_fops = { 1169 #ifdef CONFIG_PROC_FS 1170 .show_fdinfo = bpf_map_show_fdinfo, 1171 #endif 1172 .release = bpf_map_release, 1173 .read = bpf_dummy_read, 1174 .write = bpf_dummy_write, 1175 .mmap = bpf_map_mmap, 1176 .poll = bpf_map_poll, 1177 .get_unmapped_area = bpf_get_unmapped_area, 1178 }; 1179 1180 int bpf_map_new_fd(struct bpf_map *map, int flags) 1181 { 1182 int ret; 1183 1184 ret = security_bpf_map(map, OPEN_FMODE(flags)); 1185 if (ret < 0) 1186 
return ret; 1187 1188 return anon_inode_getfd("bpf-map", &bpf_map_fops, map, 1189 flags | O_CLOEXEC); 1190 } 1191 1192 int bpf_get_file_flag(int flags) 1193 { 1194 if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) 1195 return -EINVAL; 1196 if (flags & BPF_F_RDONLY) 1197 return O_RDONLY; 1198 if (flags & BPF_F_WRONLY) 1199 return O_WRONLY; 1200 return O_RDWR; 1201 } 1202 1203 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 1204 #define CHECK_ATTR(CMD) \ 1205 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 1206 sizeof(attr->CMD##_LAST_FIELD), 0, \ 1207 sizeof(*attr) - \ 1208 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 1209 sizeof(attr->CMD##_LAST_FIELD)) != NULL 1210 1211 /* dst and src must have at least "size" number of bytes. 1212 * Return strlen on success and < 0 on error. 1213 */ 1214 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size) 1215 { 1216 const char *end = src + size; 1217 const char *orig_src = src; 1218 1219 memset(dst, 0, size); 1220 /* Copy all isalnum(), '_' and '.' chars. */ 1221 while (src < end && *src) { 1222 if (!isalnum(*src) && 1223 *src != '_' && *src != '.') 1224 return -EINVAL; 1225 *dst++ = *src++; 1226 } 1227 1228 /* No '\0' found in "size" number of bytes */ 1229 if (src == end) 1230 return -EINVAL; 1231 1232 return src - orig_src; 1233 } 1234 EXPORT_SYMBOL_GPL(bpf_obj_name_cpy); 1235 1236 int map_check_no_btf(const struct bpf_map *map, 1237 const struct btf *btf, 1238 const struct btf_type *key_type, 1239 const struct btf_type *value_type) 1240 { 1241 return -ENOTSUPP; 1242 } 1243 1244 static int map_check_btf(struct bpf_map *map, struct bpf_token *token, 1245 const struct btf *btf, u32 btf_key_id, u32 btf_value_id) 1246 { 1247 const struct btf_type *key_type, *value_type; 1248 u32 key_size, value_size; 1249 int ret = 0; 1250 1251 /* Some maps allow key to be unspecified. 
*/ 1252 if (btf_key_id) { 1253 key_type = btf_type_id_size(btf, &btf_key_id, &key_size); 1254 if (!key_type || key_size != map->key_size) 1255 return -EINVAL; 1256 } else { 1257 key_type = btf_type_by_id(btf, 0); 1258 if (!map->ops->map_check_btf) 1259 return -EINVAL; 1260 } 1261 1262 value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 1263 if (!value_type || value_size != map->value_size) 1264 return -EINVAL; 1265 1266 map->record = btf_parse_fields(btf, value_type, 1267 BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | 1268 BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR | 1269 BPF_TASK_WORK, 1270 map->value_size); 1271 if (!IS_ERR_OR_NULL(map->record)) { 1272 int i; 1273 1274 if (!bpf_token_capable(token, CAP_BPF)) { 1275 ret = -EPERM; 1276 goto free_map_tab; 1277 } 1278 if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) { 1279 ret = -EACCES; 1280 goto free_map_tab; 1281 } 1282 for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) { 1283 switch (map->record->field_mask & (1 << i)) { 1284 case 0: 1285 continue; 1286 case BPF_SPIN_LOCK: 1287 case BPF_RES_SPIN_LOCK: 1288 if (map->map_type != BPF_MAP_TYPE_HASH && 1289 map->map_type != BPF_MAP_TYPE_ARRAY && 1290 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 1291 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1292 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1293 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1294 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1295 ret = -EOPNOTSUPP; 1296 goto free_map_tab; 1297 } 1298 break; 1299 case BPF_TIMER: 1300 case BPF_WORKQUEUE: 1301 case BPF_TASK_WORK: 1302 if (map->map_type != BPF_MAP_TYPE_HASH && 1303 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1304 map->map_type != BPF_MAP_TYPE_ARRAY) { 1305 ret = -EOPNOTSUPP; 1306 goto free_map_tab; 1307 } 1308 break; 1309 case BPF_KPTR_UNREF: 1310 case BPF_KPTR_REF: 1311 case BPF_KPTR_PERCPU: 1312 case BPF_REFCOUNT: 1313 if (map->map_type != BPF_MAP_TYPE_HASH && 1314 map->map_type 
!= BPF_MAP_TYPE_PERCPU_HASH && 1315 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1316 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH && 1317 map->map_type != BPF_MAP_TYPE_ARRAY && 1318 map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && 1319 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1320 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1321 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1322 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1323 ret = -EOPNOTSUPP; 1324 goto free_map_tab; 1325 } 1326 break; 1327 case BPF_UPTR: 1328 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) { 1329 ret = -EOPNOTSUPP; 1330 goto free_map_tab; 1331 } 1332 break; 1333 case BPF_LIST_HEAD: 1334 case BPF_RB_ROOT: 1335 if (map->map_type != BPF_MAP_TYPE_HASH && 1336 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1337 map->map_type != BPF_MAP_TYPE_ARRAY) { 1338 ret = -EOPNOTSUPP; 1339 goto free_map_tab; 1340 } 1341 break; 1342 default: 1343 /* Fail if map_type checks are missing for a field type */ 1344 ret = -EOPNOTSUPP; 1345 goto free_map_tab; 1346 } 1347 } 1348 } 1349 1350 ret = btf_check_and_fixup_fields(btf, map->record); 1351 if (ret < 0) 1352 goto free_map_tab; 1353 1354 if (map->ops->map_check_btf) { 1355 ret = map->ops->map_check_btf(map, btf, key_type, value_type); 1356 if (ret < 0) 1357 goto free_map_tab; 1358 } 1359 1360 return ret; 1361 free_map_tab: 1362 bpf_map_free_record(map); 1363 return ret; 1364 } 1365 1366 static bool bpf_net_capable(void) 1367 { 1368 return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); 1369 } 1370 1371 #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size 1372 /* called via syscall */ 1373 static int map_create(union bpf_attr *attr, bpfptr_t uattr) 1374 { 1375 const struct bpf_map_ops *ops; 1376 struct bpf_token *token = NULL; 1377 int numa_node = bpf_map_attr_numa_node(attr); 1378 u32 map_type = attr->map_type; 1379 struct bpf_map *map; 1380 bool token_flag; 1381 int f_flags; 1382 int err; 1383 1384 err = CHECK_ATTR(BPF_MAP_CREATE); 1385 if (err) 1386 return 
-EINVAL; 1387 1388 /* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it 1389 * to avoid per-map type checks tripping on unknown flag 1390 */ 1391 token_flag = attr->map_flags & BPF_F_TOKEN_FD; 1392 attr->map_flags &= ~BPF_F_TOKEN_FD; 1393 1394 if (attr->btf_vmlinux_value_type_id) { 1395 if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || 1396 attr->btf_key_type_id || attr->btf_value_type_id) 1397 return -EINVAL; 1398 } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { 1399 return -EINVAL; 1400 } 1401 1402 if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && 1403 attr->map_type != BPF_MAP_TYPE_ARENA && 1404 attr->map_extra != 0) 1405 return -EINVAL; 1406 1407 f_flags = bpf_get_file_flag(attr->map_flags); 1408 if (f_flags < 0) 1409 return f_flags; 1410 1411 if (numa_node != NUMA_NO_NODE && 1412 ((unsigned int)numa_node >= nr_node_ids || 1413 !node_online(numa_node))) 1414 return -EINVAL; 1415 1416 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 1417 map_type = attr->map_type; 1418 if (map_type >= ARRAY_SIZE(bpf_map_types)) 1419 return -EINVAL; 1420 map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); 1421 ops = bpf_map_types[map_type]; 1422 if (!ops) 1423 return -EINVAL; 1424 1425 if (ops->map_alloc_check) { 1426 err = ops->map_alloc_check(attr); 1427 if (err) 1428 return err; 1429 } 1430 if (attr->map_ifindex) 1431 ops = &bpf_map_offload_ops; 1432 if (!ops->map_mem_usage) 1433 return -EINVAL; 1434 1435 if (token_flag) { 1436 token = bpf_token_get_from_fd(attr->map_token_fd); 1437 if (IS_ERR(token)) 1438 return PTR_ERR(token); 1439 1440 /* if current token doesn't grant map creation permissions, 1441 * then we can't use this token, so ignore it and rely on 1442 * system-wide capabilities checks 1443 */ 1444 if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) || 1445 !bpf_token_allow_map_type(token, attr->map_type)) { 1446 bpf_token_put(token); 1447 token = NULL; 1448 } 1449 } 1450 1451 err = -EPERM; 1452 1453 
/* Intent here is for unprivileged_bpf_disabled to block BPF map 1454 * creation for unprivileged users; other actions depend 1455 * on fd availability and access to bpffs, so are dependent on 1456 * object creation success. Even with unprivileged BPF disabled, 1457 * capability checks are still carried out. 1458 */ 1459 if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF)) 1460 goto put_token; 1461 1462 /* check privileged map type permissions */ 1463 switch (map_type) { 1464 case BPF_MAP_TYPE_ARRAY: 1465 case BPF_MAP_TYPE_PERCPU_ARRAY: 1466 case BPF_MAP_TYPE_PROG_ARRAY: 1467 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 1468 case BPF_MAP_TYPE_CGROUP_ARRAY: 1469 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 1470 case BPF_MAP_TYPE_HASH: 1471 case BPF_MAP_TYPE_PERCPU_HASH: 1472 case BPF_MAP_TYPE_HASH_OF_MAPS: 1473 case BPF_MAP_TYPE_RINGBUF: 1474 case BPF_MAP_TYPE_USER_RINGBUF: 1475 case BPF_MAP_TYPE_CGROUP_STORAGE: 1476 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: 1477 /* unprivileged */ 1478 break; 1479 case BPF_MAP_TYPE_SK_STORAGE: 1480 case BPF_MAP_TYPE_INODE_STORAGE: 1481 case BPF_MAP_TYPE_TASK_STORAGE: 1482 case BPF_MAP_TYPE_CGRP_STORAGE: 1483 case BPF_MAP_TYPE_BLOOM_FILTER: 1484 case BPF_MAP_TYPE_LPM_TRIE: 1485 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: 1486 case BPF_MAP_TYPE_STACK_TRACE: 1487 case BPF_MAP_TYPE_QUEUE: 1488 case BPF_MAP_TYPE_STACK: 1489 case BPF_MAP_TYPE_LRU_HASH: 1490 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 1491 case BPF_MAP_TYPE_STRUCT_OPS: 1492 case BPF_MAP_TYPE_CPUMAP: 1493 case BPF_MAP_TYPE_ARENA: 1494 case BPF_MAP_TYPE_INSN_ARRAY: 1495 if (!bpf_token_capable(token, CAP_BPF)) 1496 goto put_token; 1497 break; 1498 case BPF_MAP_TYPE_SOCKMAP: 1499 case BPF_MAP_TYPE_SOCKHASH: 1500 case BPF_MAP_TYPE_DEVMAP: 1501 case BPF_MAP_TYPE_DEVMAP_HASH: 1502 case BPF_MAP_TYPE_XSKMAP: 1503 if (!bpf_token_capable(token, CAP_NET_ADMIN)) 1504 goto put_token; 1505 break; 1506 default: 1507 WARN(1, "unsupported map type %d", map_type); 1508 goto put_token; 1509 } 1510 1511 
map = ops->map_alloc(attr); 1512 if (IS_ERR(map)) { 1513 err = PTR_ERR(map); 1514 goto put_token; 1515 } 1516 map->ops = ops; 1517 map->map_type = map_type; 1518 1519 err = bpf_obj_name_cpy(map->name, attr->map_name, 1520 sizeof(attr->map_name)); 1521 if (err < 0) 1522 goto free_map; 1523 1524 preempt_disable(); 1525 map->cookie = gen_cookie_next(&bpf_map_cookie); 1526 preempt_enable(); 1527 1528 atomic64_set(&map->refcnt, 1); 1529 atomic64_set(&map->usercnt, 1); 1530 mutex_init(&map->freeze_mutex); 1531 spin_lock_init(&map->owner_lock); 1532 1533 if (attr->btf_key_type_id || attr->btf_value_type_id || 1534 /* Even the map's value is a kernel's struct, 1535 * the bpf_prog.o must have BTF to begin with 1536 * to figure out the corresponding kernel's 1537 * counter part. Thus, attr->btf_fd has 1538 * to be valid also. 1539 */ 1540 attr->btf_vmlinux_value_type_id) { 1541 struct btf *btf; 1542 1543 btf = btf_get_by_fd(attr->btf_fd); 1544 if (IS_ERR(btf)) { 1545 err = PTR_ERR(btf); 1546 goto free_map; 1547 } 1548 if (btf_is_kernel(btf)) { 1549 btf_put(btf); 1550 err = -EACCES; 1551 goto free_map; 1552 } 1553 map->btf = btf; 1554 1555 if (attr->btf_value_type_id) { 1556 err = map_check_btf(map, token, btf, attr->btf_key_type_id, 1557 attr->btf_value_type_id); 1558 if (err) 1559 goto free_map; 1560 } 1561 1562 map->btf_key_type_id = attr->btf_key_type_id; 1563 map->btf_value_type_id = attr->btf_value_type_id; 1564 map->btf_vmlinux_value_type_id = 1565 attr->btf_vmlinux_value_type_id; 1566 } 1567 1568 if (attr->excl_prog_hash) { 1569 bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel); 1570 1571 if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) { 1572 err = -EINVAL; 1573 goto free_map; 1574 } 1575 1576 map->excl_prog_sha = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); 1577 if (!map->excl_prog_sha) { 1578 err = -ENOMEM; 1579 goto free_map; 1580 } 1581 1582 if (copy_from_bpfptr(map->excl_prog_sha, uprog_hash, SHA256_DIGEST_SIZE)) { 1583 err = -EFAULT; 1584 
goto free_map; 1585 } 1586 } else if (attr->excl_prog_hash_size) { 1587 err = -EINVAL; 1588 goto free_map; 1589 } 1590 1591 err = security_bpf_map_create(map, attr, token, uattr.is_kernel); 1592 if (err) 1593 goto free_map_sec; 1594 1595 err = bpf_map_alloc_id(map); 1596 if (err) 1597 goto free_map_sec; 1598 1599 bpf_map_save_memcg(map); 1600 bpf_token_put(token); 1601 1602 err = bpf_map_new_fd(map, f_flags); 1603 if (err < 0) { 1604 /* failed to allocate fd. 1605 * bpf_map_put_with_uref() is needed because the above 1606 * bpf_map_alloc_id() has published the map 1607 * to the userspace and the userspace may 1608 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 1609 */ 1610 bpf_map_put_with_uref(map); 1611 return err; 1612 } 1613 1614 return err; 1615 1616 free_map_sec: 1617 security_bpf_map_free(map); 1618 free_map: 1619 bpf_map_free(map); 1620 put_token: 1621 bpf_token_put(token); 1622 return err; 1623 } 1624 1625 void bpf_map_inc(struct bpf_map *map) 1626 { 1627 atomic64_inc(&map->refcnt); 1628 } 1629 EXPORT_SYMBOL_GPL(bpf_map_inc); 1630 1631 void bpf_map_inc_with_uref(struct bpf_map *map) 1632 { 1633 atomic64_inc(&map->refcnt); 1634 atomic64_inc(&map->usercnt); 1635 } 1636 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); 1637 1638 struct bpf_map *bpf_map_get(u32 ufd) 1639 { 1640 CLASS(fd, f)(ufd); 1641 struct bpf_map *map = __bpf_map_get(f); 1642 1643 if (!IS_ERR(map)) 1644 bpf_map_inc(map); 1645 1646 return map; 1647 } 1648 EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL"); 1649 1650 struct bpf_map *bpf_map_get_with_uref(u32 ufd) 1651 { 1652 CLASS(fd, f)(ufd); 1653 struct bpf_map *map = __bpf_map_get(f); 1654 1655 if (!IS_ERR(map)) 1656 bpf_map_inc_with_uref(map); 1657 1658 return map; 1659 } 1660 1661 /* map_idr_lock should have been held or the map should have been 1662 * protected by rcu read lock. 
1663 */ 1664 struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) 1665 { 1666 int refold; 1667 1668 refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); 1669 if (!refold) 1670 return ERR_PTR(-ENOENT); 1671 if (uref) 1672 atomic64_inc(&map->usercnt); 1673 1674 return map; 1675 } 1676 1677 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) 1678 { 1679 lockdep_assert(rcu_read_lock_held()); 1680 return __bpf_map_inc_not_zero(map, false); 1681 } 1682 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); 1683 1684 int __weak bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, 1685 bool delete) 1686 { 1687 return -ENOTSUPP; 1688 } 1689 1690 static void *__bpf_copy_key(void __user *ukey, u64 key_size) 1691 { 1692 if (key_size) 1693 return vmemdup_user(ukey, key_size); 1694 1695 if (ukey) 1696 return ERR_PTR(-EINVAL); 1697 1698 return NULL; 1699 } 1700 1701 static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size) 1702 { 1703 if (key_size) 1704 return kvmemdup_bpfptr(ukey, key_size); 1705 1706 if (!bpfptr_is_null(ukey)) 1707 return ERR_PTR(-EINVAL); 1708 1709 return NULL; 1710 } 1711 1712 /* last field in 'union bpf_attr' used by this command */ 1713 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags 1714 1715 static int map_lookup_elem(union bpf_attr *attr) 1716 { 1717 void __user *ukey = u64_to_user_ptr(attr->key); 1718 void __user *uvalue = u64_to_user_ptr(attr->value); 1719 struct bpf_map *map; 1720 void *key, *value; 1721 u32 value_size; 1722 int err; 1723 1724 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 1725 return -EINVAL; 1726 1727 CLASS(fd, f)(attr->map_fd); 1728 map = __bpf_map_get(f); 1729 if (IS_ERR(map)) 1730 return PTR_ERR(map); 1731 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) 1732 return -EPERM; 1733 1734 err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK | BPF_F_CPU); 1735 if (err) 1736 return err; 1737 1738 key = __bpf_copy_key(ukey, map->key_size); 1739 if (IS_ERR(key)) 1740 return PTR_ERR(key); 1741 1742 value_size = 
bpf_map_value_size(map, attr->flags); 1743 1744 err = -ENOMEM; 1745 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1746 if (!value) 1747 goto free_key; 1748 1749 if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 1750 if (copy_from_user(value, uvalue, value_size)) 1751 err = -EFAULT; 1752 else 1753 err = bpf_map_copy_value(map, key, value, attr->flags); 1754 goto free_value; 1755 } 1756 1757 err = bpf_map_copy_value(map, key, value, attr->flags); 1758 if (err) 1759 goto free_value; 1760 1761 err = -EFAULT; 1762 if (copy_to_user(uvalue, value, value_size) != 0) 1763 goto free_value; 1764 1765 err = 0; 1766 1767 free_value: 1768 kvfree(value); 1769 free_key: 1770 kvfree(key); 1771 return err; 1772 } 1773 1774 1775 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 1776 1777 static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) 1778 { 1779 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1780 bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel); 1781 struct bpf_map *map; 1782 void *key, *value; 1783 u32 value_size; 1784 int err; 1785 1786 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 1787 return -EINVAL; 1788 1789 CLASS(fd, f)(attr->map_fd); 1790 map = __bpf_map_get(f); 1791 if (IS_ERR(map)) 1792 return PTR_ERR(map); 1793 bpf_map_write_active_inc(map); 1794 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1795 err = -EPERM; 1796 goto err_put; 1797 } 1798 1799 err = bpf_map_check_op_flags(map, attr->flags, ~0); 1800 if (err) 1801 goto err_put; 1802 1803 key = ___bpf_copy_key(ukey, map->key_size); 1804 if (IS_ERR(key)) { 1805 err = PTR_ERR(key); 1806 goto err_put; 1807 } 1808 1809 value_size = bpf_map_value_size(map, attr->flags); 1810 value = kvmemdup_bpfptr(uvalue, value_size); 1811 if (IS_ERR(value)) { 1812 err = PTR_ERR(value); 1813 goto free_key; 1814 } 1815 1816 err = bpf_map_update_value(map, fd_file(f), key, value, attr->flags); 1817 if (!err) 1818 maybe_wait_bpf_programs(map); 1819 1820 kvfree(value); 1821 free_key: 1822 kvfree(key); 1823 
err_put: 1824 bpf_map_write_active_dec(map); 1825 return err; 1826 } 1827 1828 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 1829 1830 static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr) 1831 { 1832 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1833 struct bpf_map *map; 1834 void *key; 1835 int err; 1836 1837 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 1838 return -EINVAL; 1839 1840 CLASS(fd, f)(attr->map_fd); 1841 map = __bpf_map_get(f); 1842 if (IS_ERR(map)) 1843 return PTR_ERR(map); 1844 bpf_map_write_active_inc(map); 1845 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1846 err = -EPERM; 1847 goto err_put; 1848 } 1849 1850 key = ___bpf_copy_key(ukey, map->key_size); 1851 if (IS_ERR(key)) { 1852 err = PTR_ERR(key); 1853 goto err_put; 1854 } 1855 1856 if (bpf_map_is_offloaded(map)) { 1857 err = bpf_map_offload_delete_elem(map, key); 1858 goto out; 1859 } else if (IS_FD_PROG_ARRAY(map) || 1860 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 1861 /* These maps require sleepable context */ 1862 err = map->ops->map_delete_elem(map, key); 1863 goto out; 1864 } 1865 1866 bpf_disable_instrumentation(); 1867 rcu_read_lock(); 1868 err = map->ops->map_delete_elem(map, key); 1869 rcu_read_unlock(); 1870 bpf_enable_instrumentation(); 1871 if (!err) 1872 maybe_wait_bpf_programs(map); 1873 out: 1874 kvfree(key); 1875 err_put: 1876 bpf_map_write_active_dec(map); 1877 return err; 1878 } 1879 1880 /* last field in 'union bpf_attr' used by this command */ 1881 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 1882 1883 static int map_get_next_key(union bpf_attr *attr) 1884 { 1885 void __user *ukey = u64_to_user_ptr(attr->key); 1886 void __user *unext_key = u64_to_user_ptr(attr->next_key); 1887 struct bpf_map *map; 1888 void *key, *next_key; 1889 int err; 1890 1891 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 1892 return -EINVAL; 1893 1894 CLASS(fd, f)(attr->map_fd); 1895 map = __bpf_map_get(f); 1896 if (IS_ERR(map)) 1897 return PTR_ERR(map); 1898 if 
(!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) 1899 return -EPERM; 1900 1901 if (ukey) { 1902 key = __bpf_copy_key(ukey, map->key_size); 1903 if (IS_ERR(key)) 1904 return PTR_ERR(key); 1905 } else { 1906 key = NULL; 1907 } 1908 1909 err = -ENOMEM; 1910 next_key = kvmalloc(map->key_size, GFP_USER); 1911 if (!next_key) 1912 goto free_key; 1913 1914 if (bpf_map_is_offloaded(map)) { 1915 err = bpf_map_offload_get_next_key(map, key, next_key); 1916 goto out; 1917 } 1918 1919 rcu_read_lock(); 1920 err = map->ops->map_get_next_key(map, key, next_key); 1921 rcu_read_unlock(); 1922 out: 1923 if (err) 1924 goto free_next_key; 1925 1926 err = -EFAULT; 1927 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 1928 goto free_next_key; 1929 1930 err = 0; 1931 1932 free_next_key: 1933 kvfree(next_key); 1934 free_key: 1935 kvfree(key); 1936 return err; 1937 } 1938 1939 int generic_map_delete_batch(struct bpf_map *map, 1940 const union bpf_attr *attr, 1941 union bpf_attr __user *uattr) 1942 { 1943 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1944 u32 cp, max_count; 1945 int err = 0; 1946 void *key; 1947 1948 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1949 return -EINVAL; 1950 1951 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1952 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1953 return -EINVAL; 1954 } 1955 1956 max_count = attr->batch.count; 1957 if (!max_count) 1958 return 0; 1959 1960 if (put_user(0, &uattr->batch.count)) 1961 return -EFAULT; 1962 1963 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1964 if (!key) 1965 return -ENOMEM; 1966 1967 for (cp = 0; cp < max_count; cp++) { 1968 err = -EFAULT; 1969 if (copy_from_user(key, keys + cp * map->key_size, 1970 map->key_size)) 1971 break; 1972 1973 if (bpf_map_is_offloaded(map)) { 1974 err = bpf_map_offload_delete_elem(map, key); 1975 break; 1976 } 1977 1978 bpf_disable_instrumentation(); 1979 rcu_read_lock(); 1980 err = map->ops->map_delete_elem(map, key); 1981 rcu_read_unlock(); 1982 
bpf_enable_instrumentation(); 1983 if (err) 1984 break; 1985 cond_resched(); 1986 } 1987 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1988 err = -EFAULT; 1989 1990 kvfree(key); 1991 1992 return err; 1993 } 1994 1995 int generic_map_update_batch(struct bpf_map *map, struct file *map_file, 1996 const union bpf_attr *attr, 1997 union bpf_attr __user *uattr) 1998 { 1999 void __user *values = u64_to_user_ptr(attr->batch.values); 2000 void __user *keys = u64_to_user_ptr(attr->batch.keys); 2001 u32 value_size, cp, max_count; 2002 void *key, *value; 2003 int err = 0; 2004 2005 err = bpf_map_check_op_flags(map, attr->batch.elem_flags, 2006 BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS); 2007 if (err) 2008 return err; 2009 2010 value_size = bpf_map_value_size(map, attr->batch.elem_flags); 2011 2012 max_count = attr->batch.count; 2013 if (!max_count) 2014 return 0; 2015 2016 if (put_user(0, &uattr->batch.count)) 2017 return -EFAULT; 2018 2019 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 2020 if (!key) 2021 return -ENOMEM; 2022 2023 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 2024 if (!value) { 2025 kvfree(key); 2026 return -ENOMEM; 2027 } 2028 2029 for (cp = 0; cp < max_count; cp++) { 2030 err = -EFAULT; 2031 if (copy_from_user(key, keys + cp * map->key_size, 2032 map->key_size) || 2033 copy_from_user(value, values + cp * value_size, value_size)) 2034 break; 2035 2036 err = bpf_map_update_value(map, map_file, key, value, 2037 attr->batch.elem_flags); 2038 2039 if (err) 2040 break; 2041 cond_resched(); 2042 } 2043 2044 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 2045 err = -EFAULT; 2046 2047 kvfree(value); 2048 kvfree(key); 2049 2050 return err; 2051 } 2052 2053 int generic_map_lookup_batch(struct bpf_map *map, 2054 const union bpf_attr *attr, 2055 union bpf_attr __user *uattr) 2056 { 2057 void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); 2058 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 2059 void __user 
*values = u64_to_user_ptr(attr->batch.values); 2060 void __user *keys = u64_to_user_ptr(attr->batch.keys); 2061 void *buf, *buf_prevkey, *prev_key, *key, *value; 2062 u32 value_size, cp, max_count; 2063 int err; 2064 2065 err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK | BPF_F_CPU); 2066 if (err) 2067 return err; 2068 2069 value_size = bpf_map_value_size(map, attr->batch.elem_flags); 2070 2071 max_count = attr->batch.count; 2072 if (!max_count) 2073 return 0; 2074 2075 if (put_user(0, &uattr->batch.count)) 2076 return -EFAULT; 2077 2078 buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 2079 if (!buf_prevkey) 2080 return -ENOMEM; 2081 2082 buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN); 2083 if (!buf) { 2084 kvfree(buf_prevkey); 2085 return -ENOMEM; 2086 } 2087 2088 err = -EFAULT; 2089 prev_key = NULL; 2090 if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) 2091 goto free_buf; 2092 key = buf; 2093 value = key + map->key_size; 2094 if (ubatch) 2095 prev_key = buf_prevkey; 2096 2097 for (cp = 0; cp < max_count;) { 2098 rcu_read_lock(); 2099 err = map->ops->map_get_next_key(map, prev_key, key); 2100 rcu_read_unlock(); 2101 if (err) 2102 break; 2103 err = bpf_map_copy_value(map, key, value, 2104 attr->batch.elem_flags); 2105 2106 if (err == -ENOENT) 2107 goto next_key; 2108 2109 if (err) 2110 goto free_buf; 2111 2112 if (copy_to_user(keys + cp * map->key_size, key, 2113 map->key_size)) { 2114 err = -EFAULT; 2115 goto free_buf; 2116 } 2117 if (copy_to_user(values + cp * value_size, value, value_size)) { 2118 err = -EFAULT; 2119 goto free_buf; 2120 } 2121 2122 cp++; 2123 next_key: 2124 if (!prev_key) 2125 prev_key = buf_prevkey; 2126 2127 swap(prev_key, key); 2128 cond_resched(); 2129 } 2130 2131 if (err == -EFAULT) 2132 goto free_buf; 2133 2134 if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || 2135 (cp && copy_to_user(uobatch, prev_key, map->key_size)))) 2136 err = -EFAULT; 2137 2138 
free_buf: 2139 kvfree(buf_prevkey); 2140 kvfree(buf); 2141 return err; 2142 } 2143 2144 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags 2145 2146 static int map_lookup_and_delete_elem(union bpf_attr *attr) 2147 { 2148 void __user *ukey = u64_to_user_ptr(attr->key); 2149 void __user *uvalue = u64_to_user_ptr(attr->value); 2150 struct bpf_map *map; 2151 void *key, *value; 2152 u32 value_size; 2153 int err; 2154 2155 if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) 2156 return -EINVAL; 2157 2158 if (attr->flags & ~BPF_F_LOCK) 2159 return -EINVAL; 2160 2161 CLASS(fd, f)(attr->map_fd); 2162 map = __bpf_map_get(f); 2163 if (IS_ERR(map)) 2164 return PTR_ERR(map); 2165 bpf_map_write_active_inc(map); 2166 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || 2167 !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 2168 err = -EPERM; 2169 goto err_put; 2170 } 2171 2172 if (attr->flags && 2173 (map->map_type == BPF_MAP_TYPE_QUEUE || 2174 map->map_type == BPF_MAP_TYPE_STACK)) { 2175 err = -EINVAL; 2176 goto err_put; 2177 } 2178 2179 if ((attr->flags & BPF_F_LOCK) && 2180 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 2181 err = -EINVAL; 2182 goto err_put; 2183 } 2184 2185 key = __bpf_copy_key(ukey, map->key_size); 2186 if (IS_ERR(key)) { 2187 err = PTR_ERR(key); 2188 goto err_put; 2189 } 2190 2191 value_size = bpf_map_value_size(map, 0); 2192 2193 err = -ENOMEM; 2194 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 2195 if (!value) 2196 goto free_key; 2197 2198 err = -ENOTSUPP; 2199 if (map->map_type == BPF_MAP_TYPE_QUEUE || 2200 map->map_type == BPF_MAP_TYPE_STACK) { 2201 err = map->ops->map_pop_elem(map, value); 2202 } else if (map->map_type == BPF_MAP_TYPE_HASH || 2203 map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 2204 map->map_type == BPF_MAP_TYPE_LRU_HASH || 2205 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 2206 map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 2207 if (!bpf_map_is_offloaded(map)) { 2208 bpf_disable_instrumentation(); 2209 rcu_read_lock(); 
2210 err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags); 2211 rcu_read_unlock(); 2212 bpf_enable_instrumentation(); 2213 } 2214 } 2215 2216 if (err) 2217 goto free_value; 2218 2219 if (copy_to_user(uvalue, value, value_size) != 0) { 2220 err = -EFAULT; 2221 goto free_value; 2222 } 2223 2224 err = 0; 2225 2226 free_value: 2227 kvfree(value); 2228 free_key: 2229 kvfree(key); 2230 err_put: 2231 bpf_map_write_active_dec(map); 2232 return err; 2233 } 2234 2235 #define BPF_MAP_FREEZE_LAST_FIELD map_fd 2236 2237 static int map_freeze(const union bpf_attr *attr) 2238 { 2239 int err = 0; 2240 struct bpf_map *map; 2241 2242 if (CHECK_ATTR(BPF_MAP_FREEZE)) 2243 return -EINVAL; 2244 2245 CLASS(fd, f)(attr->map_fd); 2246 map = __bpf_map_get(f); 2247 if (IS_ERR(map)) 2248 return PTR_ERR(map); 2249 2250 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) 2251 return -ENOTSUPP; 2252 2253 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) 2254 return -EPERM; 2255 2256 mutex_lock(&map->freeze_mutex); 2257 if (bpf_map_write_active(map)) { 2258 err = -EBUSY; 2259 goto err_put; 2260 } 2261 if (READ_ONCE(map->frozen)) { 2262 err = -EBUSY; 2263 goto err_put; 2264 } 2265 2266 WRITE_ONCE(map->frozen, true); 2267 err_put: 2268 mutex_unlock(&map->freeze_mutex); 2269 return err; 2270 } 2271 2272 static const struct bpf_prog_ops * const bpf_prog_types[] = { 2273 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 2274 [_id] = & _name ## _prog_ops, 2275 #define BPF_MAP_TYPE(_id, _ops) 2276 #define BPF_LINK_TYPE(_id, _name) 2277 #include <linux/bpf_types.h> 2278 #undef BPF_PROG_TYPE 2279 #undef BPF_MAP_TYPE 2280 #undef BPF_LINK_TYPE 2281 }; 2282 2283 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 2284 { 2285 const struct bpf_prog_ops *ops; 2286 2287 if (type >= ARRAY_SIZE(bpf_prog_types)) 2288 return -EINVAL; 2289 type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types)); 2290 ops = bpf_prog_types[type]; 
2291 if (!ops) 2292 return -EINVAL; 2293 2294 if (!bpf_prog_is_offloaded(prog->aux)) 2295 prog->aux->ops = ops; 2296 else 2297 prog->aux->ops = &bpf_offload_prog_ops; 2298 prog->type = type; 2299 return 0; 2300 } 2301 2302 enum bpf_audit { 2303 BPF_AUDIT_LOAD, 2304 BPF_AUDIT_UNLOAD, 2305 BPF_AUDIT_MAX, 2306 }; 2307 2308 static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { 2309 [BPF_AUDIT_LOAD] = "LOAD", 2310 [BPF_AUDIT_UNLOAD] = "UNLOAD", 2311 }; 2312 2313 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) 2314 { 2315 struct audit_context *ctx = NULL; 2316 struct audit_buffer *ab; 2317 2318 if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) 2319 return; 2320 if (audit_enabled == AUDIT_OFF) 2321 return; 2322 if (!in_hardirq() && !irqs_disabled()) 2323 ctx = audit_context(); 2324 ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); 2325 if (unlikely(!ab)) 2326 return; 2327 audit_log_format(ab, "prog-id=%u op=%s", 2328 prog->aux->id, bpf_audit_str[op]); 2329 audit_log_end(ab); 2330 } 2331 2332 static int bpf_prog_alloc_id(struct bpf_prog *prog) 2333 { 2334 int id; 2335 2336 idr_preload(GFP_KERNEL); 2337 spin_lock_bh(&prog_idr_lock); 2338 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 2339 if (id > 0) 2340 prog->aux->id = id; 2341 spin_unlock_bh(&prog_idr_lock); 2342 idr_preload_end(); 2343 2344 /* id is in [1, INT_MAX) */ 2345 if (WARN_ON_ONCE(!id)) 2346 return -ENOSPC; 2347 2348 return id > 0 ? 0 : id; 2349 } 2350 2351 void bpf_prog_free_id(struct bpf_prog *prog) 2352 { 2353 unsigned long flags; 2354 2355 /* cBPF to eBPF migrations are currently not in the idr store. 2356 * Offloaded programs are removed from the store when their device 2357 * disappears - even if someone grabs an fd to them they are unusable, 2358 * simply waiting for refcnt to drop to be freed. 
2359 */ 2360 if (!prog->aux->id) 2361 return; 2362 2363 spin_lock_irqsave(&prog_idr_lock, flags); 2364 idr_remove(&prog_idr, prog->aux->id); 2365 prog->aux->id = 0; 2366 spin_unlock_irqrestore(&prog_idr_lock, flags); 2367 } 2368 2369 static void __bpf_prog_put_rcu(struct rcu_head *rcu) 2370 { 2371 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 2372 2373 kvfree(aux->func_info); 2374 kfree(aux->func_info_aux); 2375 free_uid(aux->user); 2376 security_bpf_prog_free(aux->prog); 2377 bpf_prog_free(aux->prog); 2378 } 2379 2380 static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) 2381 { 2382 bpf_prog_kallsyms_del_all(prog); 2383 btf_put(prog->aux->btf); 2384 module_put(prog->aux->mod); 2385 kvfree(prog->aux->jited_linfo); 2386 kvfree(prog->aux->linfo); 2387 kfree(prog->aux->kfunc_tab); 2388 kfree(prog->aux->ctx_arg_info); 2389 if (prog->aux->attach_btf) 2390 btf_put(prog->aux->attach_btf); 2391 2392 if (deferred) { 2393 if (prog->sleepable) 2394 call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); 2395 else 2396 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 2397 } else { 2398 __bpf_prog_put_rcu(&prog->aux->rcu); 2399 } 2400 } 2401 2402 static void bpf_prog_put_deferred(struct work_struct *work) 2403 { 2404 struct bpf_prog_aux *aux; 2405 struct bpf_prog *prog; 2406 2407 aux = container_of(work, struct bpf_prog_aux, work); 2408 prog = aux->prog; 2409 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 2410 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); 2411 bpf_prog_free_id(prog); 2412 __bpf_prog_put_noref(prog, true); 2413 } 2414 2415 static void __bpf_prog_put(struct bpf_prog *prog) 2416 { 2417 struct bpf_prog_aux *aux = prog->aux; 2418 2419 if (atomic64_dec_and_test(&aux->refcnt)) { 2420 if (in_hardirq() || irqs_disabled()) { 2421 INIT_WORK(&aux->work, bpf_prog_put_deferred); 2422 schedule_work(&aux->work); 2423 } else { 2424 bpf_prog_put_deferred(&aux->work); 2425 } 2426 } 2427 } 2428 2429 void bpf_prog_put(struct 
bpf_prog *prog) 2430 { 2431 __bpf_prog_put(prog); 2432 } 2433 EXPORT_SYMBOL_GPL(bpf_prog_put); 2434 2435 static int bpf_prog_release(struct inode *inode, struct file *filp) 2436 { 2437 struct bpf_prog *prog = filp->private_data; 2438 2439 bpf_prog_put(prog); 2440 return 0; 2441 } 2442 2443 struct bpf_prog_kstats { 2444 u64 nsecs; 2445 u64 cnt; 2446 u64 misses; 2447 }; 2448 2449 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog) 2450 { 2451 struct bpf_prog_stats *stats; 2452 unsigned int flags; 2453 2454 if (unlikely(!prog->stats)) 2455 return; 2456 2457 stats = this_cpu_ptr(prog->stats); 2458 flags = u64_stats_update_begin_irqsave(&stats->syncp); 2459 u64_stats_inc(&stats->misses); 2460 u64_stats_update_end_irqrestore(&stats->syncp, flags); 2461 } 2462 2463 static void bpf_prog_get_stats(const struct bpf_prog *prog, 2464 struct bpf_prog_kstats *stats) 2465 { 2466 u64 nsecs = 0, cnt = 0, misses = 0; 2467 int cpu; 2468 2469 for_each_possible_cpu(cpu) { 2470 const struct bpf_prog_stats *st; 2471 unsigned int start; 2472 u64 tnsecs, tcnt, tmisses; 2473 2474 st = per_cpu_ptr(prog->stats, cpu); 2475 do { 2476 start = u64_stats_fetch_begin(&st->syncp); 2477 tnsecs = u64_stats_read(&st->nsecs); 2478 tcnt = u64_stats_read(&st->cnt); 2479 tmisses = u64_stats_read(&st->misses); 2480 } while (u64_stats_fetch_retry(&st->syncp, start)); 2481 nsecs += tnsecs; 2482 cnt += tcnt; 2483 misses += tmisses; 2484 } 2485 stats->nsecs = nsecs; 2486 stats->cnt = cnt; 2487 stats->misses = misses; 2488 } 2489 2490 #ifdef CONFIG_PROC_FS 2491 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 2492 { 2493 const struct bpf_prog *prog = filp->private_data; 2494 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2495 struct bpf_prog_kstats stats; 2496 2497 bpf_prog_get_stats(prog, &stats); 2498 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2499 seq_printf(m, 2500 "prog_type:\t%u\n" 2501 "prog_jited:\t%u\n" 2502 "prog_tag:\t%s\n" 2503 "memlock:\t%llu\n" 2504 
"prog_id:\t%u\n" 2505 "run_time_ns:\t%llu\n" 2506 "run_cnt:\t%llu\n" 2507 "recursion_misses:\t%llu\n" 2508 "verified_insns:\t%u\n", 2509 prog->type, 2510 prog->jited, 2511 prog_tag, 2512 prog->pages * 1ULL << PAGE_SHIFT, 2513 prog->aux->id, 2514 stats.nsecs, 2515 stats.cnt, 2516 stats.misses, 2517 prog->aux->verified_insns); 2518 } 2519 #endif 2520 2521 const struct file_operations bpf_prog_fops = { 2522 #ifdef CONFIG_PROC_FS 2523 .show_fdinfo = bpf_prog_show_fdinfo, 2524 #endif 2525 .release = bpf_prog_release, 2526 .read = bpf_dummy_read, 2527 .write = bpf_dummy_write, 2528 }; 2529 2530 int bpf_prog_new_fd(struct bpf_prog *prog) 2531 { 2532 int ret; 2533 2534 ret = security_bpf_prog(prog); 2535 if (ret < 0) 2536 return ret; 2537 2538 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 2539 O_RDWR | O_CLOEXEC); 2540 } 2541 2542 void bpf_prog_add(struct bpf_prog *prog, int i) 2543 { 2544 atomic64_add(i, &prog->aux->refcnt); 2545 } 2546 EXPORT_SYMBOL_GPL(bpf_prog_add); 2547 2548 void bpf_prog_sub(struct bpf_prog *prog, int i) 2549 { 2550 /* Only to be used for undoing previous bpf_prog_add() in some 2551 * error path. We still know that another entity in our call 2552 * path holds a reference to the program, thus atomic_sub() can 2553 * be safely used in such cases! 
2554 */ 2555 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 2556 } 2557 EXPORT_SYMBOL_GPL(bpf_prog_sub); 2558 2559 void bpf_prog_inc(struct bpf_prog *prog) 2560 { 2561 atomic64_inc(&prog->aux->refcnt); 2562 } 2563 EXPORT_SYMBOL_GPL(bpf_prog_inc); 2564 2565 /* prog_idr_lock should have been held */ 2566 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 2567 { 2568 int refold; 2569 2570 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 2571 2572 if (!refold) 2573 return ERR_PTR(-ENOENT); 2574 2575 return prog; 2576 } 2577 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 2578 2579 bool bpf_prog_get_ok(struct bpf_prog *prog, 2580 enum bpf_prog_type *attach_type, bool attach_drv) 2581 { 2582 /* not an attachment, just a refcount inc, always allow */ 2583 if (!attach_type) 2584 return true; 2585 2586 if (prog->type != *attach_type) 2587 return false; 2588 if (bpf_prog_is_offloaded(prog->aux) && !attach_drv) 2589 return false; 2590 2591 return true; 2592 } 2593 2594 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, 2595 bool attach_drv) 2596 { 2597 CLASS(fd, f)(ufd); 2598 struct bpf_prog *prog; 2599 2600 if (fd_empty(f)) 2601 return ERR_PTR(-EBADF); 2602 if (fd_file(f)->f_op != &bpf_prog_fops) 2603 return ERR_PTR(-EINVAL); 2604 2605 prog = fd_file(f)->private_data; 2606 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) 2607 return ERR_PTR(-EINVAL); 2608 2609 bpf_prog_inc(prog); 2610 return prog; 2611 } 2612 2613 struct bpf_prog *bpf_prog_get(u32 ufd) 2614 { 2615 return __bpf_prog_get(ufd, NULL, false); 2616 } 2617 2618 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, 2619 bool attach_drv) 2620 { 2621 return __bpf_prog_get(ufd, &type, attach_drv); 2622 } 2623 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 2624 2625 /* Initially all BPF programs could be loaded w/o specifying 2626 * expected_attach_type. 
Later for some of them specifying expected_attach_type 2627 * at load time became required so that program could be validated properly. 2628 * Programs of types that are allowed to be loaded both w/ and w/o (for 2629 * backward compatibility) expected_attach_type, should have the default attach 2630 * type assigned to expected_attach_type for the latter case, so that it can be 2631 * validated later at attach time. 2632 * 2633 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if 2634 * prog type requires it but has some attach types that have to be backward 2635 * compatible. 2636 */ 2637 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) 2638 { 2639 switch (attr->prog_type) { 2640 case BPF_PROG_TYPE_CGROUP_SOCK: 2641 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't 2642 * exist so checking for non-zero is the way to go here. 2643 */ 2644 if (!attr->expected_attach_type) 2645 attr->expected_attach_type = 2646 BPF_CGROUP_INET_SOCK_CREATE; 2647 break; 2648 case BPF_PROG_TYPE_SK_REUSEPORT: 2649 if (!attr->expected_attach_type) 2650 attr->expected_attach_type = 2651 BPF_SK_REUSEPORT_SELECT; 2652 break; 2653 } 2654 } 2655 2656 static int 2657 bpf_prog_load_check_attach(enum bpf_prog_type prog_type, 2658 enum bpf_attach_type expected_attach_type, 2659 struct btf *attach_btf, u32 btf_id, 2660 struct bpf_prog *dst_prog) 2661 { 2662 if (btf_id) { 2663 if (btf_id > BTF_MAX_TYPE) 2664 return -EINVAL; 2665 2666 if (!attach_btf && !dst_prog) 2667 return -EINVAL; 2668 2669 switch (prog_type) { 2670 case BPF_PROG_TYPE_TRACING: 2671 case BPF_PROG_TYPE_LSM: 2672 case BPF_PROG_TYPE_STRUCT_OPS: 2673 case BPF_PROG_TYPE_EXT: 2674 break; 2675 default: 2676 return -EINVAL; 2677 } 2678 } 2679 2680 if (attach_btf && (!btf_id || dst_prog)) 2681 return -EINVAL; 2682 2683 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && 2684 prog_type != BPF_PROG_TYPE_EXT) 2685 return -EINVAL; 2686 2687 switch (prog_type) { 2688 case 
BPF_PROG_TYPE_CGROUP_SOCK: 2689 switch (expected_attach_type) { 2690 case BPF_CGROUP_INET_SOCK_CREATE: 2691 case BPF_CGROUP_INET_SOCK_RELEASE: 2692 case BPF_CGROUP_INET4_POST_BIND: 2693 case BPF_CGROUP_INET6_POST_BIND: 2694 return 0; 2695 default: 2696 return -EINVAL; 2697 } 2698 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2699 switch (expected_attach_type) { 2700 case BPF_CGROUP_INET4_BIND: 2701 case BPF_CGROUP_INET6_BIND: 2702 case BPF_CGROUP_INET4_CONNECT: 2703 case BPF_CGROUP_INET6_CONNECT: 2704 case BPF_CGROUP_UNIX_CONNECT: 2705 case BPF_CGROUP_INET4_GETPEERNAME: 2706 case BPF_CGROUP_INET6_GETPEERNAME: 2707 case BPF_CGROUP_UNIX_GETPEERNAME: 2708 case BPF_CGROUP_INET4_GETSOCKNAME: 2709 case BPF_CGROUP_INET6_GETSOCKNAME: 2710 case BPF_CGROUP_UNIX_GETSOCKNAME: 2711 case BPF_CGROUP_UDP4_SENDMSG: 2712 case BPF_CGROUP_UDP6_SENDMSG: 2713 case BPF_CGROUP_UNIX_SENDMSG: 2714 case BPF_CGROUP_UDP4_RECVMSG: 2715 case BPF_CGROUP_UDP6_RECVMSG: 2716 case BPF_CGROUP_UNIX_RECVMSG: 2717 return 0; 2718 default: 2719 return -EINVAL; 2720 } 2721 case BPF_PROG_TYPE_CGROUP_SKB: 2722 switch (expected_attach_type) { 2723 case BPF_CGROUP_INET_INGRESS: 2724 case BPF_CGROUP_INET_EGRESS: 2725 return 0; 2726 default: 2727 return -EINVAL; 2728 } 2729 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2730 switch (expected_attach_type) { 2731 case BPF_CGROUP_SETSOCKOPT: 2732 case BPF_CGROUP_GETSOCKOPT: 2733 return 0; 2734 default: 2735 return -EINVAL; 2736 } 2737 case BPF_PROG_TYPE_SK_LOOKUP: 2738 if (expected_attach_type == BPF_SK_LOOKUP) 2739 return 0; 2740 return -EINVAL; 2741 case BPF_PROG_TYPE_SK_REUSEPORT: 2742 switch (expected_attach_type) { 2743 case BPF_SK_REUSEPORT_SELECT: 2744 case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: 2745 return 0; 2746 default: 2747 return -EINVAL; 2748 } 2749 case BPF_PROG_TYPE_NETFILTER: 2750 if (expected_attach_type == BPF_NETFILTER) 2751 return 0; 2752 return -EINVAL; 2753 case BPF_PROG_TYPE_SYSCALL: 2754 case BPF_PROG_TYPE_EXT: 2755 if (expected_attach_type) 2756 return -EINVAL; 
2757 fallthrough; 2758 default: 2759 return 0; 2760 } 2761 } 2762 2763 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) 2764 { 2765 switch (prog_type) { 2766 case BPF_PROG_TYPE_SCHED_CLS: 2767 case BPF_PROG_TYPE_SCHED_ACT: 2768 case BPF_PROG_TYPE_XDP: 2769 case BPF_PROG_TYPE_LWT_IN: 2770 case BPF_PROG_TYPE_LWT_OUT: 2771 case BPF_PROG_TYPE_LWT_XMIT: 2772 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2773 case BPF_PROG_TYPE_SK_SKB: 2774 case BPF_PROG_TYPE_SK_MSG: 2775 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2776 case BPF_PROG_TYPE_CGROUP_DEVICE: 2777 case BPF_PROG_TYPE_CGROUP_SOCK: 2778 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2779 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2780 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2781 case BPF_PROG_TYPE_SOCK_OPS: 2782 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2783 case BPF_PROG_TYPE_NETFILTER: 2784 return true; 2785 case BPF_PROG_TYPE_CGROUP_SKB: 2786 /* always unpriv */ 2787 case BPF_PROG_TYPE_SK_REUSEPORT: 2788 /* equivalent to SOCKET_FILTER. need CAP_BPF only */ 2789 default: 2790 return false; 2791 } 2792 } 2793 2794 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) 2795 { 2796 switch (prog_type) { 2797 case BPF_PROG_TYPE_KPROBE: 2798 case BPF_PROG_TYPE_TRACEPOINT: 2799 case BPF_PROG_TYPE_PERF_EVENT: 2800 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2801 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2802 case BPF_PROG_TYPE_TRACING: 2803 case BPF_PROG_TYPE_LSM: 2804 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ 2805 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2806 return true; 2807 default: 2808 return false; 2809 } 2810 } 2811 2812 static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr, 2813 bool is_kernel) 2814 { 2815 bpfptr_t usig = make_bpfptr(attr->signature, is_kernel); 2816 struct bpf_dynptr_kern sig_ptr, insns_ptr; 2817 struct bpf_key *key = NULL; 2818 void *sig; 2819 int err = 0; 2820 2821 if (system_keyring_id_check(attr->keyring_id) == 0) 2822 key = 
bpf_lookup_system_key(attr->keyring_id); 2823 else 2824 key = bpf_lookup_user_key(attr->keyring_id, 0); 2825 2826 if (!key) 2827 return -EINVAL; 2828 2829 sig = kvmemdup_bpfptr(usig, attr->signature_size); 2830 if (IS_ERR(sig)) { 2831 bpf_key_put(key); 2832 return -ENOMEM; 2833 } 2834 2835 bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0, 2836 attr->signature_size); 2837 bpf_dynptr_init(&insns_ptr, prog->insnsi, BPF_DYNPTR_TYPE_LOCAL, 0, 2838 prog->len * sizeof(struct bpf_insn)); 2839 2840 err = bpf_verify_pkcs7_signature((struct bpf_dynptr *)&insns_ptr, 2841 (struct bpf_dynptr *)&sig_ptr, key); 2842 2843 bpf_key_put(key); 2844 kvfree(sig); 2845 return err; 2846 } 2847 2848 static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog) 2849 { 2850 int err; 2851 int i; 2852 2853 for (i = 0; i < prog->aux->used_map_cnt; i++) { 2854 if (prog->aux->used_maps[i]->map_type != BPF_MAP_TYPE_INSN_ARRAY) 2855 continue; 2856 2857 err = bpf_insn_array_ready(prog->aux->used_maps[i]); 2858 if (err) 2859 return err; 2860 } 2861 2862 return 0; 2863 } 2864 2865 /* last field in 'union bpf_attr' used by this command */ 2866 #define BPF_PROG_LOAD_LAST_FIELD keyring_id 2867 2868 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) 2869 { 2870 enum bpf_prog_type type = attr->prog_type; 2871 struct bpf_prog *prog, *dst_prog = NULL; 2872 struct btf *attach_btf = NULL; 2873 struct bpf_token *token = NULL; 2874 bool bpf_cap; 2875 int err; 2876 char license[128]; 2877 2878 if (CHECK_ATTR(BPF_PROG_LOAD)) 2879 return -EINVAL; 2880 2881 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | 2882 BPF_F_ANY_ALIGNMENT | 2883 BPF_F_TEST_STATE_FREQ | 2884 BPF_F_SLEEPABLE | 2885 BPF_F_TEST_RND_HI32 | 2886 BPF_F_XDP_HAS_FRAGS | 2887 BPF_F_XDP_DEV_BOUND_ONLY | 2888 BPF_F_TEST_REG_INVARIANTS | 2889 BPF_F_TOKEN_FD)) 2890 return -EINVAL; 2891 2892 bpf_prog_load_fixup_attach_type(attr); 2893 2894 if (attr->prog_flags & BPF_F_TOKEN_FD) { 2895 token = 
bpf_token_get_from_fd(attr->prog_token_fd); 2896 if (IS_ERR(token)) 2897 return PTR_ERR(token); 2898 /* if current token doesn't grant prog loading permissions, 2899 * then we can't use this token, so ignore it and rely on 2900 * system-wide capabilities checks 2901 */ 2902 if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || 2903 !bpf_token_allow_prog_type(token, attr->prog_type, 2904 attr->expected_attach_type)) { 2905 bpf_token_put(token); 2906 token = NULL; 2907 } 2908 } 2909 2910 bpf_cap = bpf_token_capable(token, CAP_BPF); 2911 err = -EPERM; 2912 2913 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 2914 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && 2915 !bpf_cap) 2916 goto put_token; 2917 2918 /* Intent here is for unprivileged_bpf_disabled to block BPF program 2919 * creation for unprivileged users; other actions depend 2920 * on fd availability and access to bpffs, so are dependent on 2921 * object creation success. Even with unprivileged BPF disabled, 2922 * capability checks are still carried out for these 2923 * and other operations. 2924 */ 2925 if (sysctl_unprivileged_bpf_disabled && !bpf_cap) 2926 goto put_token; 2927 2928 if (attr->insn_cnt == 0 || 2929 attr->insn_cnt > (bpf_cap ? 
BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { 2930 err = -E2BIG; 2931 goto put_token; 2932 } 2933 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 2934 type != BPF_PROG_TYPE_CGROUP_SKB && 2935 !bpf_cap) 2936 goto put_token; 2937 2938 if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN)) 2939 goto put_token; 2940 if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) 2941 goto put_token; 2942 2943 /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog 2944 * or btf, we need to check which one it is 2945 */ 2946 if (attr->attach_prog_fd) { 2947 dst_prog = bpf_prog_get(attr->attach_prog_fd); 2948 if (IS_ERR(dst_prog)) { 2949 dst_prog = NULL; 2950 attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); 2951 if (IS_ERR(attach_btf)) { 2952 err = -EINVAL; 2953 goto put_token; 2954 } 2955 if (!btf_is_kernel(attach_btf)) { 2956 /* attaching through specifying bpf_prog's BTF 2957 * objects directly might be supported eventually 2958 */ 2959 btf_put(attach_btf); 2960 err = -ENOTSUPP; 2961 goto put_token; 2962 } 2963 } 2964 } else if (attr->attach_btf_id) { 2965 /* fall back to vmlinux BTF, if BTF type ID is specified */ 2966 attach_btf = bpf_get_btf_vmlinux(); 2967 if (IS_ERR(attach_btf)) { 2968 err = PTR_ERR(attach_btf); 2969 goto put_token; 2970 } 2971 if (!attach_btf) { 2972 err = -EINVAL; 2973 goto put_token; 2974 } 2975 btf_get(attach_btf); 2976 } 2977 2978 if (bpf_prog_load_check_attach(type, attr->expected_attach_type, 2979 attach_btf, attr->attach_btf_id, 2980 dst_prog)) { 2981 if (dst_prog) 2982 bpf_prog_put(dst_prog); 2983 if (attach_btf) 2984 btf_put(attach_btf); 2985 err = -EINVAL; 2986 goto put_token; 2987 } 2988 2989 /* plain bpf_prog allocation */ 2990 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 2991 if (!prog) { 2992 if (dst_prog) 2993 bpf_prog_put(dst_prog); 2994 if (attach_btf) 2995 btf_put(attach_btf); 2996 err = -EINVAL; 2997 goto put_token; 2998 } 2999 3000 prog->expected_attach_type = 
attr->expected_attach_type; 3001 prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE); 3002 prog->aux->attach_btf = attach_btf; 3003 prog->aux->attach_btf_id = attr->attach_btf_id; 3004 prog->aux->dst_prog = dst_prog; 3005 prog->aux->dev_bound = !!attr->prog_ifindex; 3006 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; 3007 3008 /* move token into prog->aux, reuse taken refcnt */ 3009 prog->aux->token = token; 3010 token = NULL; 3011 3012 prog->aux->user = get_current_user(); 3013 prog->len = attr->insn_cnt; 3014 3015 err = -EFAULT; 3016 if (copy_from_bpfptr(prog->insns, 3017 make_bpfptr(attr->insns, uattr.is_kernel), 3018 bpf_prog_insn_size(prog)) != 0) 3019 goto free_prog; 3020 /* copy eBPF program license from user space */ 3021 if (strncpy_from_bpfptr(license, 3022 make_bpfptr(attr->license, uattr.is_kernel), 3023 sizeof(license) - 1) < 0) 3024 goto free_prog; 3025 license[sizeof(license) - 1] = 0; 3026 3027 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 3028 prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0; 3029 3030 if (attr->signature) { 3031 err = bpf_prog_verify_signature(prog, attr, uattr.is_kernel); 3032 if (err) 3033 goto free_prog; 3034 } 3035 3036 prog->orig_prog = NULL; 3037 prog->jited = 0; 3038 3039 atomic64_set(&prog->aux->refcnt, 1); 3040 3041 if (bpf_prog_is_dev_bound(prog->aux)) { 3042 err = bpf_prog_dev_bound_init(prog, attr); 3043 if (err) 3044 goto free_prog; 3045 } 3046 3047 if (type == BPF_PROG_TYPE_EXT && dst_prog && 3048 bpf_prog_is_dev_bound(dst_prog->aux)) { 3049 err = bpf_prog_dev_bound_inherit(prog, dst_prog); 3050 if (err) 3051 goto free_prog; 3052 } 3053 3054 /* 3055 * Bookkeeping for managing the program attachment chain. 3056 * 3057 * It might be tempting to set attach_tracing_prog flag at the attachment 3058 * time, but this will not prevent from loading bunch of tracing prog 3059 * first, then attach them one to another. 
3060 * 3061 * The flag attach_tracing_prog is set for the whole program lifecycle, and 3062 * doesn't have to be cleared in bpf_tracing_link_release, since tracing 3063 * programs cannot change attachment target. 3064 */ 3065 if (type == BPF_PROG_TYPE_TRACING && dst_prog && 3066 dst_prog->type == BPF_PROG_TYPE_TRACING) { 3067 prog->aux->attach_tracing_prog = true; 3068 } 3069 3070 /* find program type: socket_filter vs tracing_filter */ 3071 err = find_prog_type(type, prog); 3072 if (err < 0) 3073 goto free_prog; 3074 3075 prog->aux->load_time = ktime_get_boottime_ns(); 3076 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, 3077 sizeof(attr->prog_name)); 3078 if (err < 0) 3079 goto free_prog; 3080 3081 err = security_bpf_prog_load(prog, attr, token, uattr.is_kernel); 3082 if (err) 3083 goto free_prog_sec; 3084 3085 /* run eBPF verifier */ 3086 err = bpf_check(&prog, attr, uattr, uattr_size); 3087 if (err < 0) 3088 goto free_used_maps; 3089 3090 prog = bpf_prog_select_runtime(prog, &err); 3091 if (err < 0) 3092 goto free_used_maps; 3093 3094 err = bpf_prog_mark_insn_arrays_ready(prog); 3095 if (err < 0) 3096 goto free_used_maps; 3097 3098 err = bpf_prog_alloc_id(prog); 3099 if (err) 3100 goto free_used_maps; 3101 3102 /* Upon success of bpf_prog_alloc_id(), the BPF prog is 3103 * effectively publicly exposed. However, retrieving via 3104 * bpf_prog_get_fd_by_id() will take another reference, 3105 * therefore it cannot be gone underneath us. 3106 * 3107 * Only for the time /after/ successful bpf_prog_new_fd() 3108 * and before returning to userspace, we might just hold 3109 * one reference and any parallel close on that fd could 3110 * rip everything out. Hence, below notifications must 3111 * happen before bpf_prog_new_fd(). 3112 * 3113 * Also, any failure handling from this point onwards must 3114 * be using bpf_prog_put() given the program is exposed. 
3115 */ 3116 bpf_prog_kallsyms_add(prog); 3117 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); 3118 bpf_audit_prog(prog, BPF_AUDIT_LOAD); 3119 3120 err = bpf_prog_new_fd(prog); 3121 if (err < 0) 3122 bpf_prog_put(prog); 3123 return err; 3124 3125 free_used_maps: 3126 /* In case we have subprogs, we need to wait for a grace 3127 * period before we can tear down JIT memory since symbols 3128 * are already exposed under kallsyms. 3129 */ 3130 __bpf_prog_put_noref(prog, prog->aux->real_func_cnt); 3131 return err; 3132 3133 free_prog_sec: 3134 security_bpf_prog_free(prog); 3135 free_prog: 3136 free_uid(prog->aux->user); 3137 if (prog->aux->attach_btf) 3138 btf_put(prog->aux->attach_btf); 3139 bpf_prog_free(prog); 3140 put_token: 3141 bpf_token_put(token); 3142 return err; 3143 } 3144 3145 #define BPF_OBJ_LAST_FIELD path_fd 3146 3147 static int bpf_obj_pin(const union bpf_attr *attr) 3148 { 3149 int path_fd; 3150 3151 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD) 3152 return -EINVAL; 3153 3154 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3155 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3156 return -EINVAL; 3157 3158 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 3159 return bpf_obj_pin_user(attr->bpf_fd, path_fd, 3160 u64_to_user_ptr(attr->pathname)); 3161 } 3162 3163 static int bpf_obj_get(const union bpf_attr *attr) 3164 { 3165 int path_fd; 3166 3167 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 3168 attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD)) 3169 return -EINVAL; 3170 3171 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 3172 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 3173 return -EINVAL; 3174 3175 path_fd = attr->file_flags & BPF_F_PATH_FD ? 
attr->path_fd : AT_FDCWD; 3176 return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname), 3177 attr->file_flags); 3178 } 3179 3180 /* bpf_link_init_sleepable() allows to specify whether BPF link itself has 3181 * "sleepable" semantics, which normally would mean that BPF link's attach 3182 * hook can dereference link or link's underlying program for some time after 3183 * detachment due to RCU Tasks Trace-based lifetime protection scheme. 3184 * BPF program itself can be non-sleepable, yet, because it's transitively 3185 * reachable through BPF link, its freeing has to be delayed until after RCU 3186 * Tasks Trace GP. 3187 */ 3188 void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, 3189 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3190 enum bpf_attach_type attach_type, bool sleepable) 3191 { 3192 WARN_ON(ops->dealloc && ops->dealloc_deferred); 3193 atomic64_set(&link->refcnt, 1); 3194 link->type = type; 3195 link->sleepable = sleepable; 3196 link->id = 0; 3197 link->ops = ops; 3198 link->prog = prog; 3199 link->attach_type = attach_type; 3200 } 3201 3202 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, 3203 const struct bpf_link_ops *ops, struct bpf_prog *prog, 3204 enum bpf_attach_type attach_type) 3205 { 3206 bpf_link_init_sleepable(link, type, ops, prog, attach_type, false); 3207 } 3208 3209 static void bpf_link_free_id(int id) 3210 { 3211 if (!id) 3212 return; 3213 3214 spin_lock_bh(&link_idr_lock); 3215 idr_remove(&link_idr, id); 3216 spin_unlock_bh(&link_idr_lock); 3217 } 3218 3219 /* Clean up bpf_link and corresponding anon_inode file and FD. After 3220 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 3221 * anon_inode's release() call. This helper marks bpf_link as 3222 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 3223 * is not decremented, it's the responsibility of a calling code that failed 3224 * to complete bpf_link initialization. 
3225 * This helper eventually calls link's dealloc callback, but does not call 3226 * link's release callback. 3227 */ 3228 void bpf_link_cleanup(struct bpf_link_primer *primer) 3229 { 3230 primer->link->prog = NULL; 3231 bpf_link_free_id(primer->id); 3232 fput(primer->file); 3233 put_unused_fd(primer->fd); 3234 } 3235 3236 void bpf_link_inc(struct bpf_link *link) 3237 { 3238 atomic64_inc(&link->refcnt); 3239 } 3240 3241 static void bpf_link_dealloc(struct bpf_link *link) 3242 { 3243 /* now that we know that bpf_link itself can't be reached, put underlying BPF program */ 3244 if (link->prog) 3245 bpf_prog_put(link->prog); 3246 3247 /* free bpf_link and its containing memory */ 3248 if (link->ops->dealloc_deferred) 3249 link->ops->dealloc_deferred(link); 3250 else 3251 link->ops->dealloc(link); 3252 } 3253 3254 static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) 3255 { 3256 struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); 3257 3258 bpf_link_dealloc(link); 3259 } 3260 3261 static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) 3262 { 3263 if (rcu_trace_implies_rcu_gp()) 3264 bpf_link_defer_dealloc_rcu_gp(rcu); 3265 else 3266 call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); 3267 } 3268 3269 /* bpf_link_free is guaranteed to be called from process context */ 3270 static void bpf_link_free(struct bpf_link *link) 3271 { 3272 const struct bpf_link_ops *ops = link->ops; 3273 3274 bpf_link_free_id(link->id); 3275 /* detach BPF program, clean up used resources */ 3276 if (link->prog) 3277 ops->release(link); 3278 if (ops->dealloc_deferred) { 3279 /* Schedule BPF link deallocation, which will only then 3280 * trigger putting BPF program refcount. 
3281 * If underlying BPF program is sleepable or BPF link's target 3282 * attach hookpoint is sleepable or otherwise requires RCU GPs 3283 * to ensure link and its underlying BPF program is not 3284 * reachable anymore, we need to first wait for RCU tasks 3285 * trace sync, and then go through "classic" RCU grace period 3286 */ 3287 if (link->sleepable || (link->prog && link->prog->sleepable)) 3288 call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); 3289 else 3290 call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); 3291 } else if (ops->dealloc) { 3292 bpf_link_dealloc(link); 3293 } 3294 } 3295 3296 static void bpf_link_put_deferred(struct work_struct *work) 3297 { 3298 struct bpf_link *link = container_of(work, struct bpf_link, work); 3299 3300 bpf_link_free(link); 3301 } 3302 3303 /* bpf_link_put might be called from atomic context. It needs to be called 3304 * from sleepable context in order to acquire sleeping locks during the process. 3305 */ 3306 void bpf_link_put(struct bpf_link *link) 3307 { 3308 if (!atomic64_dec_and_test(&link->refcnt)) 3309 return; 3310 3311 INIT_WORK(&link->work, bpf_link_put_deferred); 3312 schedule_work(&link->work); 3313 } 3314 EXPORT_SYMBOL(bpf_link_put); 3315 3316 static void bpf_link_put_direct(struct bpf_link *link) 3317 { 3318 if (!atomic64_dec_and_test(&link->refcnt)) 3319 return; 3320 bpf_link_free(link); 3321 } 3322 3323 static int bpf_link_release(struct inode *inode, struct file *filp) 3324 { 3325 struct bpf_link *link = filp->private_data; 3326 3327 bpf_link_put_direct(link); 3328 return 0; 3329 } 3330 3331 #ifdef CONFIG_PROC_FS 3332 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 3333 #define BPF_MAP_TYPE(_id, _ops) 3334 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name, 3335 static const char *bpf_link_type_strs[] = { 3336 [BPF_LINK_TYPE_UNSPEC] = "<invalid>", 3337 #include <linux/bpf_types.h> 3338 }; 3339 #undef BPF_PROG_TYPE 3340 #undef BPF_MAP_TYPE 3341 #undef BPF_LINK_TYPE 3342 3343 
static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) 3344 { 3345 const struct bpf_link *link = filp->private_data; 3346 const struct bpf_prog *prog = link->prog; 3347 enum bpf_link_type type = link->type; 3348 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 3349 3350 if (type < ARRAY_SIZE(bpf_link_type_strs) && bpf_link_type_strs[type]) { 3351 if (link->type == BPF_LINK_TYPE_KPROBE_MULTI) 3352 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_KPROBE_MULTI_RETURN ? 3353 "kretprobe_multi" : "kprobe_multi"); 3354 else if (link->type == BPF_LINK_TYPE_UPROBE_MULTI) 3355 seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_UPROBE_MULTI_RETURN ? 3356 "uretprobe_multi" : "uprobe_multi"); 3357 else 3358 seq_printf(m, "link_type:\t%s\n", bpf_link_type_strs[type]); 3359 } else { 3360 WARN_ONCE(1, "missing BPF_LINK_TYPE(...) for link type %u\n", type); 3361 seq_printf(m, "link_type:\t<%u>\n", type); 3362 } 3363 seq_printf(m, "link_id:\t%u\n", link->id); 3364 3365 if (prog) { 3366 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 3367 seq_printf(m, 3368 "prog_tag:\t%s\n" 3369 "prog_id:\t%u\n", 3370 prog_tag, 3371 prog->aux->id); 3372 } 3373 if (link->ops->show_fdinfo) 3374 link->ops->show_fdinfo(link, m); 3375 } 3376 #endif 3377 3378 static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts) 3379 { 3380 struct bpf_link *link = file->private_data; 3381 3382 return link->ops->poll(file, pts); 3383 } 3384 3385 static const struct file_operations bpf_link_fops = { 3386 #ifdef CONFIG_PROC_FS 3387 .show_fdinfo = bpf_link_show_fdinfo, 3388 #endif 3389 .release = bpf_link_release, 3390 .read = bpf_dummy_read, 3391 .write = bpf_dummy_write, 3392 }; 3393 3394 static const struct file_operations bpf_link_fops_poll = { 3395 #ifdef CONFIG_PROC_FS 3396 .show_fdinfo = bpf_link_show_fdinfo, 3397 #endif 3398 .release = bpf_link_release, 3399 .read = bpf_dummy_read, 3400 .write = bpf_dummy_write, 3401 .poll = bpf_link_poll, 3402 }; 3403 3404 static 
int bpf_link_alloc_id(struct bpf_link *link) 3405 { 3406 int id; 3407 3408 idr_preload(GFP_KERNEL); 3409 spin_lock_bh(&link_idr_lock); 3410 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); 3411 spin_unlock_bh(&link_idr_lock); 3412 idr_preload_end(); 3413 3414 return id; 3415 } 3416 3417 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, 3418 * reserving unused FD and allocating ID from link_idr. This is to be paired 3419 * with bpf_link_settle() to install FD and ID and expose bpf_link to 3420 * user-space, if bpf_link is successfully attached. If not, bpf_link and 3421 * pre-allocated resources are to be freed with bpf_cleanup() call. All the 3422 * transient state is passed around in struct bpf_link_primer. 3423 * This is preferred way to create and initialize bpf_link, especially when 3424 * there are complicated and expensive operations in between creating bpf_link 3425 * itself and attaching it to BPF hook. By using bpf_link_prime() and 3426 * bpf_link_settle() kernel code using bpf_link doesn't have to perform 3427 * expensive (and potentially failing) roll back operations in a rare case 3428 * that file, FD, or ID can't be allocated. 3429 */ 3430 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) 3431 { 3432 struct file *file; 3433 int fd, id; 3434 3435 fd = get_unused_fd_flags(O_CLOEXEC); 3436 if (fd < 0) 3437 return fd; 3438 3439 3440 id = bpf_link_alloc_id(link); 3441 if (id < 0) { 3442 put_unused_fd(fd); 3443 return id; 3444 } 3445 3446 file = anon_inode_getfile("bpf_link", 3447 link->ops->poll ? 
&bpf_link_fops_poll : &bpf_link_fops, 3448 link, O_CLOEXEC); 3449 if (IS_ERR(file)) { 3450 bpf_link_free_id(id); 3451 put_unused_fd(fd); 3452 return PTR_ERR(file); 3453 } 3454 3455 primer->link = link; 3456 primer->file = file; 3457 primer->fd = fd; 3458 primer->id = id; 3459 return 0; 3460 } 3461 3462 int bpf_link_settle(struct bpf_link_primer *primer) 3463 { 3464 /* make bpf_link fetchable by ID */ 3465 spin_lock_bh(&link_idr_lock); 3466 primer->link->id = primer->id; 3467 spin_unlock_bh(&link_idr_lock); 3468 /* make bpf_link fetchable by FD */ 3469 fd_install(primer->fd, primer->file); 3470 /* pass through installed FD */ 3471 return primer->fd; 3472 } 3473 3474 int bpf_link_new_fd(struct bpf_link *link) 3475 { 3476 return anon_inode_getfd("bpf-link", 3477 link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops, 3478 link, O_CLOEXEC); 3479 } 3480 3481 struct bpf_link *bpf_link_get_from_fd(u32 ufd) 3482 { 3483 CLASS(fd, f)(ufd); 3484 struct bpf_link *link; 3485 3486 if (fd_empty(f)) 3487 return ERR_PTR(-EBADF); 3488 if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) 3489 return ERR_PTR(-EINVAL); 3490 3491 link = fd_file(f)->private_data; 3492 bpf_link_inc(link); 3493 return link; 3494 } 3495 EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL"); 3496 3497 static void bpf_tracing_link_release(struct bpf_link *link) 3498 { 3499 struct bpf_tracing_link *tr_link = 3500 container_of(link, struct bpf_tracing_link, link.link); 3501 3502 WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, 3503 tr_link->trampoline, 3504 tr_link->tgt_prog)); 3505 3506 bpf_trampoline_put(tr_link->trampoline); 3507 3508 /* tgt_prog is NULL if target is a kernel function */ 3509 if (tr_link->tgt_prog) 3510 bpf_prog_put(tr_link->tgt_prog); 3511 } 3512 3513 static void bpf_tracing_link_dealloc(struct bpf_link *link) 3514 { 3515 struct bpf_tracing_link *tr_link = 3516 container_of(link, struct bpf_tracing_link, link.link); 3517 3518 kfree(tr_link); 3519 } 3520 
/* ->show_fdinfo() of tracing links: print attach type, the target object and
 * BTF IDs unpacked from the trampoline key, and the link cookie.
 */
static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
					 struct seq_file *seq)
{
	struct bpf_tracing_link *tr_link =
		container_of(link, struct bpf_tracing_link, link.link);
	u32 target_btf_id, target_obj_id;

	bpf_trampoline_unpack_key(tr_link->trampoline->key,
				  &target_obj_id, &target_btf_id);
	seq_printf(seq,
		   "attach_type:\t%d\n"
		   "target_obj_id:\t%u\n"
		   "target_btf_id:\t%u\n"
		   "cookie:\t%llu\n",
		   link->attach_type,
		   target_obj_id,
		   target_btf_id,
		   tr_link->link.cookie);
}

/* ->fill_link_info() of tracing links: report the same fields as
 * bpf_tracing_link_show_fdinfo() through @info->tracing. Always returns 0.
 */
static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
					   struct bpf_link_info *info)
{
	struct bpf_tracing_link *tr_link =
		container_of(link, struct bpf_tracing_link, link.link);

	info->tracing.attach_type = link->attach_type;
	info->tracing.cookie = tr_link->link.cookie;
	bpf_trampoline_unpack_key(tr_link->trampoline->key,
				  &info->tracing.target_obj_id,
				  &info->tracing.target_btf_id);

	return 0;
}

static const struct bpf_link_ops bpf_tracing_link_lops = {
	.release = bpf_tracing_link_release,
	.dealloc = bpf_tracing_link_dealloc,
	.show_fdinfo = bpf_tracing_link_show_fdinfo,
	.fill_link_info = bpf_tracing_link_fill_link_info,
};

/* Attach a TRACING, EXT or LSM program to a trampoline and expose the
 * attachment as a BPF_LINK_TYPE_TRACING link.
 *
 * @prog:        program to attach
 * @tgt_prog_fd: FD of the target program, or 0; must be given together with
 *               @btf_id and is only accepted for BPF_PROG_TYPE_EXT
 * @btf_id:      BTF ID of the attach target, or 0
 * @bpf_cookie:  cookie stored in the link
 * @attach_type: attach type recorded in the link
 *
 * Returns the new link FD from bpf_link_settle() on success, or a negative
 * error. On error, a target program reference taken here via @tgt_prog_fd is
 * dropped again; @prog itself is never put by this function.
 */
static int bpf_tracing_prog_attach(struct bpf_prog *prog,
				   int tgt_prog_fd,
				   u32 btf_id,
				   u64 bpf_cookie,
				   enum bpf_attach_type attach_type)
{
	struct bpf_link_primer link_primer;
	struct bpf_prog *tgt_prog = NULL;
	struct bpf_trampoline *tr = NULL;
	struct bpf_tracing_link *link;
	u64 key = 0;
	int err;

	/* only a fixed set of prog type / expected attach type pairs is valid */
	switch (prog->type) {
	case BPF_PROG_TYPE_TRACING:
		if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
		    prog->expected_attach_type != BPF_TRACE_FEXIT &&
		    prog->expected_attach_type != BPF_MODIFY_RETURN) {
			err = -EINVAL;
			goto out_put_prog;
		}
		break;
	case BPF_PROG_TYPE_EXT:
		if (prog->expected_attach_type != 0) {
			err = -EINVAL;
			goto out_put_prog;
		}
		break;
	case BPF_PROG_TYPE_LSM:
		if (prog->expected_attach_type != BPF_LSM_MAC) {
			err = -EINVAL;
			goto out_put_prog;
		}
		break;
	default:
		err = -EINVAL;
		goto out_put_prog;
	}

	/* tgt_prog_fd and btf_id must be either both set or both zero */
	if (!!tgt_prog_fd != !!btf_id) {
		err = -EINVAL;
		goto out_put_prog;
	}

	if (tgt_prog_fd) {
		/*
		 * For now we only allow new targets for BPF_PROG_TYPE_EXT. If this
		 * part would be changed to implement the same for
		 * BPF_PROG_TYPE_TRACING, do not forget to update the way how
		 * attach_tracing_prog flag is set.
		 */
		if (prog->type != BPF_PROG_TYPE_EXT) {
			err = -EINVAL;
			goto out_put_prog;
		}

		tgt_prog = bpf_prog_get(tgt_prog_fd);
		if (IS_ERR(tgt_prog)) {
			err = PTR_ERR(tgt_prog);
			/* keep out_put_prog from putting an error pointer */
			tgt_prog = NULL;
			goto out_put_prog;
		}

		key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
	}

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link) {
		err = -ENOMEM;
		goto out_put_prog;
	}
	bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING,
		      &bpf_tracing_link_lops, prog, attach_type);

	link->link.cookie = bpf_cookie;

	mutex_lock(&prog->aux->dst_mutex);

	/* There are a few possible cases here:
	 *
	 * - if prog->aux->dst_trampoline is set, the program was just loaded
	 *   and not yet attached to anything, so we can use the values stored
	 *   in prog->aux
	 *
	 * - if prog->aux->dst_trampoline is NULL, the program has already been
	 *   attached to a target and its initial target was cleared (below)
	 *
	 * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
	 *   target_btf_id using the link_create API.
	 *
	 * - if tgt_prog == NULL, this function was called using the old
	 *   raw_tracepoint_open API, and we need a target from prog->aux
	 *
	 * - if both prog->aux->dst_trampoline and tgt_prog are NULL, the
	 *   program was detached and is going for re-attachment.
	 *
	 * - if prog->aux->dst_trampoline is NULL and tgt_prog and prog->aux->attach_btf
	 *   are NULL, then program was already attached and user did not provide
	 *   tgt_prog_fd so we have no way to find out or create trampoline
	 */
	if (!prog->aux->dst_trampoline && !tgt_prog) {
		/*
		 * Allow re-attach for TRACING and LSM programs. If it's
		 * currently linked, bpf_trampoline_link_prog will fail.
		 * EXT programs need to specify tgt_prog_fd, so they
		 * re-attach in separate code path.
		 */
		if (prog->type != BPF_PROG_TYPE_TRACING &&
		    prog->type != BPF_PROG_TYPE_LSM) {
			err = -EINVAL;
			goto out_unlock;
		}
		/* We can allow re-attach only if we have valid attach_btf. */
		if (!prog->aux->attach_btf) {
			err = -EINVAL;
			goto out_unlock;
		}
		btf_id = prog->aux->attach_btf_id;
		key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id);
	}

	if (!prog->aux->dst_trampoline ||
	    (key && key != prog->aux->dst_trampoline->key)) {
		/* If there is no saved target, or the specified target is
		 * different from the destination specified at load time, we
		 * need a new trampoline and a check for compatibility
		 */
		struct bpf_attach_target_info tgt_info = {};

		err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
					      &tgt_info);
		if (err)
			goto out_unlock;

		if (tgt_info.tgt_mod) {
			/* swap the module reference held in prog->aux */
			module_put(prog->aux->mod);
			prog->aux->mod = tgt_info.tgt_mod;
		}

		tr = bpf_trampoline_get(key, &tgt_info);
		if (!tr) {
			err = -ENOMEM;
			goto out_unlock;
		}
	} else {
		/* The caller didn't specify a target, or the target was the
		 * same as the destination supplied during program load. This
		 * means we can reuse the trampoline and reference from program
		 * load time, and there is no need to allocate a new one. This
		 * can only happen once for any program, as the saved values in
		 * prog->aux are cleared below.
		 */
		tr = prog->aux->dst_trampoline;
		tgt_prog = prog->aux->dst_prog;
	}

	err = bpf_link_prime(&link->link.link, &link_primer);
	if (err)
		goto out_unlock;

	err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog);
	if (err) {
		bpf_link_cleanup(&link_primer);
		/* after bpf_link_cleanup() the kfree() at out_unlock must
		 * not see the link again
		 */
		link = NULL;
		goto out_unlock;
	}

	link->tgt_prog = tgt_prog;
	link->trampoline = tr;

	/* Always clear the trampoline and target prog from prog->aux to make
	 * sure the original attach destination is not kept alive after a
	 * program is (re-)attached to another target.
	 */
	if (prog->aux->dst_prog &&
	    (tgt_prog_fd || tr != prog->aux->dst_trampoline))
		/* got extra prog ref from syscall, or attaching to different prog */
		bpf_prog_put(prog->aux->dst_prog);
	if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline)
		/* we allocated a new trampoline, so free the old one */
		bpf_trampoline_put(prog->aux->dst_trampoline);

	prog->aux->dst_prog = NULL;
	prog->aux->dst_trampoline = NULL;
	mutex_unlock(&prog->aux->dst_mutex);

	return bpf_link_settle(&link_primer);
out_unlock:
	/* only put a trampoline obtained here, not the one saved in prog->aux */
	if (tr && tr != prog->aux->dst_trampoline)
		bpf_trampoline_put(tr);
	mutex_unlock(&prog->aux->dst_mutex);
	kfree(link);
out_put_prog:
	/* only drop the reference taken via tgt_prog_fd */
	if (tgt_prog_fd && tgt_prog)
		bpf_prog_put(tgt_prog);
	return err;
}

/* ->release() of raw tracepoint links: unregister the probe and drop the
 * raw tracepoint reference.
 */
static void bpf_raw_tp_link_release(struct bpf_link *link)
{
	struct bpf_raw_tp_link *raw_tp =
		container_of(link, struct bpf_raw_tp_link, link);

	bpf_probe_unregister(raw_tp->btp, raw_tp);
	bpf_put_raw_tracepoint(raw_tp->btp);
}

/* Free the containing bpf_raw_tp_link (installed as ->dealloc_deferred). */
static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
{
	struct bpf_raw_tp_link *raw_tp =
		container_of(link, struct bpf_raw_tp_link, link);

	kfree(raw_tp);
}

/* ->show_fdinfo() of raw tracepoint links: print tracepoint name and cookie. */
static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link,
					struct seq_file *seq)
{
	struct bpf_raw_tp_link *raw_tp_link =
		container_of(link, struct bpf_raw_tp_link, link);

	seq_printf(seq,
		   "tp_name:\t%s\n"
		   "cookie:\t%llu\n",
		   raw_tp_link->btp->tp->name,
		   raw_tp_link->cookie);
}

/* Copy the string @buf of length @len (excluding the terminating NUL) into
 * the user buffer @ubuf of size @ulen.
 *
 * If the buffer can hold @len + 1 bytes the whole string including its NUL is
 * copied and 0 is returned. Otherwise the first @ulen - 1 bytes are copied,
 * a NUL is stored in the last byte and -ENOSPC is returned. Returns -EFAULT
 * if a user access fails.
 *
 * NOTE: @ulen - 1 is computed without a check for @ulen == 0; the callers
 * visible in this file only get here with a non-NULL buffer after rejecting
 * "!ulen ^ !ubuf".
 */
static int bpf_copy_to_user(char __user *ubuf, const char *buf, u32 ulen,
			    u32 len)
{
	if (ulen >= len + 1) {
		if (copy_to_user(ubuf, buf, len + 1))
			return -EFAULT;
	} else {
		char zero = '\0';

		if (copy_to_user(ubuf, buf, ulen - 1))
			return -EFAULT;
		if (put_user(zero, ubuf + ulen - 1))
			return -EFAULT;
		return -ENOSPC;
	}

	return 0;
}

/* ->fill_link_info() of raw tracepoint links.
 *
 * Stores the required name buffer size (name length + 1) and the cookie in
 * @info and, if user space supplied a buffer, copies the tracepoint name
 * into it. Buffer pointer and length must be both set or both zero,
 * otherwise -EINVAL is returned.
 */
static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
					  struct bpf_link_info *info)
{
	struct bpf_raw_tp_link *raw_tp_link =
		container_of(link, struct bpf_raw_tp_link, link);
	char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name);
	const char *tp_name = raw_tp_link->btp->tp->name;
	u32 ulen = info->raw_tracepoint.tp_name_len;
	size_t tp_len = strlen(tp_name);

	if (!ulen ^ !ubuf)
		return -EINVAL;

	info->raw_tracepoint.tp_name_len = tp_len + 1;
	info->raw_tracepoint.cookie = raw_tp_link->cookie;

	if (!ubuf)
		return 0;

	return bpf_copy_to_user(ubuf, tp_name, ulen, tp_len);
}

static const struct bpf_link_ops bpf_raw_tp_link_lops = {
	.release = bpf_raw_tp_link_release,
	.dealloc_deferred = bpf_raw_tp_link_dealloc,
	.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
	.fill_link_info = bpf_raw_tp_link_fill_link_info,
};

#ifdef CONFIG_PERF_EVENTS
/* Link that ties a BPF program to a perf event via the event's file. */
struct bpf_perf_link {
	struct bpf_link link;
	struct file *perf_file;
};

/* ->release() of perf links: detach the program from the perf event and drop
 * the reference on the perf event file.
 */
static void bpf_perf_link_release(struct bpf_link *link)
{
	struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
	struct perf_event *event = perf_link->perf_file->private_data;

	perf_event_free_bpf_prog(event);
	fput(perf_link->perf_file);
}

/* ->dealloc() of perf links: free the containing bpf_perf_link. */
static void bpf_perf_link_dealloc(struct bpf_link *link)
{
	struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);

	kfree(perf_link);
}

/* Common part of the perf link info fillers.
 *
 * Queries bpf_get_perf_event_info() for @event, passing @fd_type,
 * @probe_offset, @probe_addr and @missed straight through, and handles the
 * name buffer: on input *@ulenp is the size of the user buffer @uname, on
 * output it is the size needed for the name (length + 1, or 1 if there is no
 * name). With a non-NULL @uname the name (or an empty string) is copied out.
 *
 * Returns 0 on success, -EINVAL if exactly one of @uname and *@ulenp is zero,
 * or the error of bpf_get_perf_event_info() or of the user copy.
 */
static int bpf_perf_link_fill_common(const struct perf_event *event,
				     char __user *uname, u32 *ulenp,
				     u64 *probe_offset, u64 *probe_addr,
				     u32 *fd_type, unsigned long *missed)
{
	const char *buf;
	u32 prog_id, ulen;
	size_t len;
	int err;

	ulen = *ulenp;
	if (!ulen ^ !uname)
		return -EINVAL;

	err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf,
				      probe_offset, probe_addr, missed);
	if (err)
		return err;

	if (buf) {
		len = strlen(buf);
		*ulenp = len + 1;
	} else {
		*ulenp = 1;
	}
	if (!uname)
		return 0;

	if (buf) {
		err = bpf_copy_to_user(uname, buf, ulen, len);
		if (err)
			return err;
	} else {
		char zero = '\0';

		if (put_user(zero, uname))
			return -EFAULT;
	}
	return 0;
}

#ifdef CONFIG_KPROBE_EVENTS
/* Fill @info->perf_event for a kprobe/kretprobe backed perf link.
 * The probe address is reported as 0 unless kallsyms_show_value() permits
 * showing it to the current credentials.
 */
static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
				     struct bpf_link_info *info)
{
	unsigned long missed;
	char __user *uname;
	u64 addr, offset;
	u32 ulen, type;
	int err;

	uname = u64_to_user_ptr(info->perf_event.kprobe.func_name);
	ulen = info->perf_event.kprobe.name_len;
	err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr,
					&type, &missed);
	if (err)
		return err;
	if (type == BPF_FD_TYPE_KRETPROBE)
		info->perf_event.type = BPF_PERF_EVENT_KRETPROBE;
	else
		info->perf_event.type = BPF_PERF_EVENT_KPROBE;
	info->perf_event.kprobe.name_len = ulen;
	info->perf_event.kprobe.offset = offset;
	info->perf_event.kprobe.missed = missed;
	if (!kallsyms_show_value(current_cred()))
		addr = 0;
	info->perf_event.kprobe.addr = addr;
	info->perf_event.kprobe.cookie = event->bpf_cookie;
	return 0;
}

/* fdinfo output for a kprobe/kretprobe backed perf link; prints nothing if
 * bpf_get_perf_event_info() fails.
 */
static void bpf_perf_link_fdinfo_kprobe(const struct perf_event *event,
					struct seq_file *seq)
{
	const char *name;
	int err;
	u32 prog_id, type;
	u64 offset, addr;
	unsigned long missed;

	err = bpf_get_perf_event_info(event, &prog_id, &type, &name,
				      &offset, &addr, &missed);
	if (err)
		return;

	seq_printf(seq,
		   "name:\t%s\n"
		   "offset:\t%#llx\n"
		   "missed:\t%lu\n"
		   "addr:\t%#llx\n"
		   "event_type:\t%s\n"
		   "cookie:\t%llu\n",
		   name, offset, missed, addr,
		   type == BPF_FD_TYPE_KRETPROBE ?  "kretprobe" : "kprobe",
		   event->bpf_cookie);
}
#endif

#ifdef CONFIG_UPROBE_EVENTS
/* Fill @info->perf_event for a uprobe/uretprobe backed perf link. */
static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
				     struct bpf_link_info *info)
{
	u64 ref_ctr_offset, offset;
	char __user *uname;
	u32 ulen, type;
	int err;

	uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
	ulen = info->perf_event.uprobe.name_len;
	/* the probe_addr slot of the common helper receives ref_ctr_offset here */
	err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &ref_ctr_offset,
					&type, NULL);
	if (err)
		return err;

	if (type == BPF_FD_TYPE_URETPROBE)
		info->perf_event.type = BPF_PERF_EVENT_URETPROBE;
	else
		info->perf_event.type = BPF_PERF_EVENT_UPROBE;
	info->perf_event.uprobe.name_len = ulen;
	info->perf_event.uprobe.offset = offset;
	info->perf_event.uprobe.cookie = event->bpf_cookie;
	info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset;
	return 0;
}

/* fdinfo output for a uprobe/uretprobe backed perf link; prints nothing if
 * bpf_get_perf_event_info() fails.
 */
static void bpf_perf_link_fdinfo_uprobe(const struct perf_event *event,
					struct seq_file *seq)
{
	const char *name;
	int err;
	u32 prog_id, type;
	u64 offset, ref_ctr_offset;
	unsigned long missed;

	err = bpf_get_perf_event_info(event, &prog_id, &type, &name,
				      &offset, &ref_ctr_offset, &missed);
	if (err)
		return;

	seq_printf(seq,
		   "name:\t%s\n"
		   "offset:\t%#llx\n"
		   "ref_ctr_offset:\t%#llx\n"
		   "event_type:\t%s\n"
		   "cookie:\t%llu\n",
		   name, offset, ref_ctr_offset,
		   type == BPF_FD_TYPE_URETPROBE ?  "uretprobe" : "uprobe",
		   event->bpf_cookie);
}
#endif

/* Dispatch to the kprobe or uprobe filler based on the trace event flags.
 * Returns -EOPNOTSUPP if neither applies (or support is compiled out).
 */
static int bpf_perf_link_fill_probe(const struct perf_event *event,
				    struct bpf_link_info *info)
{
#ifdef CONFIG_KPROBE_EVENTS
	if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE)
		return bpf_perf_link_fill_kprobe(event, info);
#endif
#ifdef CONFIG_UPROBE_EVENTS
	if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE)
		return bpf_perf_link_fill_uprobe(event, info);
#endif
	return -EOPNOTSUPP;
}

/* Fill @info->perf_event for a tracepoint backed perf link. */
static int bpf_perf_link_fill_tracepoint(const struct perf_event *event,
					 struct bpf_link_info *info)
{
	char __user *uname;
	u32 ulen;
	int err;

	uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
	ulen = info->perf_event.tracepoint.name_len;
	err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, NULL);
	if (err)
		return err;

	info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
	info->perf_event.tracepoint.name_len = ulen;
	info->perf_event.tracepoint.cookie = event->bpf_cookie;
	return 0;
}

/* Fill @info->perf_event for a plain perf event: type and config are taken
 * from the event attributes. Always returns 0.
 */
static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
					 struct bpf_link_info *info)
{
	info->perf_event.event.type = event->attr.type;
	info->perf_event.event.config = event->attr.config;
	info->perf_event.event.cookie = event->bpf_cookie;
	info->perf_event.type = BPF_PERF_EVENT_EVENT;
	return 0;
}

/* ->fill_link_info() of perf links: pick the filler matching the type of the
 * program attached to the event. Returns -EOPNOTSUPP for other types.
 */
static int bpf_perf_link_fill_link_info(const struct bpf_link *link,
					struct bpf_link_info *info)
{
	struct bpf_perf_link *perf_link;
	const struct perf_event *event;

	perf_link = container_of(link, struct bpf_perf_link, link);
	event = perf_get_event(perf_link->perf_file);
	if (IS_ERR(event))
		return PTR_ERR(event);

	switch (event->prog->type) {
	case BPF_PROG_TYPE_PERF_EVENT:
		return bpf_perf_link_fill_perf_event(event, info);
	case BPF_PROG_TYPE_TRACEPOINT:
		return bpf_perf_link_fill_tracepoint(event, info);
	case BPF_PROG_TYPE_KPROBE:
		return bpf_perf_link_fill_probe(event, info);
	default:
		return -EOPNOTSUPP;
	}
}

/* fdinfo output for a plain perf event backed link. */
static void bpf_perf_event_link_show_fdinfo(const struct perf_event *event,
					    struct seq_file *seq)
{
	seq_printf(seq,
		   "type:\t%u\n"
		   "config:\t%llu\n"
		   "event_type:\t%s\n"
		   "cookie:\t%llu\n",
		   event->attr.type, event->attr.config,
		   "event", event->bpf_cookie);
}

/* fdinfo output for a tracepoint backed perf link; prints nothing if
 * bpf_get_perf_event_info() fails.
 */
static void bpf_tracepoint_link_show_fdinfo(const struct perf_event *event,
					    struct seq_file *seq)
{
	int err;
	const char *name;
	u32 prog_id;

	err = bpf_get_perf_event_info(event, &prog_id, NULL, &name, NULL,
				      NULL, NULL);
	if (err)
		return;

	seq_printf(seq,
		   "tp_name:\t%s\n"
		   "event_type:\t%s\n"
		   "cookie:\t%llu\n",
		   name, "tracepoint", event->bpf_cookie);
}

/* fdinfo dispatch for kprobe/uprobe backed perf links; prints nothing if
 * neither trace event flag is set (or support is compiled out).
 */
static void bpf_probe_link_show_fdinfo(const struct perf_event *event,
				       struct seq_file *seq)
{
#ifdef CONFIG_KPROBE_EVENTS
	if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE)
		return bpf_perf_link_fdinfo_kprobe(event, seq);
#endif

#ifdef CONFIG_UPROBE_EVENTS
	if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE)
		return bpf_perf_link_fdinfo_uprobe(event, seq);
#endif
}

/* ->show_fdinfo() of perf links: pick the printer matching the type of the
 * program attached to the event; prints nothing for other types or if the
 * event cannot be obtained.
 */
static void bpf_perf_link_show_fdinfo(const struct bpf_link *link,
				      struct seq_file *seq)
{
	struct bpf_perf_link *perf_link;
	const struct perf_event *event;

	perf_link = container_of(link, struct bpf_perf_link, link);
	event = perf_get_event(perf_link->perf_file);
	if (IS_ERR(event))
		return;

	switch (event->prog->type) {
	case BPF_PROG_TYPE_PERF_EVENT:
		return bpf_perf_event_link_show_fdinfo(event, seq);
	case BPF_PROG_TYPE_TRACEPOINT:
		return bpf_tracepoint_link_show_fdinfo(event, seq);
	case BPF_PROG_TYPE_KPROBE:
		return bpf_probe_link_show_fdinfo(event, seq);
	default:
		return;
	}
}

static const struct bpf_link_ops bpf_perf_link_lops = {
	.release = bpf_perf_link_release,
	.dealloc = bpf_perf_link_dealloc,
	.fill_link_info = bpf_perf_link_fill_link_info,
	.show_fdinfo = bpf_perf_link_show_fdinfo,
};

/* Create a BPF_LINK_TYPE_PERF_EVENT link attaching @prog to the perf event
 * given by attr->link_create.target_fd.
 *
 * Returns the new link FD on success or a negative error. link_create.flags
 * must be zero. On success the link keeps the perf file reference and an
 * extra reference on @prog is taken; on error the perf file reference is
 * dropped.
 */
static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct bpf_perf_link *link;
	struct perf_event *event;
	struct file *perf_file;
	int err;

	if (attr->link_create.flags)
		return -EINVAL;

	perf_file = perf_event_get(attr->link_create.target_fd);
	if (IS_ERR(perf_file))
		return PTR_ERR(perf_file);

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link) {
		err = -ENOMEM;
		goto out_put_file;
	}
	bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog,
		      attr->link_create.attach_type);
	link->perf_file = perf_file;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		goto out_put_file;
	}

	event = perf_file->private_data;
	err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie);
	if (err) {
		bpf_link_cleanup(&link_primer);
		goto out_put_file;
	}
	/* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */
	bpf_prog_inc(prog);

	return bpf_link_settle(&link_primer);

out_put_file:
	fput(perf_file);
	return err;
}
#else
/* Without CONFIG_PERF_EVENTS perf links are not supported. */
static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
#endif /* CONFIG_PERF_EVENTS */

/* Attach @prog to a raw tracepoint and return a link FD.
 *
 * For TRACING/EXT/LSM programs @user_tp_name must be NULL: a TRACING program
 * with expected attach type BPF_TRACE_RAW_TP uses the name recorded in
 * prog->aux->attach_func_name, everything else in that group is handed to
 * bpf_tracing_prog_attach(). For RAW_TRACEPOINT(_WRITABLE) programs the name
 * is read from user space (at most sizeof(buf) - 1 bytes).
 *
 * Returns the new link FD or a negative error; -ENOENT if the tracepoint is
 * not found. On error the raw tracepoint reference taken here is dropped.
 */
static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
				  const char __user *user_tp_name, u64 cookie,
				  enum bpf_attach_type attach_type)
{
	struct bpf_link_primer link_primer;
	struct bpf_raw_tp_link *link;
	struct bpf_raw_event_map *btp;
	const char *tp_name;
	char buf[128];
	int err;

	switch (prog->type) {
	case BPF_PROG_TYPE_TRACING:
	case BPF_PROG_TYPE_EXT:
	case BPF_PROG_TYPE_LSM:
		if (user_tp_name)
			/* The attach point for this category of programs
			 * should be specified via btf_id during program load.
			 */
			return -EINVAL;
		if (prog->type == BPF_PROG_TYPE_TRACING &&
		    prog->expected_attach_type == BPF_TRACE_RAW_TP) {
			tp_name = prog->aux->attach_func_name;
			break;
		}
		return bpf_tracing_prog_attach(prog, 0, 0, 0, attach_type);
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
		if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0)
			return -EFAULT;
		/* terminate explicitly in case the name filled the buffer */
		buf[sizeof(buf) - 1] = 0;
		tp_name = buf;
		break;
	default:
		return -EINVAL;
	}

	btp = bpf_get_raw_tracepoint(tp_name);
	if (!btp)
		return -ENOENT;

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link) {
		err = -ENOMEM;
		goto out_put_btp;
	}
	bpf_link_init_sleepable(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT,
				&bpf_raw_tp_link_lops, prog, attach_type,
				tracepoint_is_faultable(btp->tp));
	link->btp = btp;
	link->cookie = cookie;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		goto out_put_btp;
	}

	err = bpf_probe_register(link->btp, link);
	if (err) {
		bpf_link_cleanup(&link_primer);
		goto out_put_btp;
	}

	return bpf_link_settle(&link_primer);

out_put_btp:
	bpf_put_raw_tracepoint(btp);
	return err;
}

#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.cookie

/* BPF_RAW_TRACEPOINT_OPEN command: look up the program by FD and attach it
 * via bpf_raw_tp_link_attach(). Returns the link FD or a negative error; the
 * program reference taken here is dropped only on failure.
 */
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	void __user *tp_name;
	__u64 cookie;
	int fd;

	if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
		return -EINVAL;

	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	tp_name = u64_to_user_ptr(attr->raw_tracepoint.name);
	cookie = attr->raw_tracepoint.cookie;
	fd = bpf_raw_tp_link_attach(prog, tp_name, cookie, prog->expected_attach_type);
	if (fd < 0)
		bpf_prog_put(prog);
	return fd;
}

/* Map an attach type to the program type that can be attached with it.
 * Returns BPF_PROG_TYPE_UNSPEC for attach types not listed here.
 */
static enum bpf_prog_type
attach_type_to_prog_type(enum bpf_attach_type attach_type)
{
	switch (attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		return BPF_PROG_TYPE_CGROUP_SKB;
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_INET_SOCK_RELEASE:
	case BPF_CGROUP_INET4_POST_BIND:
	case BPF_CGROUP_INET6_POST_BIND:
		return BPF_PROG_TYPE_CGROUP_SOCK;
	case BPF_CGROUP_INET4_BIND:
	case BPF_CGROUP_INET6_BIND:
	case BPF_CGROUP_INET4_CONNECT:
	case BPF_CGROUP_INET6_CONNECT:
	case BPF_CGROUP_UNIX_CONNECT:
	case BPF_CGROUP_INET4_GETPEERNAME:
	case BPF_CGROUP_INET6_GETPEERNAME:
	case BPF_CGROUP_UNIX_GETPEERNAME:
	case BPF_CGROUP_INET4_GETSOCKNAME:
	case BPF_CGROUP_INET6_GETSOCKNAME:
	case BPF_CGROUP_UNIX_GETSOCKNAME:
	case BPF_CGROUP_UDP4_SENDMSG:
	case BPF_CGROUP_UDP6_SENDMSG:
	case BPF_CGROUP_UNIX_SENDMSG:
	case BPF_CGROUP_UDP4_RECVMSG:
	case BPF_CGROUP_UDP6_RECVMSG:
	case BPF_CGROUP_UNIX_RECVMSG:
		return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
	case BPF_CGROUP_SOCK_OPS:
		return BPF_PROG_TYPE_SOCK_OPS;
	case BPF_CGROUP_DEVICE:
		return BPF_PROG_TYPE_CGROUP_DEVICE;
	case BPF_SK_MSG_VERDICT:
		return BPF_PROG_TYPE_SK_MSG;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
	case BPF_SK_SKB_VERDICT:
		return BPF_PROG_TYPE_SK_SKB;
	case BPF_LIRC_MODE2:
		return BPF_PROG_TYPE_LIRC_MODE2;
	case BPF_FLOW_DISSECTOR:
		return BPF_PROG_TYPE_FLOW_DISSECTOR;
	case BPF_CGROUP_SYSCTL:
		return BPF_PROG_TYPE_CGROUP_SYSCTL;
	case BPF_CGROUP_GETSOCKOPT:
	case BPF_CGROUP_SETSOCKOPT:
		return BPF_PROG_TYPE_CGROUP_SOCKOPT;
	case BPF_TRACE_ITER:
	case BPF_TRACE_RAW_TP:
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
	case BPF_MODIFY_RETURN:
		return BPF_PROG_TYPE_TRACING;
	case BPF_LSM_MAC:
		return BPF_PROG_TYPE_LSM;
	case BPF_SK_LOOKUP:
		return BPF_PROG_TYPE_SK_LOOKUP;
	case BPF_XDP:
		return BPF_PROG_TYPE_XDP;
	case BPF_LSM_CGROUP:
		return BPF_PROG_TYPE_LSM;
	case BPF_TCX_INGRESS:
	case BPF_TCX_EGRESS:
	case BPF_NETKIT_PRIMARY:
	case BPF_NETKIT_PEER:
		return BPF_PROG_TYPE_SCHED_CLS;
	default:
		return BPF_PROG_TYPE_UNSPEC;
	}
}

/* Check that @prog may be attached with @attach_type.
 * Returns 0 if the combination is allowed, -EINVAL if not, and -EPERM for a
 * cgroup-skb program whose token lacks CAP_NET_ADMIN.
 */
static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
					     enum bpf_attach_type attach_type)
{
	enum bpf_prog_type ptype;

	switch (prog->type) {
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
	case BPF_PROG_TYPE_SK_LOOKUP:
		/* must match the attach type declared at load time */
		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
	case BPF_PROG_TYPE_CGROUP_SKB:
		if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN))
			/* cg-skb progs can be loaded by unpriv user.
			 * check permissions at attach time.
			 */
			return -EPERM;

		ptype = attach_type_to_prog_type(attach_type);
		if (prog->type != ptype)
			return -EINVAL;

		return prog->enforce_expected_attach_type &&
			prog->expected_attach_type != attach_type ?
			-EINVAL : 0;
	case BPF_PROG_TYPE_EXT:
		return 0;
	case BPF_PROG_TYPE_NETFILTER:
		if (attach_type != BPF_NETFILTER)
			return -EINVAL;
		return 0;
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_TRACEPOINT:
		if (attach_type != BPF_PERF_EVENT)
			return -EINVAL;
		return 0;
	case BPF_PROG_TYPE_KPROBE:
		/* a multi/session expected attach type must be matched exactly */
		if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI &&
		    attach_type != BPF_TRACE_KPROBE_MULTI)
			return -EINVAL;
		if (prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION &&
		    attach_type != BPF_TRACE_KPROBE_SESSION)
			return -EINVAL;
		if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI &&
		    attach_type != BPF_TRACE_UPROBE_MULTI)
			return -EINVAL;
		if (prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION &&
		    attach_type != BPF_TRACE_UPROBE_SESSION)
			return -EINVAL;
		if (attach_type != BPF_PERF_EVENT &&
		    attach_type != BPF_TRACE_KPROBE_MULTI &&
		    attach_type != BPF_TRACE_KPROBE_SESSION &&
		    attach_type != BPF_TRACE_UPROBE_MULTI &&
		    attach_type != BPF_TRACE_UPROBE_SESSION)
			return -EINVAL;
		return 0;
	case BPF_PROG_TYPE_SCHED_CLS:
		if (attach_type != BPF_TCX_INGRESS &&
		    attach_type != BPF_TCX_EGRESS &&
		    attach_type != BPF_NETKIT_PRIMARY &&
		    attach_type != BPF_NETKIT_PEER)
			return -EINVAL;
		return 0;
	default:
		ptype = attach_type_to_prog_type(attach_type);
		if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type)
			return -EINVAL;
		return 0;
	}
}

/* Tell whether @ptype is handled by the cgroup attach/detach code.
 * LSM programs count only when @atype is BPF_LSM_CGROUP, unless @check_atype
 * is false, in which case every LSM program counts.
 */
static bool is_cgroup_prog_type(enum bpf_prog_type ptype, enum bpf_attach_type atype,
				bool check_atype)
{
	switch (ptype) {
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SKB:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_SOCK_OPS:
		return true;
	case BPF_PROG_TYPE_LSM:
		return check_atype ? atype == BPF_LSM_CGROUP : true;
	default:
		return false;
	}
}

#define BPF_PROG_ATTACH_LAST_FIELD expected_revision

/* attach flags checked in the cgroup and fallback branches of bpf_prog_attach() */
#define BPF_F_ATTACH_MASK_BASE	\
	(BPF_F_ALLOW_OVERRIDE |	\
	 BPF_F_ALLOW_MULTI |	\
	 BPF_F_REPLACE |	\
	 BPF_F_PREORDER)

/* attach flags accepted for program types where bpf_mprog_supported() is true */
#define BPF_F_ATTACH_MASK_MPROG	\
	(BPF_F_REPLACE |	\
	 BPF_F_BEFORE |		\
	 BPF_F_AFTER |		\
	 BPF_F_ID |		\
	 BPF_F_LINK)

/* BPF_PROG_ATTACH command.
 *
 * Derives the program type from attr->attach_type, validates the attach
 * flags for that type, fetches the program and dispatches to the matching
 * subsystem attach routine. Returns 0 on success or a negative error; the
 * program reference taken here is dropped only on failure.
 */
static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	int ret;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	ptype = attach_type_to_prog_type(attr->attach_type);
	if (ptype == BPF_PROG_TYPE_UNSPEC)
		return -EINVAL;
	if (bpf_mprog_supported(ptype)) {
		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
			return -EINVAL;
	} else if (is_cgroup_prog_type(ptype, 0, false)) {
		if (attr->attach_flags & ~(BPF_F_ATTACH_MASK_BASE | BPF_F_ATTACH_MASK_MPROG))
			return -EINVAL;
	} else {
		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE)
			return -EINVAL;
		if (attr->relative_fd ||
		    attr->expected_revision)
			return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
		bpf_prog_put(prog);
		return -EINVAL;
	}

	if (is_cgroup_prog_type(ptype, prog->expected_attach_type, true)) {
		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
		goto out;
	}

	switch (ptype) {
	case BPF_PROG_TYPE_SK_SKB:
	case BPF_PROG_TYPE_SK_MSG:
		ret = sock_map_get_from_fd(attr, prog);
		break;
	case BPF_PROG_TYPE_LIRC_MODE2:
		ret = lirc_prog_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
		ret = netns_bpf_prog_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_SCHED_CLS:
		if (attr->attach_type == BPF_TCX_INGRESS ||
		    attr->attach_type == BPF_TCX_EGRESS)
			ret = tcx_prog_attach(attr, prog);
		else
			ret = netkit_prog_attach(attr, prog);
		break;
	default:
		ret = -EINVAL;
	}
out:
	if (ret)
		bpf_prog_put(prog);
	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD expected_revision

/* BPF_PROG_DETACH command.
 *
 * Derives the program type from attr->attach_type, validates flags and
 * dispatches to the matching subsystem detach routine. A program is looked
 * up from attr->attach_bpf_fd only for mprog-capable types; that reference
 * is always dropped before returning.
 */
static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct bpf_prog *prog = NULL;
	enum bpf_prog_type ptype;
	int ret;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	ptype = attach_type_to_prog_type(attr->attach_type);
	if (bpf_mprog_supported(ptype)) {
		if (ptype == BPF_PROG_TYPE_UNSPEC)
			return -EINVAL;
		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
			return -EINVAL;
		if (attr->attach_bpf_fd) {
			prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
			if (IS_ERR(prog))
				return PTR_ERR(prog);
		}
	} else if (is_cgroup_prog_type(ptype, 0, false)) {
		if (attr->attach_flags || attr->relative_fd)
			return -EINVAL;
	} else if (attr->attach_flags ||
		   attr->relative_fd ||
		   attr->expected_revision) {
		return -EINVAL;
	}

	switch (ptype) {
	case BPF_PROG_TYPE_SK_MSG:
	case BPF_PROG_TYPE_SK_SKB:
		ret = sock_map_prog_detach(attr, ptype);
		break;
	case BPF_PROG_TYPE_LIRC_MODE2:
		ret = lirc_prog_detach(attr);
		break;
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
		ret = netns_bpf_prog_detach(attr, ptype);
		break;
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SKB:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_SOCK_OPS:
	case BPF_PROG_TYPE_LSM:
		ret = cgroup_bpf_prog_detach(attr, ptype);
		break;
	case BPF_PROG_TYPE_SCHED_CLS:
		if (attr->attach_type == BPF_TCX_INGRESS ||
		    attr->attach_type == BPF_TCX_EGRESS)
			ret = tcx_prog_detach(attr, prog);
		else
			ret = netkit_prog_detach(attr, prog);
		break;
	default:
		ret = -EINVAL;
	}

	if (prog)
		bpf_prog_put(prog);
	return ret;
}

#define BPF_PROG_QUERY_LAST_FIELD query.revision

static int bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	if (!bpf_net_capable())
		return -EPERM;
	if (CHECK_ATTR(BPF_PROG_QUERY))
		return -EINVAL;
	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
		return -EINVAL;

	switch (attr->query.attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_INET_SOCK_RELEASE:
	case BPF_CGROUP_INET4_BIND:
	case BPF_CGROUP_INET6_BIND:
	case BPF_CGROUP_INET4_POST_BIND:
	case BPF_CGROUP_INET6_POST_BIND:
	case BPF_CGROUP_INET4_CONNECT:
	case BPF_CGROUP_INET6_CONNECT:
	case BPF_CGROUP_UNIX_CONNECT:
	case BPF_CGROUP_INET4_GETPEERNAME:
	case BPF_CGROUP_INET6_GETPEERNAME:
	case BPF_CGROUP_UNIX_GETPEERNAME:
	case BPF_CGROUP_INET4_GETSOCKNAME:
	case BPF_CGROUP_INET6_GETSOCKNAME:
	case BPF_CGROUP_UNIX_GETSOCKNAME:
	case BPF_CGROUP_UDP4_SENDMSG:
	case BPF_CGROUP_UDP6_SENDMSG:
	case BPF_CGROUP_UNIX_SENDMSG:
	case BPF_CGROUP_UDP4_RECVMSG:
	case BPF_CGROUP_UDP6_RECVMSG:
	case BPF_CGROUP_UNIX_RECVMSG:
	case BPF_CGROUP_SOCK_OPS:
	case BPF_CGROUP_DEVICE:
	case BPF_CGROUP_SYSCTL:
	case BPF_CGROUP_GETSOCKOPT:
	case BPF_CGROUP_SETSOCKOPT:
	case BPF_LSM_CGROUP:
		return cgroup_bpf_prog_query(attr, uattr);
	case BPF_LIRC_MODE2:
		return lirc_prog_query(attr, uattr);
	case BPF_FLOW_DISSECTOR:
	case BPF_SK_LOOKUP:
		return netns_bpf_prog_query(attr, uattr);
	case BPF_SK_SKB_STREAM_PARSER:
	case
BPF_SK_SKB_STREAM_VERDICT: 4663 case BPF_SK_MSG_VERDICT: 4664 case BPF_SK_SKB_VERDICT: 4665 return sock_map_bpf_prog_query(attr, uattr); 4666 case BPF_TCX_INGRESS: 4667 case BPF_TCX_EGRESS: 4668 return tcx_prog_query(attr, uattr); 4669 case BPF_NETKIT_PRIMARY: 4670 case BPF_NETKIT_PEER: 4671 return netkit_prog_query(attr, uattr); 4672 default: 4673 return -EINVAL; 4674 } 4675 } 4676 4677 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size 4678 4679 static int bpf_prog_test_run(const union bpf_attr *attr, 4680 union bpf_attr __user *uattr) 4681 { 4682 struct bpf_prog *prog; 4683 int ret = -ENOTSUPP; 4684 4685 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 4686 return -EINVAL; 4687 4688 if ((attr->test.ctx_size_in && !attr->test.ctx_in) || 4689 (!attr->test.ctx_size_in && attr->test.ctx_in)) 4690 return -EINVAL; 4691 4692 if ((attr->test.ctx_size_out && !attr->test.ctx_out) || 4693 (!attr->test.ctx_size_out && attr->test.ctx_out)) 4694 return -EINVAL; 4695 4696 prog = bpf_prog_get(attr->test.prog_fd); 4697 if (IS_ERR(prog)) 4698 return PTR_ERR(prog); 4699 4700 if (prog->aux->ops->test_run) 4701 ret = prog->aux->ops->test_run(prog, attr, uattr); 4702 4703 bpf_prog_put(prog); 4704 return ret; 4705 } 4706 4707 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 4708 4709 static int bpf_obj_get_next_id(const union bpf_attr *attr, 4710 union bpf_attr __user *uattr, 4711 struct idr *idr, 4712 spinlock_t *lock) 4713 { 4714 u32 next_id = attr->start_id; 4715 int err = 0; 4716 4717 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 4718 return -EINVAL; 4719 4720 if (!capable(CAP_SYS_ADMIN)) 4721 return -EPERM; 4722 4723 next_id++; 4724 spin_lock_bh(lock); 4725 if (!idr_get_next(idr, &next_id)) 4726 err = -ENOENT; 4727 spin_unlock_bh(lock); 4728 4729 if (!err) 4730 err = put_user(next_id, &uattr->next_id); 4731 4732 return err; 4733 } 4734 4735 struct bpf_map *bpf_map_get_curr_or_next(u32 *id) 4736 { 4737 struct bpf_map *map; 4738 4739 spin_lock_bh(&map_idr_lock); 4740 again: 4741 map 
= idr_get_next(&map_idr, id); 4742 if (map) { 4743 map = __bpf_map_inc_not_zero(map, false); 4744 if (IS_ERR(map)) { 4745 (*id)++; 4746 goto again; 4747 } 4748 } 4749 spin_unlock_bh(&map_idr_lock); 4750 4751 return map; 4752 } 4753 4754 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) 4755 { 4756 struct bpf_prog *prog; 4757 4758 spin_lock_bh(&prog_idr_lock); 4759 again: 4760 prog = idr_get_next(&prog_idr, id); 4761 if (prog) { 4762 prog = bpf_prog_inc_not_zero(prog); 4763 if (IS_ERR(prog)) { 4764 (*id)++; 4765 goto again; 4766 } 4767 } 4768 spin_unlock_bh(&prog_idr_lock); 4769 4770 return prog; 4771 } 4772 4773 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 4774 4775 struct bpf_prog *bpf_prog_by_id(u32 id) 4776 { 4777 struct bpf_prog *prog; 4778 4779 if (!id) 4780 return ERR_PTR(-ENOENT); 4781 4782 spin_lock_bh(&prog_idr_lock); 4783 prog = idr_find(&prog_idr, id); 4784 if (prog) 4785 prog = bpf_prog_inc_not_zero(prog); 4786 else 4787 prog = ERR_PTR(-ENOENT); 4788 spin_unlock_bh(&prog_idr_lock); 4789 return prog; 4790 } 4791 4792 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 4793 { 4794 struct bpf_prog *prog; 4795 u32 id = attr->prog_id; 4796 int fd; 4797 4798 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 4799 return -EINVAL; 4800 4801 if (!capable(CAP_SYS_ADMIN)) 4802 return -EPERM; 4803 4804 prog = bpf_prog_by_id(id); 4805 if (IS_ERR(prog)) 4806 return PTR_ERR(prog); 4807 4808 fd = bpf_prog_new_fd(prog); 4809 if (fd < 0) 4810 bpf_prog_put(prog); 4811 4812 return fd; 4813 } 4814 4815 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 4816 4817 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 4818 { 4819 struct bpf_map *map; 4820 u32 id = attr->map_id; 4821 int f_flags; 4822 int fd; 4823 4824 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || 4825 attr->open_flags & ~BPF_OBJ_FLAG_MASK) 4826 return -EINVAL; 4827 4828 if (!capable(CAP_SYS_ADMIN)) 4829 return -EPERM; 4830 4831 f_flags = bpf_get_file_flag(attr->open_flags); 4832 if (f_flags < 0) 4833 return 
f_flags; 4834 4835 spin_lock_bh(&map_idr_lock); 4836 map = idr_find(&map_idr, id); 4837 if (map) 4838 map = __bpf_map_inc_not_zero(map, true); 4839 else 4840 map = ERR_PTR(-ENOENT); 4841 spin_unlock_bh(&map_idr_lock); 4842 4843 if (IS_ERR(map)) 4844 return PTR_ERR(map); 4845 4846 fd = bpf_map_new_fd(map, f_flags); 4847 if (fd < 0) 4848 bpf_map_put_with_uref(map); 4849 4850 return fd; 4851 } 4852 4853 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 4854 unsigned long addr, u32 *off, 4855 u32 *type) 4856 { 4857 const struct bpf_map *map; 4858 int i; 4859 4860 mutex_lock(&prog->aux->used_maps_mutex); 4861 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { 4862 map = prog->aux->used_maps[i]; 4863 if (map == (void *)addr) { 4864 *type = BPF_PSEUDO_MAP_FD; 4865 goto out; 4866 } 4867 if (!map->ops->map_direct_value_meta) 4868 continue; 4869 if (!map->ops->map_direct_value_meta(map, addr, off)) { 4870 *type = BPF_PSEUDO_MAP_VALUE; 4871 goto out; 4872 } 4873 } 4874 map = NULL; 4875 4876 out: 4877 mutex_unlock(&prog->aux->used_maps_mutex); 4878 return map; 4879 } 4880 4881 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, 4882 const struct cred *f_cred) 4883 { 4884 const struct bpf_map *map; 4885 struct bpf_insn *insns; 4886 u32 off, type; 4887 u64 imm; 4888 u8 code; 4889 int i; 4890 4891 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), 4892 GFP_USER); 4893 if (!insns) 4894 return insns; 4895 4896 for (i = 0; i < prog->len; i++) { 4897 code = insns[i].code; 4898 4899 if (code == (BPF_JMP | BPF_TAIL_CALL)) { 4900 insns[i].code = BPF_JMP | BPF_CALL; 4901 insns[i].imm = BPF_FUNC_tail_call; 4902 /* fall-through */ 4903 } 4904 if (code == (BPF_JMP | BPF_CALL) || 4905 code == (BPF_JMP | BPF_CALL_ARGS)) { 4906 if (code == (BPF_JMP | BPF_CALL_ARGS)) 4907 insns[i].code = BPF_JMP | BPF_CALL; 4908 if (!bpf_dump_raw_ok(f_cred)) 4909 insns[i].imm = 0; 4910 continue; 4911 } 4912 if (BPF_CLASS(code) == BPF_LDX && 
BPF_MODE(code) == BPF_PROBE_MEM) { 4913 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; 4914 continue; 4915 } 4916 4917 if ((BPF_CLASS(code) == BPF_LDX || BPF_CLASS(code) == BPF_STX || 4918 BPF_CLASS(code) == BPF_ST) && BPF_MODE(code) == BPF_PROBE_MEM32) { 4919 insns[i].code = BPF_CLASS(code) | BPF_SIZE(code) | BPF_MEM; 4920 continue; 4921 } 4922 4923 if (code != (BPF_LD | BPF_IMM | BPF_DW)) 4924 continue; 4925 4926 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 4927 map = bpf_map_from_imm(prog, imm, &off, &type); 4928 if (map) { 4929 insns[i].src_reg = type; 4930 insns[i].imm = map->id; 4931 insns[i + 1].imm = off; 4932 continue; 4933 } 4934 } 4935 4936 return insns; 4937 } 4938 4939 static int set_info_rec_size(struct bpf_prog_info *info) 4940 { 4941 /* 4942 * Ensure info.*_rec_size is the same as kernel expected size 4943 * 4944 * or 4945 * 4946 * Only allow zero *_rec_size if both _rec_size and _cnt are 4947 * zero. In this case, the kernel will set the expected 4948 * _rec_size back to the info. 
*/

	if ((info->nr_func_info || info->func_info_rec_size) &&
	    info->func_info_rec_size != sizeof(struct bpf_func_info))
		return -EINVAL;

	if ((info->nr_line_info || info->line_info_rec_size) &&
	    info->line_info_rec_size != sizeof(struct bpf_line_info))
		return -EINVAL;

	if ((info->nr_jited_line_info || info->jited_line_info_rec_size) &&
	    info->jited_line_info_rec_size != sizeof(__u64))
		return -EINVAL;

	info->func_info_rec_size = sizeof(struct bpf_func_info);
	info->line_info_rec_size = sizeof(struct bpf_line_info);
	info->jited_line_info_rec_size = sizeof(__u64);

	return 0;
}

/*
 * Fill a struct bpf_prog_info for @prog and copy it back to user space.
 *
 * The user's struct is first copied in, because it carries the sizes and
 * user pointers of the optional output arrays.  For each such array the
 * same pattern is used: the user-supplied count/length is saved in 'ulen',
 * the info field is overwritten with the kernel's real value, and the copy
 * to the user buffer is bounded by the smaller of the two.
 *
 * @file supplies the credentials (file->f_cred) used for the
 * bpf_dump_raw_ok() checks.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, -ENOMEM if the
 * sanitized instruction copy cannot be allocated, or an error from
 * bpf_check_uarg_tail_zero(), set_info_rec_size() or
 * bpf_prog_offload_info_fill().
 */
static int bpf_prog_get_info_by_fd(struct file *file,
				   struct bpf_prog *prog,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct btf *attach_btf = bpf_prog_get_target_btf(prog);
	struct bpf_prog_info info;
	u32 info_len = attr->info.info_len;
	struct bpf_prog_kstats stats;
	char __user *uinsns;
	u32 ulen;
	int err;

	/* bytes beyond the struct size known to this kernel must be zero */
	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	/* zero first so fields beyond info_len do not hold stack garbage */
	memset(&info, 0, sizeof(info));
	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = prog->type;
	info.id = prog->aux->id;
	info.load_time = prog->aux->load_time;
	info.created_by_uid = from_kuid_munged(current_user_ns(),
					       prog->aux->user->uid);
	info.gpl_compatible = prog->gpl_compatible;

	memcpy(info.tag, prog->tag, sizeof(prog->tag));
	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));

	/* ids of the maps used by the program */
	mutex_lock(&prog->aux->used_maps_mutex);
	ulen = info.nr_map_ids;
	info.nr_map_ids = prog->aux->used_map_cnt;
	ulen = min_t(u32, info.nr_map_ids, ulen);
	if (ulen) {
		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
		u32 i;

		for (i = 0; i < ulen; i++)
			if (put_user(prog->aux->used_maps[i]->id,
				     &user_map_ids[i])) {
				mutex_unlock(&prog->aux->used_maps_mutex);
				return -EFAULT;
			}
	}
	mutex_unlock(&prog->aux->used_maps_mutex);

	/* validate the user's *_rec_size values and set the kernel's */
	err = set_info_rec_size(&info);
	if (err)
		return err;

	bpf_prog_get_stats(prog, &stats);
	info.run_time_ns = stats.nsecs;
	info.run_cnt = stats.cnt;
	info.recursion_misses = stats.misses;

	info.verified_insns = prog->aux->verified_insns;
	if (prog->aux->btf)
		info.btf_id = btf_obj_id(prog->aux->btf);

	/*
	 * Without bpf_capable() none of the sections below are filled in:
	 * their lengths/counts are reported as 0 and only the fields set so
	 * far are returned.
	 */
	if (!bpf_capable()) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		info.nr_jited_ksyms = 0;
		info.nr_jited_func_lens = 0;
		info.nr_func_info = 0;
		info.nr_line_info = 0;
		info.nr_jited_line_info = 0;
		goto done;
	}

	/* translated instructions, sanitized by bpf_insn_prepare_dump() */
	ulen = info.xlated_prog_len;
	info.xlated_prog_len = bpf_prog_insn_size(prog);
	if (info.xlated_prog_len && ulen) {
		struct bpf_insn *insns_sanitized;
		bool fault;

		/* a blinded program is dumped only if raw dumps are allowed */
		if (!prog->blinded || bpf_dump_raw_ok(file->f_cred)) {
			insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
			if (!insns_sanitized)
				return -ENOMEM;
			uinsns = u64_to_user_ptr(info.xlated_prog_insns);
			ulen = min_t(u32, info.xlated_prog_len, ulen);
			/* remember the result so the copy is freed either way */
			fault = copy_to_user(uinsns, insns_sanitized, ulen);
			kfree(insns_sanitized);
			if (fault)
				return -EFAULT;
		} else {
			info.xlated_prog_insns = 0;
		}
	}

	if (bpf_prog_is_offloaded(prog->aux)) {
		err = bpf_prog_offload_info_fill(&info, prog);
		if (err)
			return err;
		goto done;
	}

	/* NOTE: the following code is supposed to be skipped for offload.
	 * bpf_prog_offload_info_fill() is the place to fill similar fields
	 * for offload.
	 */
	ulen = info.jited_prog_len;
	if (prog->aux->func_cnt) {
		u32 i;

		/* multi-function program: total is the sum over all functions */
		info.jited_prog_len = 0;
		for (i = 0; i < prog->aux->func_cnt; i++)
			info.jited_prog_len += prog->aux->func[i]->jited_len;
	} else {
		info.jited_prog_len = prog->jited_len;
	}

	if (info.jited_prog_len && ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			uinsns = u64_to_user_ptr(info.jited_prog_insns);
			ulen = min_t(u32, info.jited_prog_len, ulen);

			/* for multi-function programs, copy the JITed
			 * instructions for all the functions
			 */
			if (prog->aux->func_cnt) {
				u32 len, free, i;
				u8 *img;

				/* 'free' is the space left in the user buffer */
				free = ulen;
				for (i = 0; i < prog->aux->func_cnt; i++) {
					len = prog->aux->func[i]->jited_len;
					len = min_t(u32, len, free);
					img = (u8 *) prog->aux->func[i]->bpf_func;
					if (copy_to_user(uinsns, img, len))
						return -EFAULT;
					uinsns += len;
					free -= len;
					if (!free)
						break;
				}
			} else {
				if (copy_to_user(uinsns, prog->bpf_func, ulen))
					return -EFAULT;
			}
		} else {
			info.jited_prog_insns = 0;
		}
	}

	/* one entry per function, or a single entry when func_cnt is 0 */
	ulen = info.nr_jited_ksyms;
	info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
	if (ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			unsigned long ksym_addr;
			u64 __user *user_ksyms;
			u32 i;

			/* copy the address of the kernel symbol
			 * corresponding to each function
			 */
			ulen = min_t(u32, info.nr_jited_ksyms, ulen);
			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
			if (prog->aux->func_cnt) {
				for (i = 0; i < ulen; i++) {
					ksym_addr = (unsigned long)
						prog->aux->func[i]->bpf_func;
					if (put_user((u64) ksym_addr,
						     &user_ksyms[i]))
						return -EFAULT;
				}
			} else {
				ksym_addr = (unsigned long) prog->bpf_func;
				if (put_user((u64) ksym_addr, &user_ksyms[0]))
					return -EFAULT;
			}
		} else {
			info.jited_ksyms = 0;
		}
	}

	/* same layout as jited_ksyms, but holding each image's length */
	ulen = info.nr_jited_func_lens;
	info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
	if (ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			u32 __user *user_lens;
			u32 func_len, i;

			/* copy the JITed image lengths for each function */
			ulen = min_t(u32, info.nr_jited_func_lens, ulen);
			user_lens = u64_to_user_ptr(info.jited_func_lens);
			if (prog->aux->func_cnt) {
				for (i = 0; i < ulen; i++) {
					func_len =
						prog->aux->func[i]->jited_len;
					if (put_user(func_len, &user_lens[i]))
						return -EFAULT;
				}
			} else {
				func_len = prog->jited_len;
				if (put_user(func_len, &user_lens[0]))
					return -EFAULT;
			}
		} else {
			info.jited_func_lens = 0;
		}
	}

	info.attach_btf_id = prog->aux->attach_btf_id;
	if (attach_btf)
		info.attach_btf_obj_id = btf_obj_id(attach_btf);

	/* func_info records; rec_size was fixed by set_info_rec_size() */
	ulen = info.nr_func_info;
	info.nr_func_info = prog->aux->func_info_cnt;
	if (info.nr_func_info && ulen) {
		char __user *user_finfo;

		user_finfo = u64_to_user_ptr(info.func_info);
		ulen = min_t(u32, info.nr_func_info, ulen);
		if (copy_to_user(user_finfo, prog->aux->func_info,
				 info.func_info_rec_size * ulen))
			return -EFAULT;
	}

	/* line_info records */
	ulen = info.nr_line_info;
	info.nr_line_info = prog->aux->nr_linfo;
	if (info.nr_line_info && ulen) {
		__u8 __user *user_linfo;

		user_linfo = u64_to_user_ptr(info.line_info);
		ulen = min_t(u32, info.nr_line_info, ulen);
		if (copy_to_user(user_linfo, prog->aux->linfo,
				 info.line_info_rec_size * ulen))
			return -EFAULT;
	}

	/* jited line info is reported only if prog->aux->jited_linfo is set */
	ulen = info.nr_jited_line_info;
	if (prog->aux->jited_linfo)
		info.nr_jited_line_info = prog->aux->nr_linfo;
	else
		info.nr_jited_line_info = 0;
	if (info.nr_jited_line_info && ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			unsigned long line_addr;
			__u64 __user *user_linfo;
			u32 i;

			user_linfo = u64_to_user_ptr(info.jited_line_info);
			ulen = min_t(u32, info.nr_jited_line_info, ulen);
			for (i = 0; i < ulen; i++) {
				line_addr = (unsigned long)prog->aux->jited_linfo[i];
				if (put_user((__u64)line_addr, &user_linfo[i]))
					return -EFAULT;
			}
		} else {
			info.jited_line_info = 0;
		}
	}

	/* per-function tags, or the program's own tag when func_cnt is 0 */
	ulen = info.nr_prog_tags;
	info.nr_prog_tags = prog->aux->func_cnt ? : 1;
	if (ulen) {
		__u8 __user (*user_prog_tags)[BPF_TAG_SIZE];
		u32 i;

		user_prog_tags = u64_to_user_ptr(info.prog_tags);
		ulen = min_t(u32, info.nr_prog_tags, ulen);
		if (prog->aux->func_cnt) {
			for (i = 0; i < ulen; i++) {
				if (copy_to_user(user_prog_tags[i],
						 prog->aux->func[i]->tag,
						 BPF_TAG_SIZE))
					return -EFAULT;
			}
		} else {
			if (copy_to_user(user_prog_tags[0],
					 prog->tag, BPF_TAG_SIZE))
				return -EFAULT;
		}
	}

done:
	/* write back the struct and the number of bytes actually used */
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

/*
 * Fill a struct bpf_map_info for @map and copy it back to user space.
 * The user's struct is copied in first; its hash/hash_size fields are read
 * further down as inputs.
 */
static int bpf_map_get_info_by_fd(struct file *file,
				  struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_map_info info;
	u32 info_len = attr->info.info_len;
	int err;

	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	memset(&info, 0, sizeof(info));
	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = map->map_type;
	info.id = map->id;
	info.key_size = map->key_size;
	info.value_size = map->value_size;
	info.max_entries = map->max_entries;
	info.map_flags = map->map_flags;
	info.map_extra = map->map_extra;
	memcpy(info.name, map->name, sizeof(map->name));

	if (map->btf) {
		info.btf_id = btf_obj_id(map->btf);
		info.btf_key_type_id = map->btf_key_type_id;
		info.btf_value_type_id = map->btf_value_type_id;
	}
	info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS)
		bpf_map_struct_ops_info_fill(&info, map);

	if (bpf_map_is_offloaded(map)) {
		err =
bpf_map_offload_info_fill(&info, map); 5298 if (err) 5299 return err; 5300 } 5301 5302 if (info.hash) { 5303 char __user *uhash = u64_to_user_ptr(info.hash); 5304 5305 if (!map->ops->map_get_hash) 5306 return -EINVAL; 5307 5308 if (info.hash_size != SHA256_DIGEST_SIZE) 5309 return -EINVAL; 5310 5311 err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha); 5312 if (err != 0) 5313 return err; 5314 5315 if (copy_to_user(uhash, map->sha, SHA256_DIGEST_SIZE) != 0) 5316 return -EFAULT; 5317 } else if (info.hash_size) { 5318 return -EINVAL; 5319 } 5320 5321 if (copy_to_user(uinfo, &info, info_len) || 5322 put_user(info_len, &uattr->info.info_len)) 5323 return -EFAULT; 5324 5325 return 0; 5326 } 5327 5328 static int bpf_btf_get_info_by_fd(struct file *file, 5329 struct btf *btf, 5330 const union bpf_attr *attr, 5331 union bpf_attr __user *uattr) 5332 { 5333 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5334 u32 info_len = attr->info.info_len; 5335 int err; 5336 5337 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5338 if (err) 5339 return err; 5340 5341 return btf_get_info_by_fd(btf, attr, uattr); 5342 } 5343 5344 static int bpf_link_get_info_by_fd(struct file *file, 5345 struct bpf_link *link, 5346 const union bpf_attr *attr, 5347 union bpf_attr __user *uattr) 5348 { 5349 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5350 struct bpf_link_info info; 5351 u32 info_len = attr->info.info_len; 5352 int err; 5353 5354 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 5355 if (err) 5356 return err; 5357 info_len = min_t(u32, sizeof(info), info_len); 5358 5359 memset(&info, 0, sizeof(info)); 5360 if (copy_from_user(&info, uinfo, info_len)) 5361 return -EFAULT; 5362 5363 info.type = link->type; 5364 info.id = link->id; 5365 if (link->prog) 5366 info.prog_id = link->prog->aux->id; 5367 5368 if (link->ops->fill_link_info) { 5369 err = link->ops->fill_link_info(link, 
&info); 5370 if (err) 5371 return err; 5372 } 5373 5374 if (copy_to_user(uinfo, &info, info_len) || 5375 put_user(info_len, &uattr->info.info_len)) 5376 return -EFAULT; 5377 5378 return 0; 5379 } 5380 5381 5382 static int token_get_info_by_fd(struct file *file, 5383 struct bpf_token *token, 5384 const union bpf_attr *attr, 5385 union bpf_attr __user *uattr) 5386 { 5387 struct bpf_token_info __user *uinfo = u64_to_user_ptr(attr->info.info); 5388 u32 info_len = attr->info.info_len; 5389 int err; 5390 5391 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 5392 if (err) 5393 return err; 5394 return bpf_token_get_info_by_fd(token, attr, uattr); 5395 } 5396 5397 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info 5398 5399 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, 5400 union bpf_attr __user *uattr) 5401 { 5402 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) 5403 return -EINVAL; 5404 5405 CLASS(fd, f)(attr->info.bpf_fd); 5406 if (fd_empty(f)) 5407 return -EBADFD; 5408 5409 if (fd_file(f)->f_op == &bpf_prog_fops) 5410 return bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5411 uattr); 5412 else if (fd_file(f)->f_op == &bpf_map_fops) 5413 return bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, 5414 uattr); 5415 else if (fd_file(f)->f_op == &btf_fops) 5416 return bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr); 5417 else if (fd_file(f)->f_op == &bpf_link_fops || fd_file(f)->f_op == &bpf_link_fops_poll) 5418 return bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5419 attr, uattr); 5420 else if (fd_file(f)->f_op == &bpf_token_fops) 5421 return token_get_info_by_fd(fd_file(f), fd_file(f)->private_data, 5422 attr, uattr); 5423 return -EINVAL; 5424 } 5425 5426 #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd 5427 5428 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) 5429 { 5430 struct bpf_token *token = NULL; 5431 5432 if 
(CHECK_ATTR(BPF_BTF_LOAD)) 5433 return -EINVAL; 5434 5435 if (attr->btf_flags & ~BPF_F_TOKEN_FD) 5436 return -EINVAL; 5437 5438 if (attr->btf_flags & BPF_F_TOKEN_FD) { 5439 token = bpf_token_get_from_fd(attr->btf_token_fd); 5440 if (IS_ERR(token)) 5441 return PTR_ERR(token); 5442 if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { 5443 bpf_token_put(token); 5444 token = NULL; 5445 } 5446 } 5447 5448 if (!bpf_token_capable(token, CAP_BPF)) { 5449 bpf_token_put(token); 5450 return -EPERM; 5451 } 5452 5453 bpf_token_put(token); 5454 5455 return btf_new_fd(attr, uattr, uattr_size); 5456 } 5457 5458 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd 5459 5460 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) 5461 { 5462 struct bpf_token *token = NULL; 5463 5464 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) 5465 return -EINVAL; 5466 5467 if (attr->open_flags & ~BPF_F_TOKEN_FD) 5468 return -EINVAL; 5469 5470 if (attr->open_flags & BPF_F_TOKEN_FD) { 5471 token = bpf_token_get_from_fd(attr->fd_by_id_token_fd); 5472 if (IS_ERR(token)) 5473 return PTR_ERR(token); 5474 if (!bpf_token_allow_cmd(token, BPF_BTF_GET_FD_BY_ID)) { 5475 bpf_token_put(token); 5476 token = NULL; 5477 } 5478 } 5479 5480 if (!bpf_token_capable(token, CAP_SYS_ADMIN)) { 5481 bpf_token_put(token); 5482 return -EPERM; 5483 } 5484 5485 bpf_token_put(token); 5486 5487 return btf_get_fd_by_id(attr->btf_id); 5488 } 5489 5490 static int bpf_task_fd_query_copy(const union bpf_attr *attr, 5491 union bpf_attr __user *uattr, 5492 u32 prog_id, u32 fd_type, 5493 const char *buf, u64 probe_offset, 5494 u64 probe_addr) 5495 { 5496 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf); 5497 u32 len = buf ? 
strlen(buf) : 0, input_len; 5498 int err = 0; 5499 5500 if (put_user(len, &uattr->task_fd_query.buf_len)) 5501 return -EFAULT; 5502 input_len = attr->task_fd_query.buf_len; 5503 if (input_len && ubuf) { 5504 if (!len) { 5505 /* nothing to copy, just make ubuf NULL terminated */ 5506 char zero = '\0'; 5507 5508 if (put_user(zero, ubuf)) 5509 return -EFAULT; 5510 } else { 5511 err = bpf_copy_to_user(ubuf, buf, input_len, len); 5512 if (err == -EFAULT) 5513 return err; 5514 } 5515 } 5516 5517 if (put_user(prog_id, &uattr->task_fd_query.prog_id) || 5518 put_user(fd_type, &uattr->task_fd_query.fd_type) || 5519 put_user(probe_offset, &uattr->task_fd_query.probe_offset) || 5520 put_user(probe_addr, &uattr->task_fd_query.probe_addr)) 5521 return -EFAULT; 5522 5523 return err; 5524 } 5525 5526 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr 5527 5528 static int bpf_task_fd_query(const union bpf_attr *attr, 5529 union bpf_attr __user *uattr) 5530 { 5531 pid_t pid = attr->task_fd_query.pid; 5532 u32 fd = attr->task_fd_query.fd; 5533 const struct perf_event *event; 5534 struct task_struct *task; 5535 struct file *file; 5536 int err; 5537 5538 if (CHECK_ATTR(BPF_TASK_FD_QUERY)) 5539 return -EINVAL; 5540 5541 if (!capable(CAP_SYS_ADMIN)) 5542 return -EPERM; 5543 5544 if (attr->task_fd_query.flags != 0) 5545 return -EINVAL; 5546 5547 rcu_read_lock(); 5548 task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 5549 rcu_read_unlock(); 5550 if (!task) 5551 return -ENOENT; 5552 5553 err = 0; 5554 file = fget_task(task, fd); 5555 put_task_struct(task); 5556 if (!file) 5557 return -EBADF; 5558 5559 if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) { 5560 struct bpf_link *link = file->private_data; 5561 5562 if (link->ops == &bpf_raw_tp_link_lops) { 5563 struct bpf_raw_tp_link *raw_tp = 5564 container_of(link, struct bpf_raw_tp_link, link); 5565 struct bpf_raw_event_map *btp = raw_tp->btp; 5566 5567 err = bpf_task_fd_query_copy(attr, uattr, 5568 
raw_tp->link.prog->aux->id,
						     BPF_FD_TYPE_RAW_TRACEPOINT,
						     btp->tp->name, 0, 0);
			goto put_file;
		}
		goto out_not_supp;
	}

	event = perf_get_event(file);
	if (!IS_ERR(event)) {
		u64 probe_offset, probe_addr;
		u32 prog_id, fd_type;
		const char *buf;

		err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
					      &buf, &probe_offset,
					      &probe_addr, NULL);
		if (!err)
			err = bpf_task_fd_query_copy(attr, uattr, prog_id,
						     fd_type, buf,
						     probe_offset,
						     probe_addr);
		goto put_file;
	}

out_not_supp:
	err = -ENOTSUPP;
put_file:
	fput(file);
	return err;
}

#define BPF_MAP_BATCH_LAST_FIELD batch.flags

/*
 * Call the optional batch callback @fn with the remaining arguments and store
 * its result in the enclosing function's local 'err'.  When @fn is NULL, set
 * 'err' to -ENOTSUPP and jump to the enclosing function's 'err_put' label
 * instead.  Only usable where both 'err' and 'err_put' exist.
 */
#define BPF_DO_BATCH(fn, ...)			\
	do {					\
		if (!fn) {			\
			err = -ENOTSUPP;	\
			goto err_put;		\
		}				\
		err = fn(__VA_ARGS__);		\
	} while (0)

/*
 * Handle the four BPF_MAP_*_BATCH commands.
 *
 * @attr:  kernel copy of the syscall attributes (batch.* fields are used)
 * @uattr: user pointer to the same attributes, passed on to the map callback
 * @cmd:   one of BPF_MAP_LOOKUP_BATCH, BPF_MAP_LOOKUP_AND_DELETE_BATCH,
 *         BPF_MAP_UPDATE_BATCH; any other value is dispatched to
 *         map_delete_batch
 *
 * Returns the callback's result, -EINVAL for malformed attributes, -EPERM when
 * the fd lacks the required read/write permission, or -ENOTSUPP when the map
 * type does not provide the requested callback.
 */
static int bpf_map_do_batch(const union bpf_attr *attr,
			    union bpf_attr __user *uattr,
			    int cmd)
{
	/* Both lookup flavours read; everything but plain lookup writes. */
	bool has_read  = cmd == BPF_MAP_LOOKUP_BATCH ||
			 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
	bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
	struct bpf_map *map;
	int err;

	if (CHECK_ATTR(BPF_MAP_BATCH))
		return -EINVAL;

	CLASS(fd, f)(attr->batch.map_fd);

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	/*
	 * The write-active count is raised before the permission checks so
	 * that every exit through err_put can drop it unconditionally.
	 */
	if (has_write)
		bpf_map_write_active_inc(map);
	if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}
	if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	if (cmd == BPF_MAP_LOOKUP_BATCH)
		BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr);
	else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
		BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr);
	else if (cmd == BPF_MAP_UPDATE_BATCH)
		BPF_DO_BATCH(map->ops->map_update_batch, map, fd_file(f), attr, uattr);
	else
		BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr);
err_put:
	if (has_write) {
		maybe_wait_bpf_programs(map);
		bpf_map_write_active_dec(map);
	}
	return err;
}

#define BPF_LINK_CREATE_LAST_FIELD link_create.uprobe_multi.pid
/*
 * BPF_LINK_CREATE: attach the program named by link_create.prog_fd through
 * the link-attach routine that matches its program type (and, for some types,
 * link_create.attach_type).
 *
 * BPF_STRUCT_OPS is handled up front and does not take a program reference.
 * For every other case a program reference is taken here and dropped again
 * only when the result is negative.
 *
 * Returns the attach routine's result (negative errno on failure) or -EINVAL
 * for malformed attributes / unsupported program types.
 */
static int link_create(union bpf_attr *attr, bpfptr_t uattr)
{
	struct bpf_prog *prog;
	int ret;

	if (CHECK_ATTR(BPF_LINK_CREATE))
		return -EINVAL;

	if (attr->link_create.attach_type == BPF_STRUCT_OPS)
		return bpf_struct_ops_link_create(attr);

	prog = bpf_prog_get(attr->link_create.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	ret = bpf_prog_attach_check_attach_type(prog,
						attr->link_create.attach_type);
	if (ret)
		goto out;

	switch (prog->type) {
	case BPF_PROG_TYPE_CGROUP_SKB:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_SOCK_OPS:
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		ret = cgroup_bpf_link_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_EXT:
		ret = bpf_tracing_prog_attach(prog,
					      attr->link_create.target_fd,
					      attr->link_create.target_btf_id,
					      attr->link_create.tracing.cookie,
					      attr->link_create.attach_type);
		break;
	case BPF_PROG_TYPE_LSM:
	case BPF_PROG_TYPE_TRACING:
		/* The requested attach type must match the one the program was loaded with. */
		if (attr->link_create.attach_type != prog->expected_attach_type) {
			ret = -EINVAL;
			goto out;
		}
		if (prog->expected_attach_type == BPF_TRACE_RAW_TP)
			ret = bpf_raw_tp_link_attach(prog, NULL, attr->link_create.tracing.cookie,
						     attr->link_create.attach_type);
		else if (prog->expected_attach_type == BPF_TRACE_ITER)
			ret = bpf_iter_link_attach(attr, uattr, prog);
		else if (prog->expected_attach_type == BPF_LSM_CGROUP)
			ret = cgroup_bpf_link_attach(attr, prog);
		else
			ret = bpf_tracing_prog_attach(prog,
						      attr->link_create.target_fd,
						      attr->link_create.target_btf_id,
						      attr->link_create.tracing.cookie,
						      attr->link_create.attach_type);
		break;
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_SK_LOOKUP:
		ret = netns_bpf_link_create(attr, prog);
		break;
	case BPF_PROG_TYPE_SK_MSG:
	case BPF_PROG_TYPE_SK_SKB:
		ret = sock_map_link_create(attr, prog);
		break;
#ifdef CONFIG_NET
	case BPF_PROG_TYPE_XDP:
		ret = bpf_xdp_link_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_SCHED_CLS:
		if (attr->link_create.attach_type == BPF_TCX_INGRESS ||
		    attr->link_create.attach_type == BPF_TCX_EGRESS)
			ret = tcx_link_attach(attr, prog);
		else
			ret = netkit_link_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_NETFILTER:
		ret = bpf_nf_link_attach(attr, prog);
		break;
#endif
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_TRACEPOINT:
		ret = bpf_perf_link_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_KPROBE:
		/*
		 * NOTE(review): there is no final else here, so if none of the
		 * attach types below match, ret keeps the 0 left by
		 * bpf_prog_attach_check_attach_type().  Presumably that check
		 * already rejects any other attach type for kprobe programs -
		 * confirm.
		 */
		if (attr->link_create.attach_type == BPF_PERF_EVENT)
			ret = bpf_perf_link_attach(attr, prog);
		else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI ||
			 attr->link_create.attach_type == BPF_TRACE_KPROBE_SESSION)
			ret = bpf_kprobe_multi_link_attach(attr, prog);
		else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI ||
			 attr->link_create.attach_type == BPF_TRACE_UPROBE_SESSION)
			ret = bpf_uprobe_multi_link_attach(attr, prog);
		break;
	default:
		ret = -EINVAL;
	}

out:
	/* The program reference is released here only on failure. */
	if (ret < 0)
		bpf_prog_put(prog);
	return ret;
}

/*
 * BPF_LINK_UPDATE for links that implement ->update_map(): replace the map
 * behind @link with the one named by link_update.new_map_fd.
 *
 * With BPF_F_REPLACE set, link_update.old_map_fd is resolved and handed to the
 * callback as the expected old map; without it, old_map_fd must be zero and
 * NULL is passed.  Both map references taken here are dropped before
 * returning, whatever the outcome.
 */
static int link_update_map(struct bpf_link *link, union bpf_attr *attr)
{
	struct bpf_map *new_map, *old_map = NULL;
	int ret;

	new_map = bpf_map_get(attr->link_update.new_map_fd);
	if (IS_ERR(new_map))
		return PTR_ERR(new_map);

	if (attr->link_update.flags & BPF_F_REPLACE) {
		old_map = bpf_map_get(attr->link_update.old_map_fd);
		if (IS_ERR(old_map)) {
			ret = PTR_ERR(old_map);
			goto out_put;
		}
	} else if (attr->link_update.old_map_fd) {
		ret = -EINVAL;
		goto out_put;
	}

	ret = link->ops->update_map(link, new_map, old_map);

	if (old_map)
		bpf_map_put(old_map);
out_put:
	bpf_map_put(new_map);
	return ret;
}

#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd

/*
 * BPF_LINK_UPDATE: swap the program (or, for links providing ->update_map(),
 * the map) behind the link named by link_update.link_fd.
 *
 * Only BPF_F_REPLACE is accepted in link_update.flags.  With it set,
 * old_prog_fd is resolved and passed to ->update_prog() as the expected old
 * program; without it, old_prog_fd must be zero.  Links without
 * ->update_prog() yield -EINVAL.
 */
static int link_update(union bpf_attr *attr)
{
	struct bpf_prog *old_prog = NULL, *new_prog;
	struct bpf_link *link;
	u32 flags;
	int ret;

	if (CHECK_ATTR(BPF_LINK_UPDATE))
		return -EINVAL;

	flags = attr->link_update.flags;
	if (flags & ~BPF_F_REPLACE)
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->link_update.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	if (link->ops->update_map) {
		ret = link_update_map(link, attr);
		goto out_put_link;
	}

	new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
	if (IS_ERR(new_prog)) {
		ret = PTR_ERR(new_prog);
		goto out_put_link;
	}

	if (flags & BPF_F_REPLACE) {
		old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
		if (IS_ERR(old_prog)) {
			ret = PTR_ERR(old_prog);
			/* Reset so the cleanup below does not put an error pointer. */
			old_prog = NULL;
			goto out_put_progs;
		}
	} else if (attr->link_update.old_prog_fd) {
		ret = -EINVAL;
		goto out_put_progs;
	}

	if (link->ops->update_prog)
		ret = link->ops->update_prog(link, new_prog, old_prog);
	else
		ret = -EINVAL;

out_put_progs:
	if (old_prog)
		bpf_prog_put(old_prog);
	/* new_prog's reference is dropped only when the update did not succeed. */
	if (ret)
		bpf_prog_put(new_prog);
out_put_link:
	bpf_link_put_direct(link);
	return ret;
}

#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd

/*
 * BPF_LINK_DETACH: invoke the ->detach() callback of the link named by
 * link_detach.link_fd.  Returns the callback's result, or -EOPNOTSUPP when
 * the link type has no ->detach().
 */
static int link_detach(union bpf_attr *attr)
{
	struct bpf_link *link;
	int ret;

	if (CHECK_ATTR(BPF_LINK_DETACH))
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->link_detach.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	if (link->ops->detach)
		ret = link->ops->detach(link);
	else
		ret = -EOPNOTSUPP;

	bpf_link_put_direct(link);
	return ret;
}

/*
 * Take a reference on @link unless its refcount is already zero.
 * Returns @link on success, ERR_PTR(-ENOENT) otherwise.
 */
struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
{
	return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL(bpf_link_inc_not_zero);

/*
 * Look up a link by ID in link_idr and take a reference on it.
 *
 * Returns the link, ERR_PTR(-ENOENT) if @id is zero, not present, or the
 * link's refcount already hit zero, or ERR_PTR(-EAGAIN) if the entry exists
 * but its ->id field is still zero.
 */
struct bpf_link *bpf_link_by_id(u32 id)
{
	struct bpf_link *link;

	if (!id)
		return ERR_PTR(-ENOENT);

	spin_lock_bh(&link_idr_lock);
	/* before link is "settled", ID is 0, pretend it doesn't exist yet */
	link = idr_find(&link_idr, id);
	if (link) {
		if (link->id)
			link = bpf_link_inc_not_zero(link);
		else
			link = ERR_PTR(-EAGAIN);
	} else {
		link = ERR_PTR(-ENOENT);
	}
	spin_unlock_bh(&link_idr_lock);
	return link;
}

/*
 * Return a referenced link found by idr_get_next() starting at *@id.  An
 * entry whose refcount has already dropped to zero is skipped by bumping *@id
 * and searching again.  Returns NULL when idr_get_next() finds nothing.
 */
struct bpf_link *bpf_link_get_curr_or_next(u32 *id)
{
	struct bpf_link *link;

	spin_lock_bh(&link_idr_lock);
again:
	link = idr_get_next(&link_idr, id);
	if (link) {
		link = bpf_link_inc_not_zero(link);
		if (IS_ERR(link)) {
			(*id)++;
			goto again;
		}
	}
	spin_unlock_bh(&link_idr_lock);

	return link;
}

#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id

/*
 * BPF_LINK_GET_FD_BY_ID: create a new fd for the link with ID attr->link_id.
 * Requires CAP_SYS_ADMIN.  Returns the fd or a negative errno; the link
 * reference taken by the lookup is dropped if fd creation fails.
 */
static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_link *link;
	u32 id = attr->link_id;
	int fd;

	if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	link = bpf_link_by_id(id);
	if (IS_ERR(link))
		return PTR_ERR(link);

	fd = bpf_link_new_fd(link);
	if (fd < 0)
		bpf_link_put_direct(link);

	return fd;
}

/* Serializes changes to bpf_stats_enabled_key made in this file. */
DEFINE_MUTEX(bpf_stats_enabled_mutex);

/*
 * ->release() for the "bpf-stats" anonymous inode: drop the static key
 * increment that bpf_enable_runtime_stats() made when the fd was created.
 */
static int bpf_stats_release(struct inode *inode, struct file *file)
{
	mutex_lock(&bpf_stats_enabled_mutex);
	static_key_slow_dec(&bpf_stats_enabled_key.key);
	mutex_unlock(&bpf_stats_enabled_mutex);
	return 0;
}

static const struct file_operations bpf_stats_fops = {
	.release	= bpf_stats_release,
};

/*
 * Increment bpf_stats_enabled_key and return an fd whose release undoes the
 * increment.  Returns -EBUSY when the key count already exceeds INT_MAX / 2,
 * or the negative errno from anon_inode_getfd(); the key is only incremented
 * when an fd was actually obtained.
 */
static int bpf_enable_runtime_stats(void)
{
	int fd;

	mutex_lock(&bpf_stats_enabled_mutex);

	/* Set a very high limit to avoid overflow */
	if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
		mutex_unlock(&bpf_stats_enabled_mutex);
		return -EBUSY;
	}

	fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
	if (fd >= 0)
		static_key_slow_inc(&bpf_stats_enabled_key.key);

	mutex_unlock(&bpf_stats_enabled_mutex);
	return fd;
}

#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type

/*
 * BPF_ENABLE_STATS: requires CAP_SYS_ADMIN.  Only BPF_STATS_RUN_TIME is
 * handled; any other enable_stats.type yields -EINVAL.
 */
static int bpf_enable_stats(union bpf_attr *attr)
{

	if (CHECK_ATTR(BPF_ENABLE_STATS))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (attr->enable_stats.type) {
	case BPF_STATS_RUN_TIME:
		return bpf_enable_runtime_stats();
	default:
		break;
	}
	return -EINVAL;
}

#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags

/*
 * BPF_ITER_CREATE: create an iterator fd from the link named by
 * iter_create.link_fd.  iter_create.flags must be zero.  The link reference
 * taken for the lookup is dropped before returning, regardless of the result
 * of bpf_iter_new_fd().
 */
static int bpf_iter_create(union bpf_attr *attr)
{
	struct bpf_link *link;
	int err;

	if (CHECK_ATTR(BPF_ITER_CREATE))
		return -EINVAL;

	if (attr->iter_create.flags)
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	err = bpf_iter_new_fd(link);
	bpf_link_put_direct(link);

	return err;
}
#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags

/*
 * BPF_PROG_BIND_MAP: append the map named by prog_bind_map.map_fd to the
 * used_maps array of the program named by prog_bind_map.prog_fd.
 *
 * prog_bind_map.flags must be zero.  If the map is already listed, nothing is
 * changed and 0 is returned.  Otherwise a new array one slot larger is
 * allocated, the old entries are copied over, and the map reference taken
 * here is stored in the last slot.
 *
 * Returns 0 on success, -EINVAL for bad attributes, -ENOMEM if the new array
 * cannot be allocated, or the error from resolving either fd.
 */
static int bpf_prog_bind_map(union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct bpf_map *map;
	struct bpf_map **used_maps_old, **used_maps_new;
	int i, ret = 0;

	if (CHECK_ATTR(BPF_PROG_BIND_MAP))
		return -EINVAL;

	if (attr->prog_bind_map.flags)
		return -EINVAL;

	prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	map = bpf_map_get(attr->prog_bind_map.map_fd);
	if (IS_ERR(map)) {
		ret = PTR_ERR(map);
		goto out_prog_put;
	}

	mutex_lock(&prog->aux->used_maps_mutex);

	used_maps_old = prog->aux->used_maps;

	/* Already bound: drop the reference taken above and report success. */
	for (i = 0; i < prog->aux->used_map_cnt; i++)
		if (used_maps_old[i] == map) {
			bpf_map_put(map);
			goto out_unlock;
		}

	used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
				      sizeof(used_maps_new[0]),
				      GFP_KERNEL);
	if (!used_maps_new) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	/* The bpf program will not access the bpf map, but for the sake of
	 * simplicity, increase sleepable_refcnt for sleepable program as well.
	 */
	if (prog->sleepable)
		atomic64_inc(&map->sleepable_refcnt);
	memcpy(used_maps_new, used_maps_old,
	       sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
	used_maps_new[prog->aux->used_map_cnt] = map;

	prog->aux->used_map_cnt++;
	prog->aux->used_maps = used_maps_new;

	kfree(used_maps_old);

out_unlock:
	mutex_unlock(&prog->aux->used_maps_mutex);

	/* On failure the map reference was not stored anywhere; release it. */
	if (ret)
		bpf_map_put(map);
out_prog_put:
	bpf_prog_put(prog);
	return ret;
}

#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd

/*
 * BPF_TOKEN_CREATE: validate the attributes (token_create.flags must be zero)
 * and hand off to bpf_token_create().
 */
static int token_create(union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_TOKEN_CREATE))
		return -EINVAL;

	/* no flags are supported yet */
	if (attr->token_create.flags)
		return -EINVAL;

	return bpf_token_create(attr);
}

#define BPF_PROG_STREAM_READ_BY_FD_LAST_FIELD prog_stream_read.prog_fd

/*
 * BPF_PROG_STREAM_READ_BY_FD: call bpf_prog_stream_read() for stream
 * prog_stream_read.stream_id of the program named by prog_stream_read.prog_fd,
 * passing the user buffer stream_buf of length stream_buf_len.  Returns
 * whatever bpf_prog_stream_read() returns, or the error from resolving the
 * program fd.
 */
static int prog_stream_read(union bpf_attr *attr)
{
	char __user *buf = u64_to_user_ptr(attr->prog_stream_read.stream_buf);
	u32 len = attr->prog_stream_read.stream_buf_len;
	struct bpf_prog *prog;
	int ret;

	if (CHECK_ATTR(BPF_PROG_STREAM_READ_BY_FD))
		return -EINVAL;

	prog = bpf_prog_get(attr->prog_stream_read.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	ret = bpf_prog_stream_read(prog, attr->prog_stream_read.stream_id, buf, len);
	bpf_prog_put(prog);

	return ret;
}

#define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.prog_fd

/*
 * BPF_PROG_ASSOC_STRUCT_OPS: associate the program named by
 * prog_assoc_struct_ops.prog_fd with the struct_ops map named by
 * prog_assoc_struct_ops.map_fd via bpf_prog_assoc_struct_ops().
 *
 * Rejected with -EINVAL when flags is non-zero, when the program itself is of
 * type BPF_PROG_TYPE_STRUCT_OPS, or when the map is not a
 * BPF_MAP_TYPE_STRUCT_OPS map.  Both references taken here are dropped before
 * returning.
 */
static int prog_assoc_struct_ops(union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct bpf_map *map;
	int ret;

	if (CHECK_ATTR(BPF_PROG_ASSOC_STRUCT_OPS))
		return -EINVAL;

	if (attr->prog_assoc_struct_ops.flags)
		return -EINVAL;

	prog = bpf_prog_get(attr->prog_assoc_struct_ops.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
		ret = -EINVAL;
		goto put_prog;
	}

	map = bpf_map_get(attr->prog_assoc_struct_ops.map_fd);
	if (IS_ERR(map)) {
		ret = PTR_ERR(map);
		goto put_prog;
	}

	if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS) {
		ret = -EINVAL;
		goto put_map;
	}

	ret = bpf_prog_assoc_struct_ops(prog, map);

put_map:
	bpf_map_put(map);
put_prog:
	bpf_prog_put(prog);
	return ret;
}

/*
 * Common implementation behind the bpf() syscall and the in-kernel callers.
 *
 * @cmd:   command to execute
 * @uattr: attributes, either a user or a kernel pointer
 * @size:  number of attribute bytes supplied by the caller
 *
 * Bytes beyond sizeof(union bpf_attr) must be zero.  At most sizeof(attr)
 * bytes are copied into a zero-initialized local copy, the security hook is
 * consulted, and the command is dispatched to its handler.  Unknown commands
 * return -EINVAL.
 */
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
	union bpf_attr attr;
	int err;

	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	memset(&attr, 0, sizeof(attr));
	if (copy_from_bpfptr(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size, uattr.is_kernel);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr, uattr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr, uattr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr, uattr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_MAP_FREEZE:
		err = map_freeze(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr, uattr, size);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr.user);
		break;
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr.user);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_BTF_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &btf_idr, &btf_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr.user);
		break;
	case BPF_RAW_TRACEPOINT_OPEN:
		err = bpf_raw_tracepoint_open(&attr);
		break;
	case BPF_BTF_LOAD:
		err = bpf_btf_load(&attr, uattr, size);
		break;
	case BPF_BTF_GET_FD_BY_ID:
		err = bpf_btf_get_fd_by_id(&attr);
		break;
	case BPF_TASK_FD_QUERY:
		err = bpf_task_fd_query(&attr, uattr.user);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
		err = map_lookup_and_delete_elem(&attr);
		break;
	case BPF_MAP_LOOKUP_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user,
				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
		break;
	case BPF_MAP_UPDATE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
		break;
	case BPF_MAP_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
		break;
	case BPF_LINK_CREATE:
		err = link_create(&attr, uattr);
		break;
	case BPF_LINK_UPDATE:
		err = link_update(&attr);
		break;
	case BPF_LINK_GET_FD_BY_ID:
		err = bpf_link_get_fd_by_id(&attr);
		break;
	case BPF_LINK_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &link_idr, &link_idr_lock);
		break;
	case BPF_ENABLE_STATS:
		err = bpf_enable_stats(&attr);
		break;
	case BPF_ITER_CREATE:
		err = bpf_iter_create(&attr);
		break;
	case BPF_LINK_DETACH:
		err = link_detach(&attr);
		break;
	case BPF_PROG_BIND_MAP:
		err = bpf_prog_bind_map(&attr);
		break;
	case BPF_TOKEN_CREATE:
		err = token_create(&attr);
		break;
	case BPF_PROG_STREAM_READ_BY_FD:
		err = prog_stream_read(&attr);
		break;
	case BPF_PROG_ASSOC_STRUCT_OPS:
		err = prog_assoc_struct_ops(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}

/* bpf() syscall entry point: wraps the user pointer and calls __sys_bpf(). */
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
}

/*
 * ->is_valid_access() for syscall programs: a context access is accepted when
 * its offset lies in [0, U16_MAX) and is a multiple of the access size.
 * @type, @prog and @info are not consulted.
 */
static bool syscall_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= U16_MAX)
		return false;
	if (off % size != 0)
		return false;
	return true;
}

/*
 * bpf_sys_bpf() helper: run a bpf() command with attributes held in kernel
 * memory.  Only the commands listed in the switch are let through to
 * __sys_bpf(); everything else returns -EINVAL.
 */
BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
{
	switch (cmd) {
	case BPF_MAP_CREATE:
	case BPF_MAP_DELETE_ELEM:
	case BPF_MAP_UPDATE_ELEM:
	case BPF_MAP_FREEZE:
	case BPF_MAP_GET_FD_BY_ID:
	case BPF_PROG_LOAD:
	case BPF_BTF_LOAD:
	case BPF_LINK_CREATE:
	case BPF_RAW_TRACEPOINT_OPEN:
		break;
	default:
		return -EINVAL;
	}
	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
}


/* To shut up -Wmissing-prototypes.
 * This function is used by the kernel light skeleton
 * to load bpf programs when modules are loaded or during kernel boot.
 * See tools/lib/bpf/skel_internal.h
 */
int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);

/*
 * In-kernel bpf() entry point.
 *
 * With CONFIG_BPF_JIT, BPF_PROG_TEST_RUN is handled here directly: it only
 * accepts BPF_PROG_TYPE_SYSCALL programs, requires every test.* field checked
 * below to be zero, requires ctx_size_in to be within
 * [prog->aux->max_ctx_offset, U16_MAX], runs the program once on test.ctx_in
 * and stores the result in attr->test.retval.  Returns -EBUSY when
 * __bpf_prog_enter_sleepable_recur() reports recursion.
 *
 * Every other command goes through the same command filter as the
 * bpf_sys_bpf() helper.
 */
int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	struct bpf_prog * __maybe_unused prog;
	struct bpf_tramp_run_ctx __maybe_unused run_ctx;

	switch (cmd) {
#ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
	case BPF_PROG_TEST_RUN:
		if (attr->test.data_in || attr->test.data_out ||
		    attr->test.ctx_out || attr->test.duration ||
		    attr->test.repeat || attr->test.flags)
			return -EINVAL;

		prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
		if (IS_ERR(prog))
			return PTR_ERR(prog);

		if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
		    attr->test.ctx_size_in > U16_MAX) {
			bpf_prog_put(prog);
			return -EINVAL;
		}

		run_ctx.bpf_cookie = 0;
		if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
			/* recursion detected */
			__bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
			bpf_prog_put(prog);
			return -EBUSY;
		}
		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
		__bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
						&run_ctx);
		bpf_prog_put(prog);
		return 0;
#endif
	default:
		return ____bpf_sys_bpf(cmd, attr, size);
	}
}
EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL");

/* Helper prototype for bpf_sys_bpf(): (cmd, read-only attr memory, its size). */
static const struct bpf_func_proto bpf_sys_bpf_proto = {
	.func		= bpf_sys_bpf,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
};

/*
 * Weak default: fall back to the base helper set.  Being __weak, another
 * definition of this symbol takes precedence when one is linked in.
 */
const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return bpf_base_func_proto(func_id, prog);
}

BPF_CALL_1(bpf_sys_close, u32, fd)
{
	/* When bpf program calls this helper there should not be
	 * an fdget() without matching completed fdput().
	 * This helper is allowed in the following callchain only:
	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
	 */
	return close_fd(fd);
}

/* Helper prototype for bpf_sys_close(): a single unchecked scalar argument. */
static const struct bpf_func_proto bpf_sys_close_proto = {
	.func		= bpf_sys_close,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

/*
 * bpf_kallsyms_lookup_name() helper: resolve @name with
 * kallsyms_lookup_name() and store the result in *@res.
 *
 * *@res is zeroed first, so it is 0 on every failure path.  Returns -EINVAL
 * when @flags is non-zero, @name_sz is <= 1, or the last byte of @name is not
 * NUL; -EPERM when bpf_dump_raw_ok() refuses the current credentials; -ENOENT
 * when the lookup yields 0; and 0 otherwise.
 */
BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
	*res = 0;
	if (flags)
		return -EINVAL;

	if (name_sz <= 1 || name[name_sz - 1])
		return -EINVAL;

	if (!bpf_dump_raw_ok(current_cred()))
		return -EPERM;

	*res = kallsyms_lookup_name(name);
	return *res ? 0 : -ENOENT;
}

/*
 * Helper prototype for bpf_kallsyms_lookup_name(): (name buffer, its size,
 * flags, pointer to a u64-sized output slot).
 */
static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
	.func		= bpf_kallsyms_lookup_name,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
	.arg4_size	= sizeof(u64),
};

/*
 * ->get_func_proto() for syscall programs.  BPF_FUNC_sys_bpf is offered only
 * when bpf_token_capable(prog->aux->token, CAP_PERFMON) holds; helpers not
 * listed here are looked up through tracing_prog_func_proto().
 */
static const struct bpf_func_proto *
syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_sys_bpf:
		return !bpf_token_capable(prog->aux->token, CAP_PERFMON)
		       ? NULL : &bpf_sys_bpf_proto;
	case BPF_FUNC_btf_find_by_name_kind:
		return &bpf_btf_find_by_name_kind_proto;
	case BPF_FUNC_sys_close:
		return &bpf_sys_close_proto;
	case BPF_FUNC_kallsyms_lookup_name:
		return &bpf_kallsyms_lookup_name_proto;
	default:
		return tracing_prog_func_proto(func_id, prog);
	}
}

const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
	.get_func_proto  = syscall_prog_func_proto,
	.is_valid_access = syscall_prog_is_valid_access,
};

const struct bpf_prog_ops bpf_syscall_prog_ops = {
	.test_run = bpf_prog_test_run_syscall,
};

#ifdef CONFIG_SYSCTL
/*
 * proc handler for "bpf_stats_enabled".
 *
 * The value exposed to user space is the function-static saved_val (0 or 1),
 * not the static key's own count.  A successful write that changes the value
 * increments or decrements the static key in table->data once and records the
 * new value.  Writes require CAP_SYS_ADMIN.
 */
static int bpf_stats_handler(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct static_key *key = (struct static_key *)table->data;
	static int saved_val;
	int val, ret;
	/* Temporary table so proc_dointvec_minmax() works on the local 'val'. */
	struct ctl_table tmp = {
		.data   = &val,
		.maxlen = sizeof(val),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	val = saved_val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret && val != saved_val) {
		if (val)
			static_key_slow_inc(key);
		else
			static_key_slow_dec(key);
		saved_val = val;
	}
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}

/* Weak no-op hook invoked by bpf_unpriv_handler() on writes. */
void __weak unpriv_ebpf_notify(int new_state)
{
}

/*
 * proc handler for "unprivileged_bpf_disabled".
 *
 * Reads and writes go through a local copy of the value.  Once the stored
 * value is 1, a write of anything other than 1 fails with -EPERM and leaves
 * the stored value untouched.  Writes require CAP_SYS_ADMIN.  Except on that
 * -EPERM path, unpriv_ebpf_notify() is called for every write with the
 * current content of the local copy.
 */
static int bpf_unpriv_handler(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, unpriv_enable = *(int *)table->data;
	bool locked_state = unpriv_enable == 1;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Parse into the local copy; commit only after the lock check below. */
	tmp.data = &unpriv_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (locked_state && unpriv_enable != 1)
			return -EPERM;
		*(int *)table->data = unpriv_enable;
	}

	if (write)
		unpriv_ebpf_notify(unpriv_enable);

	return ret;
}

/* sysctl entries registered under "kernel" by bpf_syscall_sysctl_init(). */
static const struct ctl_table bpf_syscall_table[] = {
	{
		.procname	= "unprivileged_bpf_disabled",
		.data		= &sysctl_unprivileged_bpf_disabled,
		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
		.mode		= 0644,
		.proc_handler	= bpf_unpriv_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "bpf_stats_enabled",
		.data		= &bpf_stats_enabled_key.key,
		.mode		= 0644,
		.proc_handler	= bpf_stats_handler,
	},
};

/* Register bpf_syscall_table under "kernel"; always returns 0. */
static int __init bpf_syscall_sysctl_init(void)
{
	register_sysctl_init("kernel", bpf_syscall_table);
	return 0;
}
late_initcall(bpf_syscall_sysctl_init);
#endif /* CONFIG_SYSCTL */