1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 */ 4 #include <linux/bpf.h> 5 #include <linux/bpf-cgroup.h> 6 #include <linux/bpf_trace.h> 7 #include <linux/bpf_lirc.h> 8 #include <linux/bpf_verifier.h> 9 #include <linux/bsearch.h> 10 #include <linux/btf.h> 11 #include <linux/syscalls.h> 12 #include <linux/slab.h> 13 #include <linux/sched/signal.h> 14 #include <linux/vmalloc.h> 15 #include <linux/mmzone.h> 16 #include <linux/anon_inodes.h> 17 #include <linux/fdtable.h> 18 #include <linux/file.h> 19 #include <linux/fs.h> 20 #include <linux/license.h> 21 #include <linux/filter.h> 22 #include <linux/kernel.h> 23 #include <linux/idr.h> 24 #include <linux/cred.h> 25 #include <linux/timekeeping.h> 26 #include <linux/ctype.h> 27 #include <linux/nospec.h> 28 #include <linux/audit.h> 29 #include <uapi/linux/btf.h> 30 #include <linux/pgtable.h> 31 #include <linux/bpf_lsm.h> 32 #include <linux/poll.h> 33 #include <linux/sort.h> 34 #include <linux/bpf-netns.h> 35 #include <linux/rcupdate_trace.h> 36 #include <linux/memcontrol.h> 37 #include <linux/trace_events.h> 38 39 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ 40 (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ 41 (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 42 #define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY) 43 #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) 44 #define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \ 45 IS_FD_HASH(map)) 46 47 #define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) 48 49 DEFINE_PER_CPU(int, bpf_prog_active); 50 static DEFINE_IDR(prog_idr); 51 static DEFINE_SPINLOCK(prog_idr_lock); 52 static DEFINE_IDR(map_idr); 53 static DEFINE_SPINLOCK(map_idr_lock); 54 static DEFINE_IDR(link_idr); 55 static DEFINE_SPINLOCK(link_idr_lock); 56 57 int sysctl_unprivileged_bpf_disabled __read_mostly = 58 IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; 59 60 static const struct bpf_map_ops * const bpf_map_types[] = { 61 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 62 #define BPF_MAP_TYPE(_id, _ops) \ 63 [_id] = &_ops, 64 #define BPF_LINK_TYPE(_id, _name) 65 #include <linux/bpf_types.h> 66 #undef BPF_PROG_TYPE 67 #undef BPF_MAP_TYPE 68 #undef BPF_LINK_TYPE 69 }; 70 71 /* 72 * If we're handed a bigger struct than we know of, ensure all the unknown bits 73 * are 0 - i.e. new user-space does not rely on any kernel feature extensions 74 * we don't know about yet. 75 * 76 * There is a ToCToU between this function call and the following 77 * copy_from_user() call. However, this is not a concern since this function is 78 * meant to be a future-proofing of bits. 79 */ 80 int bpf_check_uarg_tail_zero(bpfptr_t uaddr, 81 size_t expected_size, 82 size_t actual_size) 83 { 84 int res; 85 86 if (unlikely(actual_size > PAGE_SIZE)) /* silly large */ 87 return -E2BIG; 88 89 if (actual_size <= expected_size) 90 return 0; 91 92 if (uaddr.is_kernel) 93 res = memchr_inv(uaddr.kernel + expected_size, 0, 94 actual_size - expected_size) == NULL; 95 else 96 res = check_zeroed_user(uaddr.user + expected_size, 97 actual_size - expected_size); 98 if (res < 0) 99 return res; 100 return res ? 
0 : -E2BIG; 101 } 102 103 const struct bpf_map_ops bpf_map_offload_ops = { 104 .map_meta_equal = bpf_map_meta_equal, 105 .map_alloc = bpf_map_offload_map_alloc, 106 .map_free = bpf_map_offload_map_free, 107 .map_check_btf = map_check_no_btf, 108 }; 109 110 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 111 { 112 const struct bpf_map_ops *ops; 113 u32 type = attr->map_type; 114 struct bpf_map *map; 115 int err; 116 117 if (type >= ARRAY_SIZE(bpf_map_types)) 118 return ERR_PTR(-EINVAL); 119 type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types)); 120 ops = bpf_map_types[type]; 121 if (!ops) 122 return ERR_PTR(-EINVAL); 123 124 if (ops->map_alloc_check) { 125 err = ops->map_alloc_check(attr); 126 if (err) 127 return ERR_PTR(err); 128 } 129 if (attr->map_ifindex) 130 ops = &bpf_map_offload_ops; 131 map = ops->map_alloc(attr); 132 if (IS_ERR(map)) 133 return map; 134 map->ops = ops; 135 map->map_type = type; 136 return map; 137 } 138 139 static void bpf_map_write_active_inc(struct bpf_map *map) 140 { 141 atomic64_inc(&map->writecnt); 142 } 143 144 static void bpf_map_write_active_dec(struct bpf_map *map) 145 { 146 atomic64_dec(&map->writecnt); 147 } 148 149 bool bpf_map_write_active(const struct bpf_map *map) 150 { 151 return atomic64_read(&map->writecnt) != 0; 152 } 153 154 static u32 bpf_map_value_size(const struct bpf_map *map) 155 { 156 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 157 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 158 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || 159 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 160 return round_up(map->value_size, 8) * num_possible_cpus(); 161 else if (IS_FD_MAP(map)) 162 return sizeof(u32); 163 else 164 return map->value_size; 165 } 166 167 static void maybe_wait_bpf_programs(struct bpf_map *map) 168 { 169 /* Wait for any running BPF programs to complete so that 170 * userspace, when we return to it, knows that all programs 171 * that could be running use the new map value. 
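	 *
	 * For example, after userspace replaces the inner map stored at some
	 * key of a BPF_MAP_TYPE_ARRAY_OF_MAPS, a program that has already
	 * looked up the outer map may still be using the old inner map under
	 * RCU protection; the synchronize_rcu() below does not return until
	 * such in-flight readers have finished.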
172 */ 173 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || 174 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 175 synchronize_rcu(); 176 } 177 178 static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, 179 void *key, void *value, __u64 flags) 180 { 181 int err; 182 183 /* Need to create a kthread, thus must support schedule */ 184 if (bpf_map_is_offloaded(map)) { 185 return bpf_map_offload_update_elem(map, key, value, flags); 186 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || 187 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 188 return map->ops->map_update_elem(map, key, value, flags); 189 } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || 190 map->map_type == BPF_MAP_TYPE_SOCKMAP) { 191 return sock_map_update_elem_sys(map, key, value, flags); 192 } else if (IS_FD_PROG_ARRAY(map)) { 193 return bpf_fd_array_map_update_elem(map, map_file, key, value, 194 flags); 195 } 196 197 bpf_disable_instrumentation(); 198 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 199 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 200 err = bpf_percpu_hash_update(map, key, value, flags); 201 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 202 err = bpf_percpu_array_update(map, key, value, flags); 203 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 204 err = bpf_percpu_cgroup_storage_update(map, key, value, 205 flags); 206 } else if (IS_FD_ARRAY(map)) { 207 rcu_read_lock(); 208 err = bpf_fd_array_map_update_elem(map, map_file, key, value, 209 flags); 210 rcu_read_unlock(); 211 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 212 rcu_read_lock(); 213 err = bpf_fd_htab_map_update_elem(map, map_file, key, value, 214 flags); 215 rcu_read_unlock(); 216 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 217 /* rcu_read_lock() is not needed */ 218 err = bpf_fd_reuseport_array_update_elem(map, key, value, 219 flags); 220 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 221 map->map_type == BPF_MAP_TYPE_STACK || 222 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 223 err = map->ops->map_push_elem(map, value, flags); 224 } else { 225 rcu_read_lock(); 226 err = map->ops->map_update_elem(map, key, value, flags); 227 rcu_read_unlock(); 228 } 229 bpf_enable_instrumentation(); 230 maybe_wait_bpf_programs(map); 231 232 return err; 233 } 234 235 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, 236 __u64 flags) 237 { 238 void *ptr; 239 int err; 240 241 if (bpf_map_is_offloaded(map)) 242 return bpf_map_offload_lookup_elem(map, key, value); 243 244 bpf_disable_instrumentation(); 245 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 246 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 247 err = bpf_percpu_hash_copy(map, key, value); 248 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 249 err = bpf_percpu_array_copy(map, key, value); 250 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 251 err = bpf_percpu_cgroup_storage_copy(map, key, value); 252 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 253 err = bpf_stackmap_copy(map, key, value); 254 } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { 255 err = bpf_fd_array_map_lookup_elem(map, key, value); 256 } else if (IS_FD_HASH(map)) { 257 err = bpf_fd_htab_map_lookup_elem(map, key, value); 258 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 259 err = bpf_fd_reuseport_array_lookup_elem(map, key, value); 260 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 261 map->map_type == BPF_MAP_TYPE_STACK || 262 map->map_type == 
BPF_MAP_TYPE_BLOOM_FILTER) { 263 err = map->ops->map_peek_elem(map, value); 264 } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 265 /* struct_ops map requires directly updating "value" */ 266 err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); 267 } else { 268 rcu_read_lock(); 269 if (map->ops->map_lookup_elem_sys_only) 270 ptr = map->ops->map_lookup_elem_sys_only(map, key); 271 else 272 ptr = map->ops->map_lookup_elem(map, key); 273 if (IS_ERR(ptr)) { 274 err = PTR_ERR(ptr); 275 } else if (!ptr) { 276 err = -ENOENT; 277 } else { 278 err = 0; 279 if (flags & BPF_F_LOCK) 280 /* lock 'ptr' and copy everything but lock */ 281 copy_map_value_locked(map, value, ptr, true); 282 else 283 copy_map_value(map, value, ptr); 284 /* mask lock and timer, since value wasn't zero inited */ 285 check_and_init_map_value(map, value); 286 } 287 rcu_read_unlock(); 288 } 289 290 bpf_enable_instrumentation(); 291 maybe_wait_bpf_programs(map); 292 293 return err; 294 } 295 296 /* Please, do not use this function outside from the map creation path 297 * (e.g. in map update path) without taking care of setting the active 298 * memory cgroup (see at bpf_map_kmalloc_node() for example). 299 */ 300 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) 301 { 302 /* We really just want to fail instead of triggering OOM killer 303 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, 304 * which is used for lower order allocation requests. 305 * 306 * It has been observed that higher order allocation requests done by 307 * vmalloc with __GFP_NORETRY being set might fail due to not trying 308 * to reclaim memory from the page cache, thus we set 309 * __GFP_RETRY_MAYFAIL to avoid such situations. 310 */ 311 312 gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO); 313 unsigned int flags = 0; 314 unsigned long align = 1; 315 void *area; 316 317 if (size >= SIZE_MAX) 318 return NULL; 319 320 /* kmalloc()'ed memory can't be mmap()'ed */ 321 if (mmapable) { 322 BUG_ON(!PAGE_ALIGNED(size)); 323 align = SHMLBA; 324 flags = VM_USERMAP; 325 } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 326 area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, 327 numa_node); 328 if (area != NULL) 329 return area; 330 } 331 332 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, 333 gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL, 334 flags, numa_node, __builtin_return_address(0)); 335 } 336 337 void *bpf_map_area_alloc(u64 size, int numa_node) 338 { 339 return __bpf_map_area_alloc(size, numa_node, false); 340 } 341 342 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node) 343 { 344 return __bpf_map_area_alloc(size, numa_node, true); 345 } 346 347 void bpf_map_area_free(void *area) 348 { 349 kvfree(area); 350 } 351 352 static u32 bpf_map_flags_retain_permanent(u32 flags) 353 { 354 /* Some map creation flags are not tied to the map object but 355 * rather to the map fd instead, so they have no meaning upon 356 * map object inspection since multiple file descriptors with 357 * different (access) properties can exist here. Thus, given 358 * this has zero meaning for the map itself, lets clear these 359 * from here. 
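	 *
	 * For instance, the same map can be reopened through
	 * BPF_MAP_GET_FD_BY_ID with different open_flags, so one fd may be
	 * read-only while another is write-only; recording either flag in
	 * map->map_flags would be misleading when the map object is later
	 * inspected (e.g. via BPF_OBJ_GET_INFO_BY_FD).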
360 */ 361 return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); 362 } 363 364 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) 365 { 366 map->map_type = attr->map_type; 367 map->key_size = attr->key_size; 368 map->value_size = attr->value_size; 369 map->max_entries = attr->max_entries; 370 map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); 371 map->numa_node = bpf_map_attr_numa_node(attr); 372 map->map_extra = attr->map_extra; 373 } 374 375 static int bpf_map_alloc_id(struct bpf_map *map) 376 { 377 int id; 378 379 idr_preload(GFP_KERNEL); 380 spin_lock_bh(&map_idr_lock); 381 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); 382 if (id > 0) 383 map->id = id; 384 spin_unlock_bh(&map_idr_lock); 385 idr_preload_end(); 386 387 if (WARN_ON_ONCE(!id)) 388 return -ENOSPC; 389 390 return id > 0 ? 0 : id; 391 } 392 393 void bpf_map_free_id(struct bpf_map *map) 394 { 395 unsigned long flags; 396 397 /* Offloaded maps are removed from the IDR store when their device 398 * disappears - even if someone holds an fd to them they are unusable, 399 * the memory is gone, all ops will fail; they are simply waiting for 400 * refcnt to drop to be freed. 401 */ 402 if (!map->id) 403 return; 404 405 spin_lock_irqsave(&map_idr_lock, flags); 406 407 idr_remove(&map_idr, map->id); 408 map->id = 0; 409 410 spin_unlock_irqrestore(&map_idr_lock, flags); 411 } 412 413 #ifdef CONFIG_MEMCG_KMEM 414 static void bpf_map_save_memcg(struct bpf_map *map) 415 { 416 /* Currently if a map is created by a process belonging to the root 417 * memory cgroup, get_obj_cgroup_from_current() will return NULL. 418 * So we have to check map->objcg for being NULL each time it's 419 * being used. 420 */ 421 if (memcg_bpf_enabled()) 422 map->objcg = get_obj_cgroup_from_current(); 423 } 424 425 static void bpf_map_release_memcg(struct bpf_map *map) 426 { 427 if (map->objcg) 428 obj_cgroup_put(map->objcg); 429 } 430 431 static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map) 432 { 433 if (map->objcg) 434 return get_mem_cgroup_from_objcg(map->objcg); 435 436 return root_mem_cgroup; 437 } 438 439 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, 440 int node) 441 { 442 struct mem_cgroup *memcg, *old_memcg; 443 void *ptr; 444 445 memcg = bpf_map_get_memcg(map); 446 old_memcg = set_active_memcg(memcg); 447 ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); 448 set_active_memcg(old_memcg); 449 mem_cgroup_put(memcg); 450 451 return ptr; 452 } 453 454 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) 455 { 456 struct mem_cgroup *memcg, *old_memcg; 457 void *ptr; 458 459 memcg = bpf_map_get_memcg(map); 460 old_memcg = set_active_memcg(memcg); 461 ptr = kzalloc(size, flags | __GFP_ACCOUNT); 462 set_active_memcg(old_memcg); 463 mem_cgroup_put(memcg); 464 465 return ptr; 466 } 467 468 void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, 469 gfp_t flags) 470 { 471 struct mem_cgroup *memcg, *old_memcg; 472 void *ptr; 473 474 memcg = bpf_map_get_memcg(map); 475 old_memcg = set_active_memcg(memcg); 476 ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT); 477 set_active_memcg(old_memcg); 478 mem_cgroup_put(memcg); 479 480 return ptr; 481 } 482 483 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, 484 size_t align, gfp_t flags) 485 { 486 struct mem_cgroup *memcg, *old_memcg; 487 void __percpu *ptr; 488 489 memcg = bpf_map_get_memcg(map); 490 old_memcg = set_active_memcg(memcg); 491 ptr = __alloc_percpu_gfp(size, 
align, flags | __GFP_ACCOUNT); 492 set_active_memcg(old_memcg); 493 mem_cgroup_put(memcg); 494 495 return ptr; 496 } 497 498 #else 499 static void bpf_map_save_memcg(struct bpf_map *map) 500 { 501 } 502 503 static void bpf_map_release_memcg(struct bpf_map *map) 504 { 505 } 506 #endif 507 508 static int btf_field_cmp(const void *a, const void *b) 509 { 510 const struct btf_field *f1 = a, *f2 = b; 511 512 if (f1->offset < f2->offset) 513 return -1; 514 else if (f1->offset > f2->offset) 515 return 1; 516 return 0; 517 } 518 519 struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset, 520 enum btf_field_type type) 521 { 522 struct btf_field *field; 523 524 if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & type)) 525 return NULL; 526 field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp); 527 if (!field || !(field->type & type)) 528 return NULL; 529 return field; 530 } 531 532 void btf_record_free(struct btf_record *rec) 533 { 534 int i; 535 536 if (IS_ERR_OR_NULL(rec)) 537 return; 538 for (i = 0; i < rec->cnt; i++) { 539 switch (rec->fields[i].type) { 540 case BPF_KPTR_UNREF: 541 case BPF_KPTR_REF: 542 if (rec->fields[i].kptr.module) 543 module_put(rec->fields[i].kptr.module); 544 btf_put(rec->fields[i].kptr.btf); 545 break; 546 case BPF_LIST_HEAD: 547 case BPF_LIST_NODE: 548 case BPF_RB_ROOT: 549 case BPF_RB_NODE: 550 case BPF_SPIN_LOCK: 551 case BPF_TIMER: 552 /* Nothing to release */ 553 break; 554 default: 555 WARN_ON_ONCE(1); 556 continue; 557 } 558 } 559 kfree(rec); 560 } 561 562 void bpf_map_free_record(struct bpf_map *map) 563 { 564 btf_record_free(map->record); 565 map->record = NULL; 566 } 567 568 struct btf_record *btf_record_dup(const struct btf_record *rec) 569 { 570 const struct btf_field *fields; 571 struct btf_record *new_rec; 572 int ret, size, i; 573 574 if (IS_ERR_OR_NULL(rec)) 575 return NULL; 576 size = offsetof(struct btf_record, fields[rec->cnt]); 577 new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN); 578 if (!new_rec) 579 return ERR_PTR(-ENOMEM); 580 /* Do a deep copy of the btf_record */ 581 fields = rec->fields; 582 new_rec->cnt = 0; 583 for (i = 0; i < rec->cnt; i++) { 584 switch (fields[i].type) { 585 case BPF_KPTR_UNREF: 586 case BPF_KPTR_REF: 587 btf_get(fields[i].kptr.btf); 588 if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { 589 ret = -ENXIO; 590 goto free; 591 } 592 break; 593 case BPF_LIST_HEAD: 594 case BPF_LIST_NODE: 595 case BPF_RB_ROOT: 596 case BPF_RB_NODE: 597 case BPF_SPIN_LOCK: 598 case BPF_TIMER: 599 /* Nothing to acquire */ 600 break; 601 default: 602 ret = -EFAULT; 603 WARN_ON_ONCE(1); 604 goto free; 605 } 606 new_rec->cnt++; 607 } 608 return new_rec; 609 free: 610 btf_record_free(new_rec); 611 return ERR_PTR(ret); 612 } 613 614 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b) 615 { 616 bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b); 617 int size; 618 619 if (!a_has_fields && !b_has_fields) 620 return true; 621 if (a_has_fields != b_has_fields) 622 return false; 623 if (rec_a->cnt != rec_b->cnt) 624 return false; 625 size = offsetof(struct btf_record, fields[rec_a->cnt]); 626 /* btf_parse_fields uses kzalloc to allocate a btf_record, so unused 627 * members are zeroed out. So memcmp is safe to do without worrying 628 * about padding/unused fields. 
629 * 630 * While spin_lock, timer, and kptr have no relation to map BTF, 631 * list_head metadata is specific to map BTF, the btf and value_rec 632 * members in particular. btf is the map BTF, while value_rec points to 633 * btf_record in that map BTF. 634 * 635 * So while by default, we don't rely on the map BTF (which the records 636 * were parsed from) matching for both records, which is not backwards 637 * compatible, in case list_head is part of it, we implicitly rely on 638 * that by way of depending on memcmp succeeding for it. 639 */ 640 return !memcmp(rec_a, rec_b, size); 641 } 642 643 void bpf_obj_free_timer(const struct btf_record *rec, void *obj) 644 { 645 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER))) 646 return; 647 bpf_timer_cancel_and_free(obj + rec->timer_off); 648 } 649 650 void bpf_obj_free_fields(const struct btf_record *rec, void *obj) 651 { 652 const struct btf_field *fields; 653 int i; 654 655 if (IS_ERR_OR_NULL(rec)) 656 return; 657 fields = rec->fields; 658 for (i = 0; i < rec->cnt; i++) { 659 const struct btf_field *field = &fields[i]; 660 void *field_ptr = obj + field->offset; 661 662 switch (fields[i].type) { 663 case BPF_SPIN_LOCK: 664 break; 665 case BPF_TIMER: 666 bpf_timer_cancel_and_free(field_ptr); 667 break; 668 case BPF_KPTR_UNREF: 669 WRITE_ONCE(*(u64 *)field_ptr, 0); 670 break; 671 case BPF_KPTR_REF: 672 field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0)); 673 break; 674 case BPF_LIST_HEAD: 675 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 676 continue; 677 bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off); 678 break; 679 case BPF_RB_ROOT: 680 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 681 continue; 682 bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off); 683 break; 684 case BPF_LIST_NODE: 685 case BPF_RB_NODE: 686 break; 687 default: 688 WARN_ON_ONCE(1); 689 continue; 690 } 691 } 692 } 693 694 /* called from workqueue */ 695 static void bpf_map_free_deferred(struct work_struct *work) 696 { 697 struct bpf_map *map = container_of(work, struct bpf_map, work); 698 struct btf_field_offs *foffs = map->field_offs; 699 struct btf_record *rec = map->record; 700 701 security_bpf_map_free(map); 702 bpf_map_release_memcg(map); 703 /* implementation dependent freeing */ 704 map->ops->map_free(map); 705 /* Delay freeing of field_offs and btf_record for maps, as map_free 706 * callback usually needs access to them. It is better to do it here 707 * than require each callback to do the free itself manually. 708 * 709 * Note that the btf_record stashed in map->inner_map_meta->record was 710 * already freed using the map_free callback for map in map case which 711 * eventually calls bpf_map_free_meta, since inner_map_meta is only a 712 * template bpf_map struct used during verification. 713 */ 714 kfree(foffs); 715 btf_record_free(rec); 716 } 717 718 static void bpf_map_put_uref(struct bpf_map *map) 719 { 720 if (atomic64_dec_and_test(&map->usercnt)) { 721 if (map->ops->map_release_uref) 722 map->ops->map_release_uref(map); 723 } 724 } 725 726 /* decrement map refcnt and schedule it for freeing via workqueue 727 * (underlying map implementation ops->map_free() might sleep) 728 */ 729 void bpf_map_put(struct bpf_map *map) 730 { 731 if (atomic64_dec_and_test(&map->refcnt)) { 732 /* bpf_map_free_id() must be called first */ 733 bpf_map_free_id(map); 734 btf_put(map->btf); 735 INIT_WORK(&map->work, bpf_map_free_deferred); 736 /* Avoid spawning kworkers, since they all might contend 737 * for the same mutex like slab_mutex. 
738 */ 739 queue_work(system_unbound_wq, &map->work); 740 } 741 } 742 EXPORT_SYMBOL_GPL(bpf_map_put); 743 744 void bpf_map_put_with_uref(struct bpf_map *map) 745 { 746 bpf_map_put_uref(map); 747 bpf_map_put(map); 748 } 749 750 static int bpf_map_release(struct inode *inode, struct file *filp) 751 { 752 struct bpf_map *map = filp->private_data; 753 754 if (map->ops->map_release) 755 map->ops->map_release(map, filp); 756 757 bpf_map_put_with_uref(map); 758 return 0; 759 } 760 761 static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) 762 { 763 fmode_t mode = f.file->f_mode; 764 765 /* Our file permissions may have been overridden by global 766 * map permissions facing syscall side. 767 */ 768 if (READ_ONCE(map->frozen)) 769 mode &= ~FMODE_CAN_WRITE; 770 return mode; 771 } 772 773 #ifdef CONFIG_PROC_FS 774 /* Provides an approximation of the map's memory footprint. 775 * Used only to provide a backward compatibility and display 776 * a reasonable "memlock" info. 777 */ 778 static unsigned long bpf_map_memory_footprint(const struct bpf_map *map) 779 { 780 unsigned long size; 781 782 size = round_up(map->key_size + bpf_map_value_size(map), 8); 783 784 return round_up(map->max_entries * size, PAGE_SIZE); 785 } 786 787 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 788 { 789 struct bpf_map *map = filp->private_data; 790 u32 type = 0, jited = 0; 791 792 if (map_type_contains_progs(map)) { 793 spin_lock(&map->owner.lock); 794 type = map->owner.type; 795 jited = map->owner.jited; 796 spin_unlock(&map->owner.lock); 797 } 798 799 seq_printf(m, 800 "map_type:\t%u\n" 801 "key_size:\t%u\n" 802 "value_size:\t%u\n" 803 "max_entries:\t%u\n" 804 "map_flags:\t%#x\n" 805 "map_extra:\t%#llx\n" 806 "memlock:\t%lu\n" 807 "map_id:\t%u\n" 808 "frozen:\t%u\n", 809 map->map_type, 810 map->key_size, 811 map->value_size, 812 map->max_entries, 813 map->map_flags, 814 (unsigned long long)map->map_extra, 815 bpf_map_memory_footprint(map), 816 map->id, 817 READ_ONCE(map->frozen)); 818 if (type) { 819 seq_printf(m, "owner_prog_type:\t%u\n", type); 820 seq_printf(m, "owner_jited:\t%u\n", jited); 821 } 822 } 823 #endif 824 825 static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, 826 loff_t *ppos) 827 { 828 /* We need this handler such that alloc_file() enables 829 * f_mode with FMODE_CAN_READ. 830 */ 831 return -EINVAL; 832 } 833 834 static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, 835 size_t siz, loff_t *ppos) 836 { 837 /* We need this handler such that alloc_file() enables 838 * f_mode with FMODE_CAN_WRITE. 
839 */ 840 return -EINVAL; 841 } 842 843 /* called for any extra memory-mapped regions (except initial) */ 844 static void bpf_map_mmap_open(struct vm_area_struct *vma) 845 { 846 struct bpf_map *map = vma->vm_file->private_data; 847 848 if (vma->vm_flags & VM_MAYWRITE) 849 bpf_map_write_active_inc(map); 850 } 851 852 /* called for all unmapped memory region (including initial) */ 853 static void bpf_map_mmap_close(struct vm_area_struct *vma) 854 { 855 struct bpf_map *map = vma->vm_file->private_data; 856 857 if (vma->vm_flags & VM_MAYWRITE) 858 bpf_map_write_active_dec(map); 859 } 860 861 static const struct vm_operations_struct bpf_map_default_vmops = { 862 .open = bpf_map_mmap_open, 863 .close = bpf_map_mmap_close, 864 }; 865 866 static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) 867 { 868 struct bpf_map *map = filp->private_data; 869 int err; 870 871 if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record)) 872 return -ENOTSUPP; 873 874 if (!(vma->vm_flags & VM_SHARED)) 875 return -EINVAL; 876 877 mutex_lock(&map->freeze_mutex); 878 879 if (vma->vm_flags & VM_WRITE) { 880 if (map->frozen) { 881 err = -EPERM; 882 goto out; 883 } 884 /* map is meant to be read-only, so do not allow mapping as 885 * writable, because it's possible to leak a writable page 886 * reference and allows user-space to still modify it after 887 * freezing, while verifier will assume contents do not change 888 */ 889 if (map->map_flags & BPF_F_RDONLY_PROG) { 890 err = -EACCES; 891 goto out; 892 } 893 } 894 895 /* set default open/close callbacks */ 896 vma->vm_ops = &bpf_map_default_vmops; 897 vma->vm_private_data = map; 898 vm_flags_clear(vma, VM_MAYEXEC); 899 if (!(vma->vm_flags & VM_WRITE)) 900 /* disallow re-mapping with PROT_WRITE */ 901 vm_flags_clear(vma, VM_MAYWRITE); 902 903 err = map->ops->map_mmap(map, vma); 904 if (err) 905 goto out; 906 907 if (vma->vm_flags & VM_MAYWRITE) 908 bpf_map_write_active_inc(map); 909 out: 910 mutex_unlock(&map->freeze_mutex); 911 return err; 912 } 913 914 static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts) 915 { 916 struct bpf_map *map = filp->private_data; 917 918 if (map->ops->map_poll) 919 return map->ops->map_poll(map, filp, pts); 920 921 return EPOLLERR; 922 } 923 924 const struct file_operations bpf_map_fops = { 925 #ifdef CONFIG_PROC_FS 926 .show_fdinfo = bpf_map_show_fdinfo, 927 #endif 928 .release = bpf_map_release, 929 .read = bpf_dummy_read, 930 .write = bpf_dummy_write, 931 .mmap = bpf_map_mmap, 932 .poll = bpf_map_poll, 933 }; 934 935 int bpf_map_new_fd(struct bpf_map *map, int flags) 936 { 937 int ret; 938 939 ret = security_bpf_map(map, OPEN_FMODE(flags)); 940 if (ret < 0) 941 return ret; 942 943 return anon_inode_getfd("bpf-map", &bpf_map_fops, map, 944 flags | O_CLOEXEC); 945 } 946 947 int bpf_get_file_flag(int flags) 948 { 949 if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) 950 return -EINVAL; 951 if (flags & BPF_F_RDONLY) 952 return O_RDONLY; 953 if (flags & BPF_F_WRONLY) 954 return O_WRONLY; 955 return O_RDWR; 956 } 957 958 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 959 #define CHECK_ATTR(CMD) \ 960 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 961 sizeof(attr->CMD##_LAST_FIELD), 0, \ 962 sizeof(*attr) - \ 963 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 964 sizeof(attr->CMD##_LAST_FIELD)) != NULL 965 966 /* dst and src must have at least "size" number of bytes. 967 * Return strlen on success and < 0 on error. 
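 *
 * Example of the intended use, as seen later in map_create():
 *
 *	err = bpf_obj_name_cpy(map->name, attr->map_name,
 *			       sizeof(attr->map_name));
 *
 * Only alphanumeric characters, '_' and '.' are accepted, and the source
 * must be NUL-terminated within "size" bytes.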
968 */ 969 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size) 970 { 971 const char *end = src + size; 972 const char *orig_src = src; 973 974 memset(dst, 0, size); 975 /* Copy all isalnum(), '_' and '.' chars. */ 976 while (src < end && *src) { 977 if (!isalnum(*src) && 978 *src != '_' && *src != '.') 979 return -EINVAL; 980 *dst++ = *src++; 981 } 982 983 /* No '\0' found in "size" number of bytes */ 984 if (src == end) 985 return -EINVAL; 986 987 return src - orig_src; 988 } 989 990 int map_check_no_btf(const struct bpf_map *map, 991 const struct btf *btf, 992 const struct btf_type *key_type, 993 const struct btf_type *value_type) 994 { 995 return -ENOTSUPP; 996 } 997 998 static int map_check_btf(struct bpf_map *map, const struct btf *btf, 999 u32 btf_key_id, u32 btf_value_id) 1000 { 1001 const struct btf_type *key_type, *value_type; 1002 u32 key_size, value_size; 1003 int ret = 0; 1004 1005 /* Some maps allow key to be unspecified. */ 1006 if (btf_key_id) { 1007 key_type = btf_type_id_size(btf, &btf_key_id, &key_size); 1008 if (!key_type || key_size != map->key_size) 1009 return -EINVAL; 1010 } else { 1011 key_type = btf_type_by_id(btf, 0); 1012 if (!map->ops->map_check_btf) 1013 return -EINVAL; 1014 } 1015 1016 value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 1017 if (!value_type || value_size != map->value_size) 1018 return -EINVAL; 1019 1020 map->record = btf_parse_fields(btf, value_type, 1021 BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | 1022 BPF_RB_ROOT, 1023 map->value_size); 1024 if (!IS_ERR_OR_NULL(map->record)) { 1025 int i; 1026 1027 if (!bpf_capable()) { 1028 ret = -EPERM; 1029 goto free_map_tab; 1030 } 1031 if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) { 1032 ret = -EACCES; 1033 goto free_map_tab; 1034 } 1035 for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) { 1036 switch (map->record->field_mask & (1 << i)) { 1037 case 0: 1038 continue; 1039 case BPF_SPIN_LOCK: 1040 if (map->map_type != BPF_MAP_TYPE_HASH && 1041 map->map_type != BPF_MAP_TYPE_ARRAY && 1042 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 1043 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1044 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1045 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1046 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1047 ret = -EOPNOTSUPP; 1048 goto free_map_tab; 1049 } 1050 break; 1051 case BPF_TIMER: 1052 if (map->map_type != BPF_MAP_TYPE_HASH && 1053 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1054 map->map_type != BPF_MAP_TYPE_ARRAY) { 1055 ret = -EOPNOTSUPP; 1056 goto free_map_tab; 1057 } 1058 break; 1059 case BPF_KPTR_UNREF: 1060 case BPF_KPTR_REF: 1061 if (map->map_type != BPF_MAP_TYPE_HASH && 1062 map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 1063 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1064 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH && 1065 map->map_type != BPF_MAP_TYPE_ARRAY && 1066 map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && 1067 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1068 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1069 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1070 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1071 ret = -EOPNOTSUPP; 1072 goto free_map_tab; 1073 } 1074 break; 1075 case BPF_LIST_HEAD: 1076 case BPF_RB_ROOT: 1077 if (map->map_type != BPF_MAP_TYPE_HASH && 1078 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1079 map->map_type != BPF_MAP_TYPE_ARRAY) { 1080 ret = -EOPNOTSUPP; 1081 goto free_map_tab; 1082 } 1083 break; 1084 default: 1085 /* Fail if map_type checks are missing for a field type 
*/ 1086 ret = -EOPNOTSUPP; 1087 goto free_map_tab; 1088 } 1089 } 1090 } 1091 1092 ret = btf_check_and_fixup_fields(btf, map->record); 1093 if (ret < 0) 1094 goto free_map_tab; 1095 1096 if (map->ops->map_check_btf) { 1097 ret = map->ops->map_check_btf(map, btf, key_type, value_type); 1098 if (ret < 0) 1099 goto free_map_tab; 1100 } 1101 1102 return ret; 1103 free_map_tab: 1104 bpf_map_free_record(map); 1105 return ret; 1106 } 1107 1108 #define BPF_MAP_CREATE_LAST_FIELD map_extra 1109 /* called via syscall */ 1110 static int map_create(union bpf_attr *attr) 1111 { 1112 int numa_node = bpf_map_attr_numa_node(attr); 1113 struct btf_field_offs *foffs; 1114 struct bpf_map *map; 1115 int f_flags; 1116 int err; 1117 1118 err = CHECK_ATTR(BPF_MAP_CREATE); 1119 if (err) 1120 return -EINVAL; 1121 1122 if (attr->btf_vmlinux_value_type_id) { 1123 if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || 1124 attr->btf_key_type_id || attr->btf_value_type_id) 1125 return -EINVAL; 1126 } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { 1127 return -EINVAL; 1128 } 1129 1130 if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && 1131 attr->map_extra != 0) 1132 return -EINVAL; 1133 1134 f_flags = bpf_get_file_flag(attr->map_flags); 1135 if (f_flags < 0) 1136 return f_flags; 1137 1138 if (numa_node != NUMA_NO_NODE && 1139 ((unsigned int)numa_node >= nr_node_ids || 1140 !node_online(numa_node))) 1141 return -EINVAL; 1142 1143 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 1144 map = find_and_alloc_map(attr); 1145 if (IS_ERR(map)) 1146 return PTR_ERR(map); 1147 1148 err = bpf_obj_name_cpy(map->name, attr->map_name, 1149 sizeof(attr->map_name)); 1150 if (err < 0) 1151 goto free_map; 1152 1153 atomic64_set(&map->refcnt, 1); 1154 atomic64_set(&map->usercnt, 1); 1155 mutex_init(&map->freeze_mutex); 1156 spin_lock_init(&map->owner.lock); 1157 1158 if (attr->btf_key_type_id || attr->btf_value_type_id || 1159 /* Even the map's value is a kernel's struct, 1160 * the bpf_prog.o must have BTF to begin with 1161 * to figure out the corresponding kernel's 1162 * counter part. Thus, attr->btf_fd has 1163 * to be valid also. 1164 */ 1165 attr->btf_vmlinux_value_type_id) { 1166 struct btf *btf; 1167 1168 btf = btf_get_by_fd(attr->btf_fd); 1169 if (IS_ERR(btf)) { 1170 err = PTR_ERR(btf); 1171 goto free_map; 1172 } 1173 if (btf_is_kernel(btf)) { 1174 btf_put(btf); 1175 err = -EACCES; 1176 goto free_map; 1177 } 1178 map->btf = btf; 1179 1180 if (attr->btf_value_type_id) { 1181 err = map_check_btf(map, btf, attr->btf_key_type_id, 1182 attr->btf_value_type_id); 1183 if (err) 1184 goto free_map; 1185 } 1186 1187 map->btf_key_type_id = attr->btf_key_type_id; 1188 map->btf_value_type_id = attr->btf_value_type_id; 1189 map->btf_vmlinux_value_type_id = 1190 attr->btf_vmlinux_value_type_id; 1191 } 1192 1193 1194 foffs = btf_parse_field_offs(map->record); 1195 if (IS_ERR(foffs)) { 1196 err = PTR_ERR(foffs); 1197 goto free_map; 1198 } 1199 map->field_offs = foffs; 1200 1201 err = security_bpf_map_alloc(map); 1202 if (err) 1203 goto free_map_field_offs; 1204 1205 err = bpf_map_alloc_id(map); 1206 if (err) 1207 goto free_map_sec; 1208 1209 bpf_map_save_memcg(map); 1210 1211 err = bpf_map_new_fd(map, f_flags); 1212 if (err < 0) { 1213 /* failed to allocate fd. 1214 * bpf_map_put_with_uref() is needed because the above 1215 * bpf_map_alloc_id() has published the map 1216 * to the userspace and the userspace may 1217 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 
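		 *
		 * In other words, the map may already be shared at this point,
		 * so drop our references via the regular put path instead of
		 * unwinding through the free_map* labels below.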
1218 */ 1219 bpf_map_put_with_uref(map); 1220 return err; 1221 } 1222 1223 return err; 1224 1225 free_map_sec: 1226 security_bpf_map_free(map); 1227 free_map_field_offs: 1228 kfree(map->field_offs); 1229 free_map: 1230 btf_put(map->btf); 1231 map->ops->map_free(map); 1232 return err; 1233 } 1234 1235 /* if error is returned, fd is released. 1236 * On success caller should complete fd access with matching fdput() 1237 */ 1238 struct bpf_map *__bpf_map_get(struct fd f) 1239 { 1240 if (!f.file) 1241 return ERR_PTR(-EBADF); 1242 if (f.file->f_op != &bpf_map_fops) { 1243 fdput(f); 1244 return ERR_PTR(-EINVAL); 1245 } 1246 1247 return f.file->private_data; 1248 } 1249 1250 void bpf_map_inc(struct bpf_map *map) 1251 { 1252 atomic64_inc(&map->refcnt); 1253 } 1254 EXPORT_SYMBOL_GPL(bpf_map_inc); 1255 1256 void bpf_map_inc_with_uref(struct bpf_map *map) 1257 { 1258 atomic64_inc(&map->refcnt); 1259 atomic64_inc(&map->usercnt); 1260 } 1261 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); 1262 1263 struct bpf_map *bpf_map_get(u32 ufd) 1264 { 1265 struct fd f = fdget(ufd); 1266 struct bpf_map *map; 1267 1268 map = __bpf_map_get(f); 1269 if (IS_ERR(map)) 1270 return map; 1271 1272 bpf_map_inc(map); 1273 fdput(f); 1274 1275 return map; 1276 } 1277 EXPORT_SYMBOL(bpf_map_get); 1278 1279 struct bpf_map *bpf_map_get_with_uref(u32 ufd) 1280 { 1281 struct fd f = fdget(ufd); 1282 struct bpf_map *map; 1283 1284 map = __bpf_map_get(f); 1285 if (IS_ERR(map)) 1286 return map; 1287 1288 bpf_map_inc_with_uref(map); 1289 fdput(f); 1290 1291 return map; 1292 } 1293 1294 /* map_idr_lock should have been held */ 1295 static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) 1296 { 1297 int refold; 1298 1299 refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); 1300 if (!refold) 1301 return ERR_PTR(-ENOENT); 1302 if (uref) 1303 atomic64_inc(&map->usercnt); 1304 1305 return map; 1306 } 1307 1308 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) 1309 { 1310 spin_lock_bh(&map_idr_lock); 1311 map = __bpf_map_inc_not_zero(map, false); 1312 spin_unlock_bh(&map_idr_lock); 1313 1314 return map; 1315 } 1316 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); 1317 1318 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) 1319 { 1320 return -ENOTSUPP; 1321 } 1322 1323 static void *__bpf_copy_key(void __user *ukey, u64 key_size) 1324 { 1325 if (key_size) 1326 return vmemdup_user(ukey, key_size); 1327 1328 if (ukey) 1329 return ERR_PTR(-EINVAL); 1330 1331 return NULL; 1332 } 1333 1334 static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size) 1335 { 1336 if (key_size) 1337 return kvmemdup_bpfptr(ukey, key_size); 1338 1339 if (!bpfptr_is_null(ukey)) 1340 return ERR_PTR(-EINVAL); 1341 1342 return NULL; 1343 } 1344 1345 /* last field in 'union bpf_attr' used by this command */ 1346 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags 1347 1348 static int map_lookup_elem(union bpf_attr *attr) 1349 { 1350 void __user *ukey = u64_to_user_ptr(attr->key); 1351 void __user *uvalue = u64_to_user_ptr(attr->value); 1352 int ufd = attr->map_fd; 1353 struct bpf_map *map; 1354 void *key, *value; 1355 u32 value_size; 1356 struct fd f; 1357 int err; 1358 1359 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 1360 return -EINVAL; 1361 1362 if (attr->flags & ~BPF_F_LOCK) 1363 return -EINVAL; 1364 1365 f = fdget(ufd); 1366 map = __bpf_map_get(f); 1367 if (IS_ERR(map)) 1368 return PTR_ERR(map); 1369 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 1370 err = -EPERM; 1371 goto err_put; 1372 } 1373 1374 if ((attr->flags & BPF_F_LOCK) && 1375 
!btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1376 err = -EINVAL; 1377 goto err_put; 1378 } 1379 1380 key = __bpf_copy_key(ukey, map->key_size); 1381 if (IS_ERR(key)) { 1382 err = PTR_ERR(key); 1383 goto err_put; 1384 } 1385 1386 value_size = bpf_map_value_size(map); 1387 1388 err = -ENOMEM; 1389 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1390 if (!value) 1391 goto free_key; 1392 1393 if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 1394 if (copy_from_user(value, uvalue, value_size)) 1395 err = -EFAULT; 1396 else 1397 err = bpf_map_copy_value(map, key, value, attr->flags); 1398 goto free_value; 1399 } 1400 1401 err = bpf_map_copy_value(map, key, value, attr->flags); 1402 if (err) 1403 goto free_value; 1404 1405 err = -EFAULT; 1406 if (copy_to_user(uvalue, value, value_size) != 0) 1407 goto free_value; 1408 1409 err = 0; 1410 1411 free_value: 1412 kvfree(value); 1413 free_key: 1414 kvfree(key); 1415 err_put: 1416 fdput(f); 1417 return err; 1418 } 1419 1420 1421 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 1422 1423 static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) 1424 { 1425 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1426 bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel); 1427 int ufd = attr->map_fd; 1428 struct bpf_map *map; 1429 void *key, *value; 1430 u32 value_size; 1431 struct fd f; 1432 int err; 1433 1434 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 1435 return -EINVAL; 1436 1437 f = fdget(ufd); 1438 map = __bpf_map_get(f); 1439 if (IS_ERR(map)) 1440 return PTR_ERR(map); 1441 bpf_map_write_active_inc(map); 1442 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1443 err = -EPERM; 1444 goto err_put; 1445 } 1446 1447 if ((attr->flags & BPF_F_LOCK) && 1448 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1449 err = -EINVAL; 1450 goto err_put; 1451 } 1452 1453 key = ___bpf_copy_key(ukey, map->key_size); 1454 if (IS_ERR(key)) { 1455 err = PTR_ERR(key); 1456 goto err_put; 1457 } 1458 1459 value_size = bpf_map_value_size(map); 1460 value = kvmemdup_bpfptr(uvalue, value_size); 1461 if (IS_ERR(value)) { 1462 err = PTR_ERR(value); 1463 goto free_key; 1464 } 1465 1466 err = bpf_map_update_value(map, f.file, key, value, attr->flags); 1467 1468 kvfree(value); 1469 free_key: 1470 kvfree(key); 1471 err_put: 1472 bpf_map_write_active_dec(map); 1473 fdput(f); 1474 return err; 1475 } 1476 1477 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 1478 1479 static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr) 1480 { 1481 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1482 int ufd = attr->map_fd; 1483 struct bpf_map *map; 1484 struct fd f; 1485 void *key; 1486 int err; 1487 1488 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 1489 return -EINVAL; 1490 1491 f = fdget(ufd); 1492 map = __bpf_map_get(f); 1493 if (IS_ERR(map)) 1494 return PTR_ERR(map); 1495 bpf_map_write_active_inc(map); 1496 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1497 err = -EPERM; 1498 goto err_put; 1499 } 1500 1501 key = ___bpf_copy_key(ukey, map->key_size); 1502 if (IS_ERR(key)) { 1503 err = PTR_ERR(key); 1504 goto err_put; 1505 } 1506 1507 if (bpf_map_is_offloaded(map)) { 1508 err = bpf_map_offload_delete_elem(map, key); 1509 goto out; 1510 } else if (IS_FD_PROG_ARRAY(map) || 1511 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 1512 /* These maps require sleepable context */ 1513 err = map->ops->map_delete_elem(map, key); 1514 goto out; 1515 } 1516 1517 bpf_disable_instrumentation(); 1518 rcu_read_lock(); 1519 err = map->ops->map_delete_elem(map, key); 1520 
rcu_read_unlock(); 1521 bpf_enable_instrumentation(); 1522 maybe_wait_bpf_programs(map); 1523 out: 1524 kvfree(key); 1525 err_put: 1526 bpf_map_write_active_dec(map); 1527 fdput(f); 1528 return err; 1529 } 1530 1531 /* last field in 'union bpf_attr' used by this command */ 1532 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 1533 1534 static int map_get_next_key(union bpf_attr *attr) 1535 { 1536 void __user *ukey = u64_to_user_ptr(attr->key); 1537 void __user *unext_key = u64_to_user_ptr(attr->next_key); 1538 int ufd = attr->map_fd; 1539 struct bpf_map *map; 1540 void *key, *next_key; 1541 struct fd f; 1542 int err; 1543 1544 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 1545 return -EINVAL; 1546 1547 f = fdget(ufd); 1548 map = __bpf_map_get(f); 1549 if (IS_ERR(map)) 1550 return PTR_ERR(map); 1551 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 1552 err = -EPERM; 1553 goto err_put; 1554 } 1555 1556 if (ukey) { 1557 key = __bpf_copy_key(ukey, map->key_size); 1558 if (IS_ERR(key)) { 1559 err = PTR_ERR(key); 1560 goto err_put; 1561 } 1562 } else { 1563 key = NULL; 1564 } 1565 1566 err = -ENOMEM; 1567 next_key = kvmalloc(map->key_size, GFP_USER); 1568 if (!next_key) 1569 goto free_key; 1570 1571 if (bpf_map_is_offloaded(map)) { 1572 err = bpf_map_offload_get_next_key(map, key, next_key); 1573 goto out; 1574 } 1575 1576 rcu_read_lock(); 1577 err = map->ops->map_get_next_key(map, key, next_key); 1578 rcu_read_unlock(); 1579 out: 1580 if (err) 1581 goto free_next_key; 1582 1583 err = -EFAULT; 1584 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 1585 goto free_next_key; 1586 1587 err = 0; 1588 1589 free_next_key: 1590 kvfree(next_key); 1591 free_key: 1592 kvfree(key); 1593 err_put: 1594 fdput(f); 1595 return err; 1596 } 1597 1598 int generic_map_delete_batch(struct bpf_map *map, 1599 const union bpf_attr *attr, 1600 union bpf_attr __user *uattr) 1601 { 1602 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1603 u32 cp, max_count; 1604 int err = 0; 1605 void *key; 1606 1607 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1608 return -EINVAL; 1609 1610 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1611 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1612 return -EINVAL; 1613 } 1614 1615 max_count = attr->batch.count; 1616 if (!max_count) 1617 return 0; 1618 1619 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1620 if (!key) 1621 return -ENOMEM; 1622 1623 for (cp = 0; cp < max_count; cp++) { 1624 err = -EFAULT; 1625 if (copy_from_user(key, keys + cp * map->key_size, 1626 map->key_size)) 1627 break; 1628 1629 if (bpf_map_is_offloaded(map)) { 1630 err = bpf_map_offload_delete_elem(map, key); 1631 break; 1632 } 1633 1634 bpf_disable_instrumentation(); 1635 rcu_read_lock(); 1636 err = map->ops->map_delete_elem(map, key); 1637 rcu_read_unlock(); 1638 bpf_enable_instrumentation(); 1639 if (err) 1640 break; 1641 cond_resched(); 1642 } 1643 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1644 err = -EFAULT; 1645 1646 kvfree(key); 1647 1648 maybe_wait_bpf_programs(map); 1649 return err; 1650 } 1651 1652 int generic_map_update_batch(struct bpf_map *map, struct file *map_file, 1653 const union bpf_attr *attr, 1654 union bpf_attr __user *uattr) 1655 { 1656 void __user *values = u64_to_user_ptr(attr->batch.values); 1657 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1658 u32 value_size, cp, max_count; 1659 void *key, *value; 1660 int err = 0; 1661 1662 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1663 return -EINVAL; 1664 1665 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1666 
!btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1667 return -EINVAL; 1668 } 1669 1670 value_size = bpf_map_value_size(map); 1671 1672 max_count = attr->batch.count; 1673 if (!max_count) 1674 return 0; 1675 1676 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1677 if (!key) 1678 return -ENOMEM; 1679 1680 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1681 if (!value) { 1682 kvfree(key); 1683 return -ENOMEM; 1684 } 1685 1686 for (cp = 0; cp < max_count; cp++) { 1687 err = -EFAULT; 1688 if (copy_from_user(key, keys + cp * map->key_size, 1689 map->key_size) || 1690 copy_from_user(value, values + cp * value_size, value_size)) 1691 break; 1692 1693 err = bpf_map_update_value(map, map_file, key, value, 1694 attr->batch.elem_flags); 1695 1696 if (err) 1697 break; 1698 cond_resched(); 1699 } 1700 1701 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1702 err = -EFAULT; 1703 1704 kvfree(value); 1705 kvfree(key); 1706 return err; 1707 } 1708 1709 #define MAP_LOOKUP_RETRIES 3 1710 1711 int generic_map_lookup_batch(struct bpf_map *map, 1712 const union bpf_attr *attr, 1713 union bpf_attr __user *uattr) 1714 { 1715 void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); 1716 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 1717 void __user *values = u64_to_user_ptr(attr->batch.values); 1718 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1719 void *buf, *buf_prevkey, *prev_key, *key, *value; 1720 int err, retry = MAP_LOOKUP_RETRIES; 1721 u32 value_size, cp, max_count; 1722 1723 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1724 return -EINVAL; 1725 1726 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1727 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) 1728 return -EINVAL; 1729 1730 value_size = bpf_map_value_size(map); 1731 1732 max_count = attr->batch.count; 1733 if (!max_count) 1734 return 0; 1735 1736 if (put_user(0, &uattr->batch.count)) 1737 return -EFAULT; 1738 1739 buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1740 if (!buf_prevkey) 1741 return -ENOMEM; 1742 1743 buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN); 1744 if (!buf) { 1745 kvfree(buf_prevkey); 1746 return -ENOMEM; 1747 } 1748 1749 err = -EFAULT; 1750 prev_key = NULL; 1751 if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) 1752 goto free_buf; 1753 key = buf; 1754 value = key + map->key_size; 1755 if (ubatch) 1756 prev_key = buf_prevkey; 1757 1758 for (cp = 0; cp < max_count;) { 1759 rcu_read_lock(); 1760 err = map->ops->map_get_next_key(map, prev_key, key); 1761 rcu_read_unlock(); 1762 if (err) 1763 break; 1764 err = bpf_map_copy_value(map, key, value, 1765 attr->batch.elem_flags); 1766 1767 if (err == -ENOENT) { 1768 if (retry) { 1769 retry--; 1770 continue; 1771 } 1772 err = -EINTR; 1773 break; 1774 } 1775 1776 if (err) 1777 goto free_buf; 1778 1779 if (copy_to_user(keys + cp * map->key_size, key, 1780 map->key_size)) { 1781 err = -EFAULT; 1782 goto free_buf; 1783 } 1784 if (copy_to_user(values + cp * value_size, value, value_size)) { 1785 err = -EFAULT; 1786 goto free_buf; 1787 } 1788 1789 if (!prev_key) 1790 prev_key = buf_prevkey; 1791 1792 swap(prev_key, key); 1793 retry = MAP_LOOKUP_RETRIES; 1794 cp++; 1795 cond_resched(); 1796 } 1797 1798 if (err == -EFAULT) 1799 goto free_buf; 1800 1801 if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || 1802 (cp && copy_to_user(uobatch, prev_key, map->key_size)))) 1803 err = -EFAULT; 1804 1805 free_buf: 1806 kvfree(buf_prevkey); 1807 kvfree(buf); 1808 return err; 1809 } 1810 1811 
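/* A hedged userspace sketch (not part of this file) of how the generic batch
 * lookup above is typically driven through the bpf(2) syscall; BATCH, KEY_SZ,
 * VAL_SZ and map_fd are placeholders and error handling is omitted:
 *
 *	union bpf_attr attr = {};
 *	char in_b[KEY_SZ], out_b[KEY_SZ];		// opaque batch positions
 *	char keys[BATCH * KEY_SZ], values[BATCH * VAL_SZ];
 *
 *	attr.batch.map_fd    = map_fd;
 *	attr.batch.keys      = (__u64)(unsigned long)keys;
 *	attr.batch.values    = (__u64)(unsigned long)values;
 *	attr.batch.count     = BATCH;			// in: room, out: copied
 *	attr.batch.in_batch  = 0;			// 0: start from the first key
 *	attr.batch.out_batch = (__u64)(unsigned long)out_b;
 *
 *	err = syscall(__NR_bpf, BPF_MAP_LOOKUP_BATCH, &attr, sizeof(attr));
 *
 * To continue the walk, copy out_b into in_b, point attr.batch.in_batch at it
 * and call again; the call fails with ENOENT once the whole map has been
 * visited, while attr.batch.count still reports how many elements were copied
 * on that final call.
 */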
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags 1812 1813 static int map_lookup_and_delete_elem(union bpf_attr *attr) 1814 { 1815 void __user *ukey = u64_to_user_ptr(attr->key); 1816 void __user *uvalue = u64_to_user_ptr(attr->value); 1817 int ufd = attr->map_fd; 1818 struct bpf_map *map; 1819 void *key, *value; 1820 u32 value_size; 1821 struct fd f; 1822 int err; 1823 1824 if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) 1825 return -EINVAL; 1826 1827 if (attr->flags & ~BPF_F_LOCK) 1828 return -EINVAL; 1829 1830 f = fdget(ufd); 1831 map = __bpf_map_get(f); 1832 if (IS_ERR(map)) 1833 return PTR_ERR(map); 1834 bpf_map_write_active_inc(map); 1835 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || 1836 !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1837 err = -EPERM; 1838 goto err_put; 1839 } 1840 1841 if (attr->flags && 1842 (map->map_type == BPF_MAP_TYPE_QUEUE || 1843 map->map_type == BPF_MAP_TYPE_STACK)) { 1844 err = -EINVAL; 1845 goto err_put; 1846 } 1847 1848 if ((attr->flags & BPF_F_LOCK) && 1849 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1850 err = -EINVAL; 1851 goto err_put; 1852 } 1853 1854 key = __bpf_copy_key(ukey, map->key_size); 1855 if (IS_ERR(key)) { 1856 err = PTR_ERR(key); 1857 goto err_put; 1858 } 1859 1860 value_size = bpf_map_value_size(map); 1861 1862 err = -ENOMEM; 1863 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1864 if (!value) 1865 goto free_key; 1866 1867 err = -ENOTSUPP; 1868 if (map->map_type == BPF_MAP_TYPE_QUEUE || 1869 map->map_type == BPF_MAP_TYPE_STACK) { 1870 err = map->ops->map_pop_elem(map, value); 1871 } else if (map->map_type == BPF_MAP_TYPE_HASH || 1872 map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 1873 map->map_type == BPF_MAP_TYPE_LRU_HASH || 1874 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 1875 if (!bpf_map_is_offloaded(map)) { 1876 bpf_disable_instrumentation(); 1877 rcu_read_lock(); 1878 err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags); 1879 rcu_read_unlock(); 1880 bpf_enable_instrumentation(); 1881 } 1882 } 1883 1884 if (err) 1885 goto free_value; 1886 1887 if (copy_to_user(uvalue, value, value_size) != 0) { 1888 err = -EFAULT; 1889 goto free_value; 1890 } 1891 1892 err = 0; 1893 1894 free_value: 1895 kvfree(value); 1896 free_key: 1897 kvfree(key); 1898 err_put: 1899 bpf_map_write_active_dec(map); 1900 fdput(f); 1901 return err; 1902 } 1903 1904 #define BPF_MAP_FREEZE_LAST_FIELD map_fd 1905 1906 static int map_freeze(const union bpf_attr *attr) 1907 { 1908 int err = 0, ufd = attr->map_fd; 1909 struct bpf_map *map; 1910 struct fd f; 1911 1912 if (CHECK_ATTR(BPF_MAP_FREEZE)) 1913 return -EINVAL; 1914 1915 f = fdget(ufd); 1916 map = __bpf_map_get(f); 1917 if (IS_ERR(map)) 1918 return PTR_ERR(map); 1919 1920 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) { 1921 fdput(f); 1922 return -ENOTSUPP; 1923 } 1924 1925 mutex_lock(&map->freeze_mutex); 1926 if (bpf_map_write_active(map)) { 1927 err = -EBUSY; 1928 goto err_put; 1929 } 1930 if (READ_ONCE(map->frozen)) { 1931 err = -EBUSY; 1932 goto err_put; 1933 } 1934 if (!bpf_capable()) { 1935 err = -EPERM; 1936 goto err_put; 1937 } 1938 1939 WRITE_ONCE(map->frozen, true); 1940 err_put: 1941 mutex_unlock(&map->freeze_mutex); 1942 fdput(f); 1943 return err; 1944 } 1945 1946 static const struct bpf_prog_ops * const bpf_prog_types[] = { 1947 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 1948 [_id] = & _name ## _prog_ops, 1949 #define BPF_MAP_TYPE(_id, _ops) 1950 #define BPF_LINK_TYPE(_id, _name) 1951 
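/* The #include below expands every BPF_PROG_TYPE(_id, _name, ...) entry of
 * bpf_types.h into an "[_id] = &_name##_prog_ops" initializer; BPF_MAP_TYPE()
 * and BPF_LINK_TYPE() are defined to nothing above, so map and link entries
 * from that header are dropped from this array.
 */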
#include <linux/bpf_types.h> 1952 #undef BPF_PROG_TYPE 1953 #undef BPF_MAP_TYPE 1954 #undef BPF_LINK_TYPE 1955 }; 1956 1957 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 1958 { 1959 const struct bpf_prog_ops *ops; 1960 1961 if (type >= ARRAY_SIZE(bpf_prog_types)) 1962 return -EINVAL; 1963 type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types)); 1964 ops = bpf_prog_types[type]; 1965 if (!ops) 1966 return -EINVAL; 1967 1968 if (!bpf_prog_is_offloaded(prog->aux)) 1969 prog->aux->ops = ops; 1970 else 1971 prog->aux->ops = &bpf_offload_prog_ops; 1972 prog->type = type; 1973 return 0; 1974 } 1975 1976 enum bpf_audit { 1977 BPF_AUDIT_LOAD, 1978 BPF_AUDIT_UNLOAD, 1979 BPF_AUDIT_MAX, 1980 }; 1981 1982 static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { 1983 [BPF_AUDIT_LOAD] = "LOAD", 1984 [BPF_AUDIT_UNLOAD] = "UNLOAD", 1985 }; 1986 1987 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) 1988 { 1989 struct audit_context *ctx = NULL; 1990 struct audit_buffer *ab; 1991 1992 if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) 1993 return; 1994 if (audit_enabled == AUDIT_OFF) 1995 return; 1996 if (!in_irq() && !irqs_disabled()) 1997 ctx = audit_context(); 1998 ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); 1999 if (unlikely(!ab)) 2000 return; 2001 audit_log_format(ab, "prog-id=%u op=%s", 2002 prog->aux->id, bpf_audit_str[op]); 2003 audit_log_end(ab); 2004 } 2005 2006 static int bpf_prog_alloc_id(struct bpf_prog *prog) 2007 { 2008 int id; 2009 2010 idr_preload(GFP_KERNEL); 2011 spin_lock_bh(&prog_idr_lock); 2012 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 2013 if (id > 0) 2014 prog->aux->id = id; 2015 spin_unlock_bh(&prog_idr_lock); 2016 idr_preload_end(); 2017 2018 /* id is in [1, INT_MAX) */ 2019 if (WARN_ON_ONCE(!id)) 2020 return -ENOSPC; 2021 2022 return id > 0 ? 0 : id; 2023 } 2024 2025 void bpf_prog_free_id(struct bpf_prog *prog) 2026 { 2027 unsigned long flags; 2028 2029 /* cBPF to eBPF migrations are currently not in the idr store. 2030 * Offloaded programs are removed from the store when their device 2031 * disappears - even if someone grabs an fd to them they are unusable, 2032 * simply waiting for refcnt to drop to be freed. 
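	 *
	 * Either way, an id of 0 below means the program was never published
	 * via the idr (or has already been unpublished), so there is nothing
	 * to remove.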
2033 */ 2034 if (!prog->aux->id) 2035 return; 2036 2037 spin_lock_irqsave(&prog_idr_lock, flags); 2038 idr_remove(&prog_idr, prog->aux->id); 2039 prog->aux->id = 0; 2040 spin_unlock_irqrestore(&prog_idr_lock, flags); 2041 } 2042 2043 static void __bpf_prog_put_rcu(struct rcu_head *rcu) 2044 { 2045 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 2046 2047 kvfree(aux->func_info); 2048 kfree(aux->func_info_aux); 2049 free_uid(aux->user); 2050 security_bpf_prog_free(aux); 2051 bpf_prog_free(aux->prog); 2052 } 2053 2054 static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) 2055 { 2056 bpf_prog_kallsyms_del_all(prog); 2057 btf_put(prog->aux->btf); 2058 kvfree(prog->aux->jited_linfo); 2059 kvfree(prog->aux->linfo); 2060 kfree(prog->aux->kfunc_tab); 2061 if (prog->aux->attach_btf) 2062 btf_put(prog->aux->attach_btf); 2063 2064 if (deferred) { 2065 if (prog->aux->sleepable) 2066 call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); 2067 else 2068 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 2069 } else { 2070 __bpf_prog_put_rcu(&prog->aux->rcu); 2071 } 2072 } 2073 2074 static void bpf_prog_put_deferred(struct work_struct *work) 2075 { 2076 struct bpf_prog_aux *aux; 2077 struct bpf_prog *prog; 2078 2079 aux = container_of(work, struct bpf_prog_aux, work); 2080 prog = aux->prog; 2081 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 2082 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); 2083 bpf_prog_free_id(prog); 2084 __bpf_prog_put_noref(prog, true); 2085 } 2086 2087 static void __bpf_prog_put(struct bpf_prog *prog) 2088 { 2089 struct bpf_prog_aux *aux = prog->aux; 2090 2091 if (atomic64_dec_and_test(&aux->refcnt)) { 2092 if (in_irq() || irqs_disabled()) { 2093 INIT_WORK(&aux->work, bpf_prog_put_deferred); 2094 schedule_work(&aux->work); 2095 } else { 2096 bpf_prog_put_deferred(&aux->work); 2097 } 2098 } 2099 } 2100 2101 void bpf_prog_put(struct bpf_prog *prog) 2102 { 2103 __bpf_prog_put(prog); 2104 } 2105 EXPORT_SYMBOL_GPL(bpf_prog_put); 2106 2107 static int bpf_prog_release(struct inode *inode, struct file *filp) 2108 { 2109 struct bpf_prog *prog = filp->private_data; 2110 2111 bpf_prog_put(prog); 2112 return 0; 2113 } 2114 2115 struct bpf_prog_kstats { 2116 u64 nsecs; 2117 u64 cnt; 2118 u64 misses; 2119 }; 2120 2121 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog) 2122 { 2123 struct bpf_prog_stats *stats; 2124 unsigned int flags; 2125 2126 stats = this_cpu_ptr(prog->stats); 2127 flags = u64_stats_update_begin_irqsave(&stats->syncp); 2128 u64_stats_inc(&stats->misses); 2129 u64_stats_update_end_irqrestore(&stats->syncp, flags); 2130 } 2131 2132 static void bpf_prog_get_stats(const struct bpf_prog *prog, 2133 struct bpf_prog_kstats *stats) 2134 { 2135 u64 nsecs = 0, cnt = 0, misses = 0; 2136 int cpu; 2137 2138 for_each_possible_cpu(cpu) { 2139 const struct bpf_prog_stats *st; 2140 unsigned int start; 2141 u64 tnsecs, tcnt, tmisses; 2142 2143 st = per_cpu_ptr(prog->stats, cpu); 2144 do { 2145 start = u64_stats_fetch_begin(&st->syncp); 2146 tnsecs = u64_stats_read(&st->nsecs); 2147 tcnt = u64_stats_read(&st->cnt); 2148 tmisses = u64_stats_read(&st->misses); 2149 } while (u64_stats_fetch_retry(&st->syncp, start)); 2150 nsecs += tnsecs; 2151 cnt += tcnt; 2152 misses += tmisses; 2153 } 2154 stats->nsecs = nsecs; 2155 stats->cnt = cnt; 2156 stats->misses = misses; 2157 } 2158 2159 #ifdef CONFIG_PROC_FS 2160 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 2161 { 2162 const struct bpf_prog *prog = filp->private_data; 
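	/* two hex characters per tag byte plus a terminating NUL */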
2163 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2164 struct bpf_prog_kstats stats; 2165 2166 bpf_prog_get_stats(prog, &stats); 2167 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2168 seq_printf(m, 2169 "prog_type:\t%u\n" 2170 "prog_jited:\t%u\n" 2171 "prog_tag:\t%s\n" 2172 "memlock:\t%llu\n" 2173 "prog_id:\t%u\n" 2174 "run_time_ns:\t%llu\n" 2175 "run_cnt:\t%llu\n" 2176 "recursion_misses:\t%llu\n" 2177 "verified_insns:\t%u\n", 2178 prog->type, 2179 prog->jited, 2180 prog_tag, 2181 prog->pages * 1ULL << PAGE_SHIFT, 2182 prog->aux->id, 2183 stats.nsecs, 2184 stats.cnt, 2185 stats.misses, 2186 prog->aux->verified_insns); 2187 } 2188 #endif 2189 2190 const struct file_operations bpf_prog_fops = { 2191 #ifdef CONFIG_PROC_FS 2192 .show_fdinfo = bpf_prog_show_fdinfo, 2193 #endif 2194 .release = bpf_prog_release, 2195 .read = bpf_dummy_read, 2196 .write = bpf_dummy_write, 2197 }; 2198 2199 int bpf_prog_new_fd(struct bpf_prog *prog) 2200 { 2201 int ret; 2202 2203 ret = security_bpf_prog(prog); 2204 if (ret < 0) 2205 return ret; 2206 2207 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 2208 O_RDWR | O_CLOEXEC); 2209 } 2210 2211 static struct bpf_prog *____bpf_prog_get(struct fd f) 2212 { 2213 if (!f.file) 2214 return ERR_PTR(-EBADF); 2215 if (f.file->f_op != &bpf_prog_fops) { 2216 fdput(f); 2217 return ERR_PTR(-EINVAL); 2218 } 2219 2220 return f.file->private_data; 2221 } 2222 2223 void bpf_prog_add(struct bpf_prog *prog, int i) 2224 { 2225 atomic64_add(i, &prog->aux->refcnt); 2226 } 2227 EXPORT_SYMBOL_GPL(bpf_prog_add); 2228 2229 void bpf_prog_sub(struct bpf_prog *prog, int i) 2230 { 2231 /* Only to be used for undoing previous bpf_prog_add() in some 2232 * error path. We still know that another entity in our call 2233 * path holds a reference to the program, thus atomic_sub() can 2234 * be safely used in such cases! 
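 *
 * For illustration only, a hypothetical caller might pair the two roughly
 * like this (setup_consumers() is a made-up placeholder, not a real helper):
 *
 *	bpf_prog_add(prog, n);
 *	err = setup_consumers(prog, n);
 *	if (err)
 *		bpf_prog_sub(prog, n);
 *
 * where undoing with bpf_prog_sub() is safe only because the caller still
 * holds its own reference on prog.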
2235 */ 2236 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 2237 } 2238 EXPORT_SYMBOL_GPL(bpf_prog_sub); 2239 2240 void bpf_prog_inc(struct bpf_prog *prog) 2241 { 2242 atomic64_inc(&prog->aux->refcnt); 2243 } 2244 EXPORT_SYMBOL_GPL(bpf_prog_inc); 2245 2246 /* prog_idr_lock should have been held */ 2247 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 2248 { 2249 int refold; 2250 2251 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 2252 2253 if (!refold) 2254 return ERR_PTR(-ENOENT); 2255 2256 return prog; 2257 } 2258 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 2259 2260 bool bpf_prog_get_ok(struct bpf_prog *prog, 2261 enum bpf_prog_type *attach_type, bool attach_drv) 2262 { 2263 /* not an attachment, just a refcount inc, always allow */ 2264 if (!attach_type) 2265 return true; 2266 2267 if (prog->type != *attach_type) 2268 return false; 2269 if (bpf_prog_is_offloaded(prog->aux) && !attach_drv) 2270 return false; 2271 2272 return true; 2273 } 2274 2275 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, 2276 bool attach_drv) 2277 { 2278 struct fd f = fdget(ufd); 2279 struct bpf_prog *prog; 2280 2281 prog = ____bpf_prog_get(f); 2282 if (IS_ERR(prog)) 2283 return prog; 2284 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { 2285 prog = ERR_PTR(-EINVAL); 2286 goto out; 2287 } 2288 2289 bpf_prog_inc(prog); 2290 out: 2291 fdput(f); 2292 return prog; 2293 } 2294 2295 struct bpf_prog *bpf_prog_get(u32 ufd) 2296 { 2297 return __bpf_prog_get(ufd, NULL, false); 2298 } 2299 2300 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, 2301 bool attach_drv) 2302 { 2303 return __bpf_prog_get(ufd, &type, attach_drv); 2304 } 2305 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 2306 2307 /* Initially all BPF programs could be loaded w/o specifying 2308 * expected_attach_type. Later for some of them specifying expected_attach_type 2309 * at load time became required so that program could be validated properly. 2310 * Programs of types that are allowed to be loaded both w/ and w/o (for 2311 * backward compatibility) expected_attach_type, should have the default attach 2312 * type assigned to expected_attach_type for the latter case, so that it can be 2313 * validated later at attach time. 2314 * 2315 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if 2316 * prog type requires it but has some attach types that have to be backward 2317 * compatible. 2318 */ 2319 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) 2320 { 2321 switch (attr->prog_type) { 2322 case BPF_PROG_TYPE_CGROUP_SOCK: 2323 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't 2324 * exist so checking for non-zero is the way to go here. 
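 * E.g. a (hypothetical) legacy loader that leaves expected_attach_type at 0
 * for a BPF_PROG_TYPE_CGROUP_SOCK program gets BPF_CGROUP_INET_SOCK_CREATE
 * assigned here and is then validated against that at attach time.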
2325 */ 2326 if (!attr->expected_attach_type) 2327 attr->expected_attach_type = 2328 BPF_CGROUP_INET_SOCK_CREATE; 2329 break; 2330 case BPF_PROG_TYPE_SK_REUSEPORT: 2331 if (!attr->expected_attach_type) 2332 attr->expected_attach_type = 2333 BPF_SK_REUSEPORT_SELECT; 2334 break; 2335 } 2336 } 2337 2338 static int 2339 bpf_prog_load_check_attach(enum bpf_prog_type prog_type, 2340 enum bpf_attach_type expected_attach_type, 2341 struct btf *attach_btf, u32 btf_id, 2342 struct bpf_prog *dst_prog) 2343 { 2344 if (btf_id) { 2345 if (btf_id > BTF_MAX_TYPE) 2346 return -EINVAL; 2347 2348 if (!attach_btf && !dst_prog) 2349 return -EINVAL; 2350 2351 switch (prog_type) { 2352 case BPF_PROG_TYPE_TRACING: 2353 case BPF_PROG_TYPE_LSM: 2354 case BPF_PROG_TYPE_STRUCT_OPS: 2355 case BPF_PROG_TYPE_EXT: 2356 break; 2357 default: 2358 return -EINVAL; 2359 } 2360 } 2361 2362 if (attach_btf && (!btf_id || dst_prog)) 2363 return -EINVAL; 2364 2365 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && 2366 prog_type != BPF_PROG_TYPE_EXT) 2367 return -EINVAL; 2368 2369 switch (prog_type) { 2370 case BPF_PROG_TYPE_CGROUP_SOCK: 2371 switch (expected_attach_type) { 2372 case BPF_CGROUP_INET_SOCK_CREATE: 2373 case BPF_CGROUP_INET_SOCK_RELEASE: 2374 case BPF_CGROUP_INET4_POST_BIND: 2375 case BPF_CGROUP_INET6_POST_BIND: 2376 return 0; 2377 default: 2378 return -EINVAL; 2379 } 2380 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2381 switch (expected_attach_type) { 2382 case BPF_CGROUP_INET4_BIND: 2383 case BPF_CGROUP_INET6_BIND: 2384 case BPF_CGROUP_INET4_CONNECT: 2385 case BPF_CGROUP_INET6_CONNECT: 2386 case BPF_CGROUP_INET4_GETPEERNAME: 2387 case BPF_CGROUP_INET6_GETPEERNAME: 2388 case BPF_CGROUP_INET4_GETSOCKNAME: 2389 case BPF_CGROUP_INET6_GETSOCKNAME: 2390 case BPF_CGROUP_UDP4_SENDMSG: 2391 case BPF_CGROUP_UDP6_SENDMSG: 2392 case BPF_CGROUP_UDP4_RECVMSG: 2393 case BPF_CGROUP_UDP6_RECVMSG: 2394 return 0; 2395 default: 2396 return -EINVAL; 2397 } 2398 case BPF_PROG_TYPE_CGROUP_SKB: 2399 switch (expected_attach_type) { 2400 case BPF_CGROUP_INET_INGRESS: 2401 case BPF_CGROUP_INET_EGRESS: 2402 return 0; 2403 default: 2404 return -EINVAL; 2405 } 2406 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2407 switch (expected_attach_type) { 2408 case BPF_CGROUP_SETSOCKOPT: 2409 case BPF_CGROUP_GETSOCKOPT: 2410 return 0; 2411 default: 2412 return -EINVAL; 2413 } 2414 case BPF_PROG_TYPE_SK_LOOKUP: 2415 if (expected_attach_type == BPF_SK_LOOKUP) 2416 return 0; 2417 return -EINVAL; 2418 case BPF_PROG_TYPE_SK_REUSEPORT: 2419 switch (expected_attach_type) { 2420 case BPF_SK_REUSEPORT_SELECT: 2421 case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: 2422 return 0; 2423 default: 2424 return -EINVAL; 2425 } 2426 case BPF_PROG_TYPE_SYSCALL: 2427 case BPF_PROG_TYPE_EXT: 2428 if (expected_attach_type) 2429 return -EINVAL; 2430 fallthrough; 2431 default: 2432 return 0; 2433 } 2434 } 2435 2436 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) 2437 { 2438 switch (prog_type) { 2439 case BPF_PROG_TYPE_SCHED_CLS: 2440 case BPF_PROG_TYPE_SCHED_ACT: 2441 case BPF_PROG_TYPE_XDP: 2442 case BPF_PROG_TYPE_LWT_IN: 2443 case BPF_PROG_TYPE_LWT_OUT: 2444 case BPF_PROG_TYPE_LWT_XMIT: 2445 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2446 case BPF_PROG_TYPE_SK_SKB: 2447 case BPF_PROG_TYPE_SK_MSG: 2448 case BPF_PROG_TYPE_LIRC_MODE2: 2449 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2450 case BPF_PROG_TYPE_CGROUP_DEVICE: 2451 case BPF_PROG_TYPE_CGROUP_SOCK: 2452 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2453 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2454 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2455 case 
BPF_PROG_TYPE_SOCK_OPS: 2456 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2457 return true; 2458 case BPF_PROG_TYPE_CGROUP_SKB: 2459 /* always unpriv */ 2460 case BPF_PROG_TYPE_SK_REUSEPORT: 2461 /* equivalent to SOCKET_FILTER. need CAP_BPF only */ 2462 default: 2463 return false; 2464 } 2465 } 2466 2467 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) 2468 { 2469 switch (prog_type) { 2470 case BPF_PROG_TYPE_KPROBE: 2471 case BPF_PROG_TYPE_TRACEPOINT: 2472 case BPF_PROG_TYPE_PERF_EVENT: 2473 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2474 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2475 case BPF_PROG_TYPE_TRACING: 2476 case BPF_PROG_TYPE_LSM: 2477 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ 2478 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2479 return true; 2480 default: 2481 return false; 2482 } 2483 } 2484 2485 /* last field in 'union bpf_attr' used by this command */ 2486 #define BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size 2487 2488 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) 2489 { 2490 enum bpf_prog_type type = attr->prog_type; 2491 struct bpf_prog *prog, *dst_prog = NULL; 2492 struct btf *attach_btf = NULL; 2493 int err; 2494 char license[128]; 2495 bool is_gpl; 2496 2497 if (CHECK_ATTR(BPF_PROG_LOAD)) 2498 return -EINVAL; 2499 2500 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | 2501 BPF_F_ANY_ALIGNMENT | 2502 BPF_F_TEST_STATE_FREQ | 2503 BPF_F_SLEEPABLE | 2504 BPF_F_TEST_RND_HI32 | 2505 BPF_F_XDP_HAS_FRAGS | 2506 BPF_F_XDP_DEV_BOUND_ONLY)) 2507 return -EINVAL; 2508 2509 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 2510 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && 2511 !bpf_capable()) 2512 return -EPERM; 2513 2514 /* copy eBPF program license from user space */ 2515 if (strncpy_from_bpfptr(license, 2516 make_bpfptr(attr->license, uattr.is_kernel), 2517 sizeof(license) - 1) < 0) 2518 return -EFAULT; 2519 license[sizeof(license) - 1] = 0; 2520 2521 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 2522 is_gpl = license_is_gpl_compatible(license); 2523 2524 if (attr->insn_cnt == 0 || 2525 attr->insn_cnt > (bpf_capable() ? 
BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) 2526 return -E2BIG; 2527 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 2528 type != BPF_PROG_TYPE_CGROUP_SKB && 2529 !bpf_capable()) 2530 return -EPERM; 2531 2532 if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) 2533 return -EPERM; 2534 if (is_perfmon_prog_type(type) && !perfmon_capable()) 2535 return -EPERM; 2536 2537 /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog 2538 * or btf, we need to check which one it is 2539 */ 2540 if (attr->attach_prog_fd) { 2541 dst_prog = bpf_prog_get(attr->attach_prog_fd); 2542 if (IS_ERR(dst_prog)) { 2543 dst_prog = NULL; 2544 attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); 2545 if (IS_ERR(attach_btf)) 2546 return -EINVAL; 2547 if (!btf_is_kernel(attach_btf)) { 2548 /* attaching through specifying bpf_prog's BTF 2549 * objects directly might be supported eventually 2550 */ 2551 btf_put(attach_btf); 2552 return -ENOTSUPP; 2553 } 2554 } 2555 } else if (attr->attach_btf_id) { 2556 /* fall back to vmlinux BTF, if BTF type ID is specified */ 2557 attach_btf = bpf_get_btf_vmlinux(); 2558 if (IS_ERR(attach_btf)) 2559 return PTR_ERR(attach_btf); 2560 if (!attach_btf) 2561 return -EINVAL; 2562 btf_get(attach_btf); 2563 } 2564 2565 bpf_prog_load_fixup_attach_type(attr); 2566 if (bpf_prog_load_check_attach(type, attr->expected_attach_type, 2567 attach_btf, attr->attach_btf_id, 2568 dst_prog)) { 2569 if (dst_prog) 2570 bpf_prog_put(dst_prog); 2571 if (attach_btf) 2572 btf_put(attach_btf); 2573 return -EINVAL; 2574 } 2575 2576 /* plain bpf_prog allocation */ 2577 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 2578 if (!prog) { 2579 if (dst_prog) 2580 bpf_prog_put(dst_prog); 2581 if (attach_btf) 2582 btf_put(attach_btf); 2583 return -ENOMEM; 2584 } 2585 2586 prog->expected_attach_type = attr->expected_attach_type; 2587 prog->aux->attach_btf = attach_btf; 2588 prog->aux->attach_btf_id = attr->attach_btf_id; 2589 prog->aux->dst_prog = dst_prog; 2590 prog->aux->dev_bound = !!attr->prog_ifindex; 2591 prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; 2592 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; 2593 2594 err = security_bpf_prog_alloc(prog->aux); 2595 if (err) 2596 goto free_prog; 2597 2598 prog->aux->user = get_current_user(); 2599 prog->len = attr->insn_cnt; 2600 2601 err = -EFAULT; 2602 if (copy_from_bpfptr(prog->insns, 2603 make_bpfptr(attr->insns, uattr.is_kernel), 2604 bpf_prog_insn_size(prog)) != 0) 2605 goto free_prog_sec; 2606 2607 prog->orig_prog = NULL; 2608 prog->jited = 0; 2609 2610 atomic64_set(&prog->aux->refcnt, 1); 2611 prog->gpl_compatible = is_gpl ? 
1 : 0; 2612 2613 if (bpf_prog_is_dev_bound(prog->aux)) { 2614 err = bpf_prog_dev_bound_init(prog, attr); 2615 if (err) 2616 goto free_prog_sec; 2617 } 2618 2619 if (type == BPF_PROG_TYPE_EXT && dst_prog && 2620 bpf_prog_is_dev_bound(dst_prog->aux)) { 2621 err = bpf_prog_dev_bound_inherit(prog, dst_prog); 2622 if (err) 2623 goto free_prog_sec; 2624 } 2625 2626 /* find program type: socket_filter vs tracing_filter */ 2627 err = find_prog_type(type, prog); 2628 if (err < 0) 2629 goto free_prog_sec; 2630 2631 prog->aux->load_time = ktime_get_boottime_ns(); 2632 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, 2633 sizeof(attr->prog_name)); 2634 if (err < 0) 2635 goto free_prog_sec; 2636 2637 /* run eBPF verifier */ 2638 err = bpf_check(&prog, attr, uattr); 2639 if (err < 0) 2640 goto free_used_maps; 2641 2642 prog = bpf_prog_select_runtime(prog, &err); 2643 if (err < 0) 2644 goto free_used_maps; 2645 2646 err = bpf_prog_alloc_id(prog); 2647 if (err) 2648 goto free_used_maps; 2649 2650 /* Upon success of bpf_prog_alloc_id(), the BPF prog is 2651 * effectively publicly exposed. However, retrieving via 2652 * bpf_prog_get_fd_by_id() will take another reference, 2653 * therefore it cannot be gone underneath us. 2654 * 2655 * Only for the time /after/ successful bpf_prog_new_fd() 2656 * and before returning to userspace, we might just hold 2657 * one reference and any parallel close on that fd could 2658 * rip everything out. Hence, below notifications must 2659 * happen before bpf_prog_new_fd(). 2660 * 2661 * Also, any failure handling from this point onwards must 2662 * be using bpf_prog_put() given the program is exposed. 2663 */ 2664 bpf_prog_kallsyms_add(prog); 2665 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); 2666 bpf_audit_prog(prog, BPF_AUDIT_LOAD); 2667 2668 err = bpf_prog_new_fd(prog); 2669 if (err < 0) 2670 bpf_prog_put(prog); 2671 return err; 2672 2673 free_used_maps: 2674 /* In case we have subprogs, we need to wait for a grace 2675 * period before we can tear down JIT memory since symbols 2676 * are already exposed under kallsyms. 2677 */ 2678 __bpf_prog_put_noref(prog, prog->aux->func_cnt); 2679 return err; 2680 free_prog_sec: 2681 free_uid(prog->aux->user); 2682 security_bpf_prog_free(prog->aux); 2683 free_prog: 2684 if (prog->aux->attach_btf) 2685 btf_put(prog->aux->attach_btf); 2686 bpf_prog_free(prog); 2687 return err; 2688 } 2689 2690 #define BPF_OBJ_LAST_FIELD file_flags 2691 2692 static int bpf_obj_pin(const union bpf_attr *attr) 2693 { 2694 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) 2695 return -EINVAL; 2696 2697 return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); 2698 } 2699 2700 static int bpf_obj_get(const union bpf_attr *attr) 2701 { 2702 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 2703 attr->file_flags & ~BPF_OBJ_FLAG_MASK) 2704 return -EINVAL; 2705 2706 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), 2707 attr->file_flags); 2708 } 2709 2710 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, 2711 const struct bpf_link_ops *ops, struct bpf_prog *prog) 2712 { 2713 atomic64_set(&link->refcnt, 1); 2714 link->type = type; 2715 link->id = 0; 2716 link->ops = ops; 2717 link->prog = prog; 2718 } 2719 2720 static void bpf_link_free_id(int id) 2721 { 2722 if (!id) 2723 return; 2724 2725 spin_lock_bh(&link_idr_lock); 2726 idr_remove(&link_idr, id); 2727 spin_unlock_bh(&link_idr_lock); 2728 } 2729 2730 /* Clean up bpf_link and corresponding anon_inode file and FD. 
After 2731 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 2732 * anon_inode's release() call. This helper marks bpf_link as 2733 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 2734 * is not decremented, it's the responsibility of the calling code that failed 2735 * to complete bpf_link initialization. 2736 */ 2737 void bpf_link_cleanup(struct bpf_link_primer *primer) 2738 { 2739 primer->link->prog = NULL; 2740 bpf_link_free_id(primer->id); 2741 fput(primer->file); 2742 put_unused_fd(primer->fd); 2743 } 2744 2745 void bpf_link_inc(struct bpf_link *link) 2746 { 2747 atomic64_inc(&link->refcnt); 2748 } 2749 2750 /* bpf_link_free is guaranteed to be called from process context */ 2751 static void bpf_link_free(struct bpf_link *link) 2752 { 2753 bpf_link_free_id(link->id); 2754 if (link->prog) { 2755 /* detach BPF program, clean up used resources */ 2756 link->ops->release(link); 2757 bpf_prog_put(link->prog); 2758 } 2759 /* free bpf_link and its containing memory */ 2760 link->ops->dealloc(link); 2761 } 2762 2763 static void bpf_link_put_deferred(struct work_struct *work) 2764 { 2765 struct bpf_link *link = container_of(work, struct bpf_link, work); 2766 2767 bpf_link_free(link); 2768 } 2769 2770 /* bpf_link_put can be called from atomic context, but ensures that resources 2771 * are freed from process context 2772 */ 2773 void bpf_link_put(struct bpf_link *link) 2774 { 2775 if (!atomic64_dec_and_test(&link->refcnt)) 2776 return; 2777 2778 if (in_atomic()) { 2779 INIT_WORK(&link->work, bpf_link_put_deferred); 2780 schedule_work(&link->work); 2781 } else { 2782 bpf_link_free(link); 2783 } 2784 } 2785 EXPORT_SYMBOL(bpf_link_put); 2786 2787 static int bpf_link_release(struct inode *inode, struct file *filp) 2788 { 2789 struct bpf_link *link = filp->private_data; 2790 2791 bpf_link_put(link); 2792 return 0; 2793 } 2794 2795 #ifdef CONFIG_PROC_FS 2796 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 2797 #define BPF_MAP_TYPE(_id, _ops) 2798 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name, 2799 static const char *bpf_link_type_strs[] = { 2800 [BPF_LINK_TYPE_UNSPEC] = "<invalid>", 2801 #include <linux/bpf_types.h> 2802 }; 2803 #undef BPF_PROG_TYPE 2804 #undef BPF_MAP_TYPE 2805 #undef BPF_LINK_TYPE 2806 2807 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) 2808 { 2809 const struct bpf_link *link = filp->private_data; 2810 const struct bpf_prog *prog = link->prog; 2811 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2812 2813 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2814 seq_printf(m, 2815 "link_type:\t%s\n" 2816 "link_id:\t%u\n" 2817 "prog_tag:\t%s\n" 2818 "prog_id:\t%u\n", 2819 bpf_link_type_strs[link->type], 2820 link->id, 2821 prog_tag, 2822 prog->aux->id); 2823 if (link->ops->show_fdinfo) 2824 link->ops->show_fdinfo(link, m); 2825 } 2826 #endif 2827 2828 static const struct file_operations bpf_link_fops = { 2829 #ifdef CONFIG_PROC_FS 2830 .show_fdinfo = bpf_link_show_fdinfo, 2831 #endif 2832 .release = bpf_link_release, 2833 .read = bpf_dummy_read, 2834 .write = bpf_dummy_write, 2835 }; 2836 2837 static int bpf_link_alloc_id(struct bpf_link *link) 2838 { 2839 int id; 2840 2841 idr_preload(GFP_KERNEL); 2842 spin_lock_bh(&link_idr_lock); 2843 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); 2844 spin_unlock_bh(&link_idr_lock); 2845 idr_preload_end(); 2846 2847 return id; 2848 } 2849 2850 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, 2851 * reserving
unused FD and allocating ID from link_idr. This is to be paired 2852 * with bpf_link_settle() to install FD and ID and expose bpf_link to 2853 * user-space, if bpf_link is successfully attached. If not, bpf_link and 2854 * pre-allocated resources are to be freed with bpf_link_cleanup() call. All the 2855 * transient state is passed around in struct bpf_link_primer. 2856 * This is the preferred way to create and initialize bpf_link, especially when 2857 * there are complicated and expensive operations in between creating bpf_link 2858 * itself and attaching it to BPF hook. By using bpf_link_prime() and 2859 * bpf_link_settle() kernel code using bpf_link doesn't have to perform 2860 * expensive (and potentially failing) roll back operations in the rare case 2861 * that file, FD, or ID can't be allocated. 2862 */ 2863 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) 2864 { 2865 struct file *file; 2866 int fd, id; 2867 2868 fd = get_unused_fd_flags(O_CLOEXEC); 2869 if (fd < 0) 2870 return fd; 2871 2872 2873 id = bpf_link_alloc_id(link); 2874 if (id < 0) { 2875 put_unused_fd(fd); 2876 return id; 2877 } 2878 2879 file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC); 2880 if (IS_ERR(file)) { 2881 bpf_link_free_id(id); 2882 put_unused_fd(fd); 2883 return PTR_ERR(file); 2884 } 2885 2886 primer->link = link; 2887 primer->file = file; 2888 primer->fd = fd; 2889 primer->id = id; 2890 return 0; 2891 } 2892 2893 int bpf_link_settle(struct bpf_link_primer *primer) 2894 { 2895 /* make bpf_link fetchable by ID */ 2896 spin_lock_bh(&link_idr_lock); 2897 primer->link->id = primer->id; 2898 spin_unlock_bh(&link_idr_lock); 2899 /* make bpf_link fetchable by FD */ 2900 fd_install(primer->fd, primer->file); 2901 /* pass through installed FD */ 2902 return primer->fd; 2903 } 2904 2905 int bpf_link_new_fd(struct bpf_link *link) 2906 { 2907 return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); 2908 } 2909 2910 struct bpf_link *bpf_link_get_from_fd(u32 ufd) 2911 { 2912 struct fd f = fdget(ufd); 2913 struct bpf_link *link; 2914 2915 if (!f.file) 2916 return ERR_PTR(-EBADF); 2917 if (f.file->f_op != &bpf_link_fops) { 2918 fdput(f); 2919 return ERR_PTR(-EINVAL); 2920 } 2921 2922 link = f.file->private_data; 2923 bpf_link_inc(link); 2924 fdput(f); 2925 2926 return link; 2927 } 2928 EXPORT_SYMBOL(bpf_link_get_from_fd); 2929 2930 static void bpf_tracing_link_release(struct bpf_link *link) 2931 { 2932 struct bpf_tracing_link *tr_link = 2933 container_of(link, struct bpf_tracing_link, link.link); 2934 2935 WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, 2936 tr_link->trampoline)); 2937 2938 bpf_trampoline_put(tr_link->trampoline); 2939 2940 /* tgt_prog is NULL if target is a kernel function */ 2941 if (tr_link->tgt_prog) 2942 bpf_prog_put(tr_link->tgt_prog); 2943 } 2944 2945 static void bpf_tracing_link_dealloc(struct bpf_link *link) 2946 { 2947 struct bpf_tracing_link *tr_link = 2948 container_of(link, struct bpf_tracing_link, link.link); 2949 2950 kfree(tr_link); 2951 } 2952 2953 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, 2954 struct seq_file *seq) 2955 { 2956 struct bpf_tracing_link *tr_link = 2957 container_of(link, struct bpf_tracing_link, link.link); 2958 2959 seq_printf(seq, 2960 "attach_type:\t%d\n", 2961 tr_link->attach_type); 2962 } 2963 2964 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, 2965 struct bpf_link_info *info) 2966 { 2967 struct bpf_tracing_link *tr_link = 2968 container_of(link, struct
bpf_tracing_link, link.link); 2969 2970 info->tracing.attach_type = tr_link->attach_type; 2971 bpf_trampoline_unpack_key(tr_link->trampoline->key, 2972 &info->tracing.target_obj_id, 2973 &info->tracing.target_btf_id); 2974 2975 return 0; 2976 } 2977 2978 static const struct bpf_link_ops bpf_tracing_link_lops = { 2979 .release = bpf_tracing_link_release, 2980 .dealloc = bpf_tracing_link_dealloc, 2981 .show_fdinfo = bpf_tracing_link_show_fdinfo, 2982 .fill_link_info = bpf_tracing_link_fill_link_info, 2983 }; 2984 2985 static int bpf_tracing_prog_attach(struct bpf_prog *prog, 2986 int tgt_prog_fd, 2987 u32 btf_id, 2988 u64 bpf_cookie) 2989 { 2990 struct bpf_link_primer link_primer; 2991 struct bpf_prog *tgt_prog = NULL; 2992 struct bpf_trampoline *tr = NULL; 2993 struct bpf_tracing_link *link; 2994 u64 key = 0; 2995 int err; 2996 2997 switch (prog->type) { 2998 case BPF_PROG_TYPE_TRACING: 2999 if (prog->expected_attach_type != BPF_TRACE_FENTRY && 3000 prog->expected_attach_type != BPF_TRACE_FEXIT && 3001 prog->expected_attach_type != BPF_MODIFY_RETURN) { 3002 err = -EINVAL; 3003 goto out_put_prog; 3004 } 3005 break; 3006 case BPF_PROG_TYPE_EXT: 3007 if (prog->expected_attach_type != 0) { 3008 err = -EINVAL; 3009 goto out_put_prog; 3010 } 3011 break; 3012 case BPF_PROG_TYPE_LSM: 3013 if (prog->expected_attach_type != BPF_LSM_MAC) { 3014 err = -EINVAL; 3015 goto out_put_prog; 3016 } 3017 break; 3018 default: 3019 err = -EINVAL; 3020 goto out_put_prog; 3021 } 3022 3023 if (!!tgt_prog_fd != !!btf_id) { 3024 err = -EINVAL; 3025 goto out_put_prog; 3026 } 3027 3028 if (tgt_prog_fd) { 3029 /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ 3030 if (prog->type != BPF_PROG_TYPE_EXT) { 3031 err = -EINVAL; 3032 goto out_put_prog; 3033 } 3034 3035 tgt_prog = bpf_prog_get(tgt_prog_fd); 3036 if (IS_ERR(tgt_prog)) { 3037 err = PTR_ERR(tgt_prog); 3038 tgt_prog = NULL; 3039 goto out_put_prog; 3040 } 3041 3042 key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id); 3043 } 3044 3045 link = kzalloc(sizeof(*link), GFP_USER); 3046 if (!link) { 3047 err = -ENOMEM; 3048 goto out_put_prog; 3049 } 3050 bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, 3051 &bpf_tracing_link_lops, prog); 3052 link->attach_type = prog->expected_attach_type; 3053 link->link.cookie = bpf_cookie; 3054 3055 mutex_lock(&prog->aux->dst_mutex); 3056 3057 /* There are a few possible cases here: 3058 * 3059 * - if prog->aux->dst_trampoline is set, the program was just loaded 3060 * and not yet attached to anything, so we can use the values stored 3061 * in prog->aux 3062 * 3063 * - if prog->aux->dst_trampoline is NULL, the program has already been 3064 * attached to a target and its initial target was cleared (below) 3065 * 3066 * - if tgt_prog != NULL, the caller specified tgt_prog_fd + 3067 * target_btf_id using the link_create API. 3068 * 3069 * - if tgt_prog == NULL when this function was called using the old 3070 * raw_tracepoint_open API, and we need a target from prog->aux 3071 * 3072 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program 3073 * was detached and is going for re-attachment. 3074 */ 3075 if (!prog->aux->dst_trampoline && !tgt_prog) { 3076 /* 3077 * Allow re-attach for TRACING and LSM programs. If it's 3078 * currently linked, bpf_trampoline_link_prog will fail. 3079 * EXT programs need to specify tgt_prog_fd, so they 3080 * re-attach in separate code path. 
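 *
 * As a hypothetical walk-through of this case: a BPF_TRACE_FENTRY program
 * is loaded with attach_btf_id, attached once (which consumes and clears
 * prog->aux->dst_trampoline further down), its link is then closed, and a
 * second attach request without an explicit target lands here, where the
 * trampoline key is recomputed from prog->aux->attach_btf_id.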
3081 */ 3082 if (prog->type != BPF_PROG_TYPE_TRACING && 3083 prog->type != BPF_PROG_TYPE_LSM) { 3084 err = -EINVAL; 3085 goto out_unlock; 3086 } 3087 btf_id = prog->aux->attach_btf_id; 3088 key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id); 3089 } 3090 3091 if (!prog->aux->dst_trampoline || 3092 (key && key != prog->aux->dst_trampoline->key)) { 3093 /* If there is no saved target, or the specified target is 3094 * different from the destination specified at load time, we 3095 * need a new trampoline and a check for compatibility 3096 */ 3097 struct bpf_attach_target_info tgt_info = {}; 3098 3099 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, 3100 &tgt_info); 3101 if (err) 3102 goto out_unlock; 3103 3104 tr = bpf_trampoline_get(key, &tgt_info); 3105 if (!tr) { 3106 err = -ENOMEM; 3107 goto out_unlock; 3108 } 3109 } else { 3110 /* The caller didn't specify a target, or the target was the 3111 * same as the destination supplied during program load. This 3112 * means we can reuse the trampoline and reference from program 3113 * load time, and there is no need to allocate a new one. This 3114 * can only happen once for any program, as the saved values in 3115 * prog->aux are cleared below. 3116 */ 3117 tr = prog->aux->dst_trampoline; 3118 tgt_prog = prog->aux->dst_prog; 3119 } 3120 3121 err = bpf_link_prime(&link->link.link, &link_primer); 3122 if (err) 3123 goto out_unlock; 3124 3125 err = bpf_trampoline_link_prog(&link->link, tr); 3126 if (err) { 3127 bpf_link_cleanup(&link_primer); 3128 link = NULL; 3129 goto out_unlock; 3130 } 3131 3132 link->tgt_prog = tgt_prog; 3133 link->trampoline = tr; 3134 3135 /* Always clear the trampoline and target prog from prog->aux to make 3136 * sure the original attach destination is not kept alive after a 3137 * program is (re-)attached to another target. 
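 *
 * E.g. (hypothetical): an EXT program loaded with attach_prog_fd pointing at
 * program A but later link-created against a different target must drop the
 * load-time reference on A here, otherwise that reference would simply be
 * leaked once dst_prog is cleared below.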
3138 */ 3139 if (prog->aux->dst_prog && 3140 (tgt_prog_fd || tr != prog->aux->dst_trampoline)) 3141 /* got extra prog ref from syscall, or attaching to different prog */ 3142 bpf_prog_put(prog->aux->dst_prog); 3143 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) 3144 /* we allocated a new trampoline, so free the old one */ 3145 bpf_trampoline_put(prog->aux->dst_trampoline); 3146 3147 prog->aux->dst_prog = NULL; 3148 prog->aux->dst_trampoline = NULL; 3149 mutex_unlock(&prog->aux->dst_mutex); 3150 3151 return bpf_link_settle(&link_primer); 3152 out_unlock: 3153 if (tr && tr != prog->aux->dst_trampoline) 3154 bpf_trampoline_put(tr); 3155 mutex_unlock(&prog->aux->dst_mutex); 3156 kfree(link); 3157 out_put_prog: 3158 if (tgt_prog_fd && tgt_prog) 3159 bpf_prog_put(tgt_prog); 3160 return err; 3161 } 3162 3163 struct bpf_raw_tp_link { 3164 struct bpf_link link; 3165 struct bpf_raw_event_map *btp; 3166 }; 3167 3168 static void bpf_raw_tp_link_release(struct bpf_link *link) 3169 { 3170 struct bpf_raw_tp_link *raw_tp = 3171 container_of(link, struct bpf_raw_tp_link, link); 3172 3173 bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog); 3174 bpf_put_raw_tracepoint(raw_tp->btp); 3175 } 3176 3177 static void bpf_raw_tp_link_dealloc(struct bpf_link *link) 3178 { 3179 struct bpf_raw_tp_link *raw_tp = 3180 container_of(link, struct bpf_raw_tp_link, link); 3181 3182 kfree(raw_tp); 3183 } 3184 3185 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, 3186 struct seq_file *seq) 3187 { 3188 struct bpf_raw_tp_link *raw_tp_link = 3189 container_of(link, struct bpf_raw_tp_link, link); 3190 3191 seq_printf(seq, 3192 "tp_name:\t%s\n", 3193 raw_tp_link->btp->tp->name); 3194 } 3195 3196 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, 3197 struct bpf_link_info *info) 3198 { 3199 struct bpf_raw_tp_link *raw_tp_link = 3200 container_of(link, struct bpf_raw_tp_link, link); 3201 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); 3202 const char *tp_name = raw_tp_link->btp->tp->name; 3203 u32 ulen = info->raw_tracepoint.tp_name_len; 3204 size_t tp_len = strlen(tp_name); 3205 3206 if (!ulen ^ !ubuf) 3207 return -EINVAL; 3208 3209 info->raw_tracepoint.tp_name_len = tp_len + 1; 3210 3211 if (!ubuf) 3212 return 0; 3213 3214 if (ulen >= tp_len + 1) { 3215 if (copy_to_user(ubuf, tp_name, tp_len + 1)) 3216 return -EFAULT; 3217 } else { 3218 char zero = '\0'; 3219 3220 if (copy_to_user(ubuf, tp_name, ulen - 1)) 3221 return -EFAULT; 3222 if (put_user(zero, ubuf + ulen - 1)) 3223 return -EFAULT; 3224 return -ENOSPC; 3225 } 3226 3227 return 0; 3228 } 3229 3230 static const struct bpf_link_ops bpf_raw_tp_link_lops = { 3231 .release = bpf_raw_tp_link_release, 3232 .dealloc = bpf_raw_tp_link_dealloc, 3233 .show_fdinfo = bpf_raw_tp_link_show_fdinfo, 3234 .fill_link_info = bpf_raw_tp_link_fill_link_info, 3235 }; 3236 3237 #ifdef CONFIG_PERF_EVENTS 3238 struct bpf_perf_link { 3239 struct bpf_link link; 3240 struct file *perf_file; 3241 }; 3242 3243 static void bpf_perf_link_release(struct bpf_link *link) 3244 { 3245 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3246 struct perf_event *event = perf_link->perf_file->private_data; 3247 3248 perf_event_free_bpf_prog(event); 3249 fput(perf_link->perf_file); 3250 } 3251 3252 static void bpf_perf_link_dealloc(struct bpf_link *link) 3253 { 3254 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3255 3256 kfree(perf_link); 3257 } 3258 3259 static const struct 
bpf_link_ops bpf_perf_link_lops = { 3260 .release = bpf_perf_link_release, 3261 .dealloc = bpf_perf_link_dealloc, 3262 }; 3263 3264 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3265 { 3266 struct bpf_link_primer link_primer; 3267 struct bpf_perf_link *link; 3268 struct perf_event *event; 3269 struct file *perf_file; 3270 int err; 3271 3272 if (attr->link_create.flags) 3273 return -EINVAL; 3274 3275 perf_file = perf_event_get(attr->link_create.target_fd); 3276 if (IS_ERR(perf_file)) 3277 return PTR_ERR(perf_file); 3278 3279 link = kzalloc(sizeof(*link), GFP_USER); 3280 if (!link) { 3281 err = -ENOMEM; 3282 goto out_put_file; 3283 } 3284 bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog); 3285 link->perf_file = perf_file; 3286 3287 err = bpf_link_prime(&link->link, &link_primer); 3288 if (err) { 3289 kfree(link); 3290 goto out_put_file; 3291 } 3292 3293 event = perf_file->private_data; 3294 err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie); 3295 if (err) { 3296 bpf_link_cleanup(&link_primer); 3297 goto out_put_file; 3298 } 3299 /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */ 3300 bpf_prog_inc(prog); 3301 3302 return bpf_link_settle(&link_primer); 3303 3304 out_put_file: 3305 fput(perf_file); 3306 return err; 3307 } 3308 #else 3309 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3310 { 3311 return -EOPNOTSUPP; 3312 } 3313 #endif /* CONFIG_PERF_EVENTS */ 3314 3315 static int bpf_raw_tp_link_attach(struct bpf_prog *prog, 3316 const char __user *user_tp_name) 3317 { 3318 struct bpf_link_primer link_primer; 3319 struct bpf_raw_tp_link *link; 3320 struct bpf_raw_event_map *btp; 3321 const char *tp_name; 3322 char buf[128]; 3323 int err; 3324 3325 switch (prog->type) { 3326 case BPF_PROG_TYPE_TRACING: 3327 case BPF_PROG_TYPE_EXT: 3328 case BPF_PROG_TYPE_LSM: 3329 if (user_tp_name) 3330 /* The attach point for this category of programs 3331 * should be specified via btf_id during program load. 
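 *
 * For example, a hypothetical fentry loader supplies attach_btf_id at
 * BPF_PROG_LOAD time and then creates the attachment with the name left
 * NULL, which routes to the bpf_tracing_prog_attach() call below rather
 * than a by-name tracepoint lookup.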
3332 */ 3333 return -EINVAL; 3334 if (prog->type == BPF_PROG_TYPE_TRACING && 3335 prog->expected_attach_type == BPF_TRACE_RAW_TP) { 3336 tp_name = prog->aux->attach_func_name; 3337 break; 3338 } 3339 return bpf_tracing_prog_attach(prog, 0, 0, 0); 3340 case BPF_PROG_TYPE_RAW_TRACEPOINT: 3341 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 3342 if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) 3343 return -EFAULT; 3344 buf[sizeof(buf) - 1] = 0; 3345 tp_name = buf; 3346 break; 3347 default: 3348 return -EINVAL; 3349 } 3350 3351 btp = bpf_get_raw_tracepoint(tp_name); 3352 if (!btp) 3353 return -ENOENT; 3354 3355 link = kzalloc(sizeof(*link), GFP_USER); 3356 if (!link) { 3357 err = -ENOMEM; 3358 goto out_put_btp; 3359 } 3360 bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, 3361 &bpf_raw_tp_link_lops, prog); 3362 link->btp = btp; 3363 3364 err = bpf_link_prime(&link->link, &link_primer); 3365 if (err) { 3366 kfree(link); 3367 goto out_put_btp; 3368 } 3369 3370 err = bpf_probe_register(link->btp, prog); 3371 if (err) { 3372 bpf_link_cleanup(&link_primer); 3373 goto out_put_btp; 3374 } 3375 3376 return bpf_link_settle(&link_primer); 3377 3378 out_put_btp: 3379 bpf_put_raw_tracepoint(btp); 3380 return err; 3381 } 3382 3383 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd 3384 3385 static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 3386 { 3387 struct bpf_prog *prog; 3388 int fd; 3389 3390 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) 3391 return -EINVAL; 3392 3393 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 3394 if (IS_ERR(prog)) 3395 return PTR_ERR(prog); 3396 3397 fd = bpf_raw_tp_link_attach(prog, u64_to_user_ptr(attr->raw_tracepoint.name)); 3398 if (fd < 0) 3399 bpf_prog_put(prog); 3400 return fd; 3401 } 3402 3403 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 3404 enum bpf_attach_type attach_type) 3405 { 3406 switch (prog->type) { 3407 case BPF_PROG_TYPE_CGROUP_SOCK: 3408 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3409 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3410 case BPF_PROG_TYPE_SK_LOOKUP: 3411 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 3412 case BPF_PROG_TYPE_CGROUP_SKB: 3413 if (!capable(CAP_NET_ADMIN)) 3414 /* cg-skb progs can be loaded by unpriv user. 3415 * check permissions at attach time. 3416 */ 3417 return -EPERM; 3418 return prog->enforce_expected_attach_type && 3419 prog->expected_attach_type != attach_type ? 
3420 -EINVAL : 0; 3421 default: 3422 return 0; 3423 } 3424 } 3425 3426 static enum bpf_prog_type 3427 attach_type_to_prog_type(enum bpf_attach_type attach_type) 3428 { 3429 switch (attach_type) { 3430 case BPF_CGROUP_INET_INGRESS: 3431 case BPF_CGROUP_INET_EGRESS: 3432 return BPF_PROG_TYPE_CGROUP_SKB; 3433 case BPF_CGROUP_INET_SOCK_CREATE: 3434 case BPF_CGROUP_INET_SOCK_RELEASE: 3435 case BPF_CGROUP_INET4_POST_BIND: 3436 case BPF_CGROUP_INET6_POST_BIND: 3437 return BPF_PROG_TYPE_CGROUP_SOCK; 3438 case BPF_CGROUP_INET4_BIND: 3439 case BPF_CGROUP_INET6_BIND: 3440 case BPF_CGROUP_INET4_CONNECT: 3441 case BPF_CGROUP_INET6_CONNECT: 3442 case BPF_CGROUP_INET4_GETPEERNAME: 3443 case BPF_CGROUP_INET6_GETPEERNAME: 3444 case BPF_CGROUP_INET4_GETSOCKNAME: 3445 case BPF_CGROUP_INET6_GETSOCKNAME: 3446 case BPF_CGROUP_UDP4_SENDMSG: 3447 case BPF_CGROUP_UDP6_SENDMSG: 3448 case BPF_CGROUP_UDP4_RECVMSG: 3449 case BPF_CGROUP_UDP6_RECVMSG: 3450 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 3451 case BPF_CGROUP_SOCK_OPS: 3452 return BPF_PROG_TYPE_SOCK_OPS; 3453 case BPF_CGROUP_DEVICE: 3454 return BPF_PROG_TYPE_CGROUP_DEVICE; 3455 case BPF_SK_MSG_VERDICT: 3456 return BPF_PROG_TYPE_SK_MSG; 3457 case BPF_SK_SKB_STREAM_PARSER: 3458 case BPF_SK_SKB_STREAM_VERDICT: 3459 case BPF_SK_SKB_VERDICT: 3460 return BPF_PROG_TYPE_SK_SKB; 3461 case BPF_LIRC_MODE2: 3462 return BPF_PROG_TYPE_LIRC_MODE2; 3463 case BPF_FLOW_DISSECTOR: 3464 return BPF_PROG_TYPE_FLOW_DISSECTOR; 3465 case BPF_CGROUP_SYSCTL: 3466 return BPF_PROG_TYPE_CGROUP_SYSCTL; 3467 case BPF_CGROUP_GETSOCKOPT: 3468 case BPF_CGROUP_SETSOCKOPT: 3469 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 3470 case BPF_TRACE_ITER: 3471 case BPF_TRACE_RAW_TP: 3472 case BPF_TRACE_FENTRY: 3473 case BPF_TRACE_FEXIT: 3474 case BPF_MODIFY_RETURN: 3475 return BPF_PROG_TYPE_TRACING; 3476 case BPF_LSM_MAC: 3477 return BPF_PROG_TYPE_LSM; 3478 case BPF_SK_LOOKUP: 3479 return BPF_PROG_TYPE_SK_LOOKUP; 3480 case BPF_XDP: 3481 return BPF_PROG_TYPE_XDP; 3482 case BPF_LSM_CGROUP: 3483 return BPF_PROG_TYPE_LSM; 3484 default: 3485 return BPF_PROG_TYPE_UNSPEC; 3486 } 3487 } 3488 3489 #define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd 3490 3491 #define BPF_F_ATTACH_MASK \ 3492 (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE) 3493 3494 static int bpf_prog_attach(const union bpf_attr *attr) 3495 { 3496 enum bpf_prog_type ptype; 3497 struct bpf_prog *prog; 3498 int ret; 3499 3500 if (CHECK_ATTR(BPF_PROG_ATTACH)) 3501 return -EINVAL; 3502 3503 if (attr->attach_flags & ~BPF_F_ATTACH_MASK) 3504 return -EINVAL; 3505 3506 ptype = attach_type_to_prog_type(attr->attach_type); 3507 if (ptype == BPF_PROG_TYPE_UNSPEC) 3508 return -EINVAL; 3509 3510 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 3511 if (IS_ERR(prog)) 3512 return PTR_ERR(prog); 3513 3514 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) { 3515 bpf_prog_put(prog); 3516 return -EINVAL; 3517 } 3518 3519 switch (ptype) { 3520 case BPF_PROG_TYPE_SK_SKB: 3521 case BPF_PROG_TYPE_SK_MSG: 3522 ret = sock_map_get_from_fd(attr, prog); 3523 break; 3524 case BPF_PROG_TYPE_LIRC_MODE2: 3525 ret = lirc_prog_attach(attr, prog); 3526 break; 3527 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3528 ret = netns_bpf_prog_attach(attr, prog); 3529 break; 3530 case BPF_PROG_TYPE_CGROUP_DEVICE: 3531 case BPF_PROG_TYPE_CGROUP_SKB: 3532 case BPF_PROG_TYPE_CGROUP_SOCK: 3533 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3534 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3535 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3536 case BPF_PROG_TYPE_SOCK_OPS: 3537 case BPF_PROG_TYPE_LSM: 3538 if (ptype == 
BPF_PROG_TYPE_LSM && 3539 prog->expected_attach_type != BPF_LSM_CGROUP) 3540 ret = -EINVAL; 3541 else 3542 ret = cgroup_bpf_prog_attach(attr, ptype, prog); 3543 break; 3544 default: 3545 ret = -EINVAL; 3546 } 3547 3548 if (ret) 3549 bpf_prog_put(prog); 3550 return ret; 3551 } 3552 3553 #define BPF_PROG_DETACH_LAST_FIELD attach_type 3554 3555 static int bpf_prog_detach(const union bpf_attr *attr) 3556 { 3557 enum bpf_prog_type ptype; 3558 3559 if (CHECK_ATTR(BPF_PROG_DETACH)) 3560 return -EINVAL; 3561 3562 ptype = attach_type_to_prog_type(attr->attach_type); 3563 3564 switch (ptype) { 3565 case BPF_PROG_TYPE_SK_MSG: 3566 case BPF_PROG_TYPE_SK_SKB: 3567 return sock_map_prog_detach(attr, ptype); 3568 case BPF_PROG_TYPE_LIRC_MODE2: 3569 return lirc_prog_detach(attr); 3570 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3571 return netns_bpf_prog_detach(attr, ptype); 3572 case BPF_PROG_TYPE_CGROUP_DEVICE: 3573 case BPF_PROG_TYPE_CGROUP_SKB: 3574 case BPF_PROG_TYPE_CGROUP_SOCK: 3575 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3576 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3577 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3578 case BPF_PROG_TYPE_SOCK_OPS: 3579 case BPF_PROG_TYPE_LSM: 3580 return cgroup_bpf_prog_detach(attr, ptype); 3581 default: 3582 return -EINVAL; 3583 } 3584 } 3585 3586 #define BPF_PROG_QUERY_LAST_FIELD query.prog_attach_flags 3587 3588 static int bpf_prog_query(const union bpf_attr *attr, 3589 union bpf_attr __user *uattr) 3590 { 3591 if (!capable(CAP_NET_ADMIN)) 3592 return -EPERM; 3593 if (CHECK_ATTR(BPF_PROG_QUERY)) 3594 return -EINVAL; 3595 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) 3596 return -EINVAL; 3597 3598 switch (attr->query.attach_type) { 3599 case BPF_CGROUP_INET_INGRESS: 3600 case BPF_CGROUP_INET_EGRESS: 3601 case BPF_CGROUP_INET_SOCK_CREATE: 3602 case BPF_CGROUP_INET_SOCK_RELEASE: 3603 case BPF_CGROUP_INET4_BIND: 3604 case BPF_CGROUP_INET6_BIND: 3605 case BPF_CGROUP_INET4_POST_BIND: 3606 case BPF_CGROUP_INET6_POST_BIND: 3607 case BPF_CGROUP_INET4_CONNECT: 3608 case BPF_CGROUP_INET6_CONNECT: 3609 case BPF_CGROUP_INET4_GETPEERNAME: 3610 case BPF_CGROUP_INET6_GETPEERNAME: 3611 case BPF_CGROUP_INET4_GETSOCKNAME: 3612 case BPF_CGROUP_INET6_GETSOCKNAME: 3613 case BPF_CGROUP_UDP4_SENDMSG: 3614 case BPF_CGROUP_UDP6_SENDMSG: 3615 case BPF_CGROUP_UDP4_RECVMSG: 3616 case BPF_CGROUP_UDP6_RECVMSG: 3617 case BPF_CGROUP_SOCK_OPS: 3618 case BPF_CGROUP_DEVICE: 3619 case BPF_CGROUP_SYSCTL: 3620 case BPF_CGROUP_GETSOCKOPT: 3621 case BPF_CGROUP_SETSOCKOPT: 3622 case BPF_LSM_CGROUP: 3623 return cgroup_bpf_prog_query(attr, uattr); 3624 case BPF_LIRC_MODE2: 3625 return lirc_prog_query(attr, uattr); 3626 case BPF_FLOW_DISSECTOR: 3627 case BPF_SK_LOOKUP: 3628 return netns_bpf_prog_query(attr, uattr); 3629 case BPF_SK_SKB_STREAM_PARSER: 3630 case BPF_SK_SKB_STREAM_VERDICT: 3631 case BPF_SK_MSG_VERDICT: 3632 case BPF_SK_SKB_VERDICT: 3633 return sock_map_bpf_prog_query(attr, uattr); 3634 default: 3635 return -EINVAL; 3636 } 3637 } 3638 3639 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size 3640 3641 static int bpf_prog_test_run(const union bpf_attr *attr, 3642 union bpf_attr __user *uattr) 3643 { 3644 struct bpf_prog *prog; 3645 int ret = -ENOTSUPP; 3646 3647 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 3648 return -EINVAL; 3649 3650 if ((attr->test.ctx_size_in && !attr->test.ctx_in) || 3651 (!attr->test.ctx_size_in && attr->test.ctx_in)) 3652 return -EINVAL; 3653 3654 if ((attr->test.ctx_size_out && !attr->test.ctx_out) || 3655 (!attr->test.ctx_size_out && attr->test.ctx_out)) 3656 return -EINVAL; 3657 3658 prog = 
bpf_prog_get(attr->test.prog_fd); 3659 if (IS_ERR(prog)) 3660 return PTR_ERR(prog); 3661 3662 if (prog->aux->ops->test_run) 3663 ret = prog->aux->ops->test_run(prog, attr, uattr); 3664 3665 bpf_prog_put(prog); 3666 return ret; 3667 } 3668 3669 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 3670 3671 static int bpf_obj_get_next_id(const union bpf_attr *attr, 3672 union bpf_attr __user *uattr, 3673 struct idr *idr, 3674 spinlock_t *lock) 3675 { 3676 u32 next_id = attr->start_id; 3677 int err = 0; 3678 3679 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 3680 return -EINVAL; 3681 3682 if (!capable(CAP_SYS_ADMIN)) 3683 return -EPERM; 3684 3685 next_id++; 3686 spin_lock_bh(lock); 3687 if (!idr_get_next(idr, &next_id)) 3688 err = -ENOENT; 3689 spin_unlock_bh(lock); 3690 3691 if (!err) 3692 err = put_user(next_id, &uattr->next_id); 3693 3694 return err; 3695 } 3696 3697 struct bpf_map *bpf_map_get_curr_or_next(u32 *id) 3698 { 3699 struct bpf_map *map; 3700 3701 spin_lock_bh(&map_idr_lock); 3702 again: 3703 map = idr_get_next(&map_idr, id); 3704 if (map) { 3705 map = __bpf_map_inc_not_zero(map, false); 3706 if (IS_ERR(map)) { 3707 (*id)++; 3708 goto again; 3709 } 3710 } 3711 spin_unlock_bh(&map_idr_lock); 3712 3713 return map; 3714 } 3715 3716 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) 3717 { 3718 struct bpf_prog *prog; 3719 3720 spin_lock_bh(&prog_idr_lock); 3721 again: 3722 prog = idr_get_next(&prog_idr, id); 3723 if (prog) { 3724 prog = bpf_prog_inc_not_zero(prog); 3725 if (IS_ERR(prog)) { 3726 (*id)++; 3727 goto again; 3728 } 3729 } 3730 spin_unlock_bh(&prog_idr_lock); 3731 3732 return prog; 3733 } 3734 3735 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 3736 3737 struct bpf_prog *bpf_prog_by_id(u32 id) 3738 { 3739 struct bpf_prog *prog; 3740 3741 if (!id) 3742 return ERR_PTR(-ENOENT); 3743 3744 spin_lock_bh(&prog_idr_lock); 3745 prog = idr_find(&prog_idr, id); 3746 if (prog) 3747 prog = bpf_prog_inc_not_zero(prog); 3748 else 3749 prog = ERR_PTR(-ENOENT); 3750 spin_unlock_bh(&prog_idr_lock); 3751 return prog; 3752 } 3753 3754 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 3755 { 3756 struct bpf_prog *prog; 3757 u32 id = attr->prog_id; 3758 int fd; 3759 3760 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 3761 return -EINVAL; 3762 3763 if (!capable(CAP_SYS_ADMIN)) 3764 return -EPERM; 3765 3766 prog = bpf_prog_by_id(id); 3767 if (IS_ERR(prog)) 3768 return PTR_ERR(prog); 3769 3770 fd = bpf_prog_new_fd(prog); 3771 if (fd < 0) 3772 bpf_prog_put(prog); 3773 3774 return fd; 3775 } 3776 3777 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 3778 3779 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 3780 { 3781 struct bpf_map *map; 3782 u32 id = attr->map_id; 3783 int f_flags; 3784 int fd; 3785 3786 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || 3787 attr->open_flags & ~BPF_OBJ_FLAG_MASK) 3788 return -EINVAL; 3789 3790 if (!capable(CAP_SYS_ADMIN)) 3791 return -EPERM; 3792 3793 f_flags = bpf_get_file_flag(attr->open_flags); 3794 if (f_flags < 0) 3795 return f_flags; 3796 3797 spin_lock_bh(&map_idr_lock); 3798 map = idr_find(&map_idr, id); 3799 if (map) 3800 map = __bpf_map_inc_not_zero(map, true); 3801 else 3802 map = ERR_PTR(-ENOENT); 3803 spin_unlock_bh(&map_idr_lock); 3804 3805 if (IS_ERR(map)) 3806 return PTR_ERR(map); 3807 3808 fd = bpf_map_new_fd(map, f_flags); 3809 if (fd < 0) 3810 bpf_map_put_with_uref(map); 3811 3812 return fd; 3813 } 3814 3815 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 3816 unsigned long addr, u32 *off, 3817 u32 
*type) 3818 { 3819 const struct bpf_map *map; 3820 int i; 3821 3822 mutex_lock(&prog->aux->used_maps_mutex); 3823 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { 3824 map = prog->aux->used_maps[i]; 3825 if (map == (void *)addr) { 3826 *type = BPF_PSEUDO_MAP_FD; 3827 goto out; 3828 } 3829 if (!map->ops->map_direct_value_meta) 3830 continue; 3831 if (!map->ops->map_direct_value_meta(map, addr, off)) { 3832 *type = BPF_PSEUDO_MAP_VALUE; 3833 goto out; 3834 } 3835 } 3836 map = NULL; 3837 3838 out: 3839 mutex_unlock(&prog->aux->used_maps_mutex); 3840 return map; 3841 } 3842 3843 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, 3844 const struct cred *f_cred) 3845 { 3846 const struct bpf_map *map; 3847 struct bpf_insn *insns; 3848 u32 off, type; 3849 u64 imm; 3850 u8 code; 3851 int i; 3852 3853 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), 3854 GFP_USER); 3855 if (!insns) 3856 return insns; 3857 3858 for (i = 0; i < prog->len; i++) { 3859 code = insns[i].code; 3860 3861 if (code == (BPF_JMP | BPF_TAIL_CALL)) { 3862 insns[i].code = BPF_JMP | BPF_CALL; 3863 insns[i].imm = BPF_FUNC_tail_call; 3864 /* fall-through */ 3865 } 3866 if (code == (BPF_JMP | BPF_CALL) || 3867 code == (BPF_JMP | BPF_CALL_ARGS)) { 3868 if (code == (BPF_JMP | BPF_CALL_ARGS)) 3869 insns[i].code = BPF_JMP | BPF_CALL; 3870 if (!bpf_dump_raw_ok(f_cred)) 3871 insns[i].imm = 0; 3872 continue; 3873 } 3874 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { 3875 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; 3876 continue; 3877 } 3878 3879 if (code != (BPF_LD | BPF_IMM | BPF_DW)) 3880 continue; 3881 3882 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 3883 map = bpf_map_from_imm(prog, imm, &off, &type); 3884 if (map) { 3885 insns[i].src_reg = type; 3886 insns[i].imm = map->id; 3887 insns[i + 1].imm = off; 3888 continue; 3889 } 3890 } 3891 3892 return insns; 3893 } 3894 3895 static int set_info_rec_size(struct bpf_prog_info *info) 3896 { 3897 /* 3898 * Ensure info.*_rec_size is the same as kernel expected size 3899 * 3900 * or 3901 * 3902 * Only allow zero *_rec_size if both _rec_size and _cnt are 3903 * zero. In this case, the kernel will set the expected 3904 * _rec_size back to the info. 
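 *
 * For example, a hypothetical older userspace that knows nothing about func
 * info may pass nr_func_info == 0 and func_info_rec_size == 0; that passes
 * the checks below, and the kernel writes back the record sizes it expects
 * so the caller can retry with properly sized buffers.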
3905 */ 3906 3907 if ((info->nr_func_info || info->func_info_rec_size) && 3908 info->func_info_rec_size != sizeof(struct bpf_func_info)) 3909 return -EINVAL; 3910 3911 if ((info->nr_line_info || info->line_info_rec_size) && 3912 info->line_info_rec_size != sizeof(struct bpf_line_info)) 3913 return -EINVAL; 3914 3915 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && 3916 info->jited_line_info_rec_size != sizeof(__u64)) 3917 return -EINVAL; 3918 3919 info->func_info_rec_size = sizeof(struct bpf_func_info); 3920 info->line_info_rec_size = sizeof(struct bpf_line_info); 3921 info->jited_line_info_rec_size = sizeof(__u64); 3922 3923 return 0; 3924 } 3925 3926 static int bpf_prog_get_info_by_fd(struct file *file, 3927 struct bpf_prog *prog, 3928 const union bpf_attr *attr, 3929 union bpf_attr __user *uattr) 3930 { 3931 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 3932 struct btf *attach_btf = bpf_prog_get_target_btf(prog); 3933 struct bpf_prog_info info; 3934 u32 info_len = attr->info.info_len; 3935 struct bpf_prog_kstats stats; 3936 char __user *uinsns; 3937 u32 ulen; 3938 int err; 3939 3940 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 3941 if (err) 3942 return err; 3943 info_len = min_t(u32, sizeof(info), info_len); 3944 3945 memset(&info, 0, sizeof(info)); 3946 if (copy_from_user(&info, uinfo, info_len)) 3947 return -EFAULT; 3948 3949 info.type = prog->type; 3950 info.id = prog->aux->id; 3951 info.load_time = prog->aux->load_time; 3952 info.created_by_uid = from_kuid_munged(current_user_ns(), 3953 prog->aux->user->uid); 3954 info.gpl_compatible = prog->gpl_compatible; 3955 3956 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 3957 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 3958 3959 mutex_lock(&prog->aux->used_maps_mutex); 3960 ulen = info.nr_map_ids; 3961 info.nr_map_ids = prog->aux->used_map_cnt; 3962 ulen = min_t(u32, info.nr_map_ids, ulen); 3963 if (ulen) { 3964 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 3965 u32 i; 3966 3967 for (i = 0; i < ulen; i++) 3968 if (put_user(prog->aux->used_maps[i]->id, 3969 &user_map_ids[i])) { 3970 mutex_unlock(&prog->aux->used_maps_mutex); 3971 return -EFAULT; 3972 } 3973 } 3974 mutex_unlock(&prog->aux->used_maps_mutex); 3975 3976 err = set_info_rec_size(&info); 3977 if (err) 3978 return err; 3979 3980 bpf_prog_get_stats(prog, &stats); 3981 info.run_time_ns = stats.nsecs; 3982 info.run_cnt = stats.cnt; 3983 info.recursion_misses = stats.misses; 3984 3985 info.verified_insns = prog->aux->verified_insns; 3986 3987 if (!bpf_capable()) { 3988 info.jited_prog_len = 0; 3989 info.xlated_prog_len = 0; 3990 info.nr_jited_ksyms = 0; 3991 info.nr_jited_func_lens = 0; 3992 info.nr_func_info = 0; 3993 info.nr_line_info = 0; 3994 info.nr_jited_line_info = 0; 3995 goto done; 3996 } 3997 3998 ulen = info.xlated_prog_len; 3999 info.xlated_prog_len = bpf_prog_insn_size(prog); 4000 if (info.xlated_prog_len && ulen) { 4001 struct bpf_insn *insns_sanitized; 4002 bool fault; 4003 4004 if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) { 4005 info.xlated_prog_insns = 0; 4006 goto done; 4007 } 4008 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); 4009 if (!insns_sanitized) 4010 return -ENOMEM; 4011 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 4012 ulen = min_t(u32, info.xlated_prog_len, ulen); 4013 fault = copy_to_user(uinsns, insns_sanitized, ulen); 4014 kfree(insns_sanitized); 4015 if (fault) 4016 return -EFAULT; 4017 } 4018 4019 if 
(bpf_prog_is_offloaded(prog->aux)) { 4020 err = bpf_prog_offload_info_fill(&info, prog); 4021 if (err) 4022 return err; 4023 goto done; 4024 } 4025 4026 /* NOTE: the following code is supposed to be skipped for offload. 4027 * bpf_prog_offload_info_fill() is the place to fill similar fields 4028 * for offload. 4029 */ 4030 ulen = info.jited_prog_len; 4031 if (prog->aux->func_cnt) { 4032 u32 i; 4033 4034 info.jited_prog_len = 0; 4035 for (i = 0; i < prog->aux->func_cnt; i++) 4036 info.jited_prog_len += prog->aux->func[i]->jited_len; 4037 } else { 4038 info.jited_prog_len = prog->jited_len; 4039 } 4040 4041 if (info.jited_prog_len && ulen) { 4042 if (bpf_dump_raw_ok(file->f_cred)) { 4043 uinsns = u64_to_user_ptr(info.jited_prog_insns); 4044 ulen = min_t(u32, info.jited_prog_len, ulen); 4045 4046 /* for multi-function programs, copy the JITed 4047 * instructions for all the functions 4048 */ 4049 if (prog->aux->func_cnt) { 4050 u32 len, free, i; 4051 u8 *img; 4052 4053 free = ulen; 4054 for (i = 0; i < prog->aux->func_cnt; i++) { 4055 len = prog->aux->func[i]->jited_len; 4056 len = min_t(u32, len, free); 4057 img = (u8 *) prog->aux->func[i]->bpf_func; 4058 if (copy_to_user(uinsns, img, len)) 4059 return -EFAULT; 4060 uinsns += len; 4061 free -= len; 4062 if (!free) 4063 break; 4064 } 4065 } else { 4066 if (copy_to_user(uinsns, prog->bpf_func, ulen)) 4067 return -EFAULT; 4068 } 4069 } else { 4070 info.jited_prog_insns = 0; 4071 } 4072 } 4073 4074 ulen = info.nr_jited_ksyms; 4075 info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; 4076 if (ulen) { 4077 if (bpf_dump_raw_ok(file->f_cred)) { 4078 unsigned long ksym_addr; 4079 u64 __user *user_ksyms; 4080 u32 i; 4081 4082 /* copy the address of the kernel symbol 4083 * corresponding to each function 4084 */ 4085 ulen = min_t(u32, info.nr_jited_ksyms, ulen); 4086 user_ksyms = u64_to_user_ptr(info.jited_ksyms); 4087 if (prog->aux->func_cnt) { 4088 for (i = 0; i < ulen; i++) { 4089 ksym_addr = (unsigned long) 4090 prog->aux->func[i]->bpf_func; 4091 if (put_user((u64) ksym_addr, 4092 &user_ksyms[i])) 4093 return -EFAULT; 4094 } 4095 } else { 4096 ksym_addr = (unsigned long) prog->bpf_func; 4097 if (put_user((u64) ksym_addr, &user_ksyms[0])) 4098 return -EFAULT; 4099 } 4100 } else { 4101 info.jited_ksyms = 0; 4102 } 4103 } 4104 4105 ulen = info.nr_jited_func_lens; 4106 info.nr_jited_func_lens = prog->aux->func_cnt ? 
: 1; 4107 if (ulen) { 4108 if (bpf_dump_raw_ok(file->f_cred)) { 4109 u32 __user *user_lens; 4110 u32 func_len, i; 4111 4112 /* copy the JITed image lengths for each function */ 4113 ulen = min_t(u32, info.nr_jited_func_lens, ulen); 4114 user_lens = u64_to_user_ptr(info.jited_func_lens); 4115 if (prog->aux->func_cnt) { 4116 for (i = 0; i < ulen; i++) { 4117 func_len = 4118 prog->aux->func[i]->jited_len; 4119 if (put_user(func_len, &user_lens[i])) 4120 return -EFAULT; 4121 } 4122 } else { 4123 func_len = prog->jited_len; 4124 if (put_user(func_len, &user_lens[0])) 4125 return -EFAULT; 4126 } 4127 } else { 4128 info.jited_func_lens = 0; 4129 } 4130 } 4131 4132 if (prog->aux->btf) 4133 info.btf_id = btf_obj_id(prog->aux->btf); 4134 info.attach_btf_id = prog->aux->attach_btf_id; 4135 if (attach_btf) 4136 info.attach_btf_obj_id = btf_obj_id(attach_btf); 4137 4138 ulen = info.nr_func_info; 4139 info.nr_func_info = prog->aux->func_info_cnt; 4140 if (info.nr_func_info && ulen) { 4141 char __user *user_finfo; 4142 4143 user_finfo = u64_to_user_ptr(info.func_info); 4144 ulen = min_t(u32, info.nr_func_info, ulen); 4145 if (copy_to_user(user_finfo, prog->aux->func_info, 4146 info.func_info_rec_size * ulen)) 4147 return -EFAULT; 4148 } 4149 4150 ulen = info.nr_line_info; 4151 info.nr_line_info = prog->aux->nr_linfo; 4152 if (info.nr_line_info && ulen) { 4153 __u8 __user *user_linfo; 4154 4155 user_linfo = u64_to_user_ptr(info.line_info); 4156 ulen = min_t(u32, info.nr_line_info, ulen); 4157 if (copy_to_user(user_linfo, prog->aux->linfo, 4158 info.line_info_rec_size * ulen)) 4159 return -EFAULT; 4160 } 4161 4162 ulen = info.nr_jited_line_info; 4163 if (prog->aux->jited_linfo) 4164 info.nr_jited_line_info = prog->aux->nr_linfo; 4165 else 4166 info.nr_jited_line_info = 0; 4167 if (info.nr_jited_line_info && ulen) { 4168 if (bpf_dump_raw_ok(file->f_cred)) { 4169 unsigned long line_addr; 4170 __u64 __user *user_linfo; 4171 u32 i; 4172 4173 user_linfo = u64_to_user_ptr(info.jited_line_info); 4174 ulen = min_t(u32, info.nr_jited_line_info, ulen); 4175 for (i = 0; i < ulen; i++) { 4176 line_addr = (unsigned long)prog->aux->jited_linfo[i]; 4177 if (put_user((__u64)line_addr, &user_linfo[i])) 4178 return -EFAULT; 4179 } 4180 } else { 4181 info.jited_line_info = 0; 4182 } 4183 } 4184 4185 ulen = info.nr_prog_tags; 4186 info.nr_prog_tags = prog->aux->func_cnt ? 
: 1; 4187 if (ulen) { 4188 __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; 4189 u32 i; 4190 4191 user_prog_tags = u64_to_user_ptr(info.prog_tags); 4192 ulen = min_t(u32, info.nr_prog_tags, ulen); 4193 if (prog->aux->func_cnt) { 4194 for (i = 0; i < ulen; i++) { 4195 if (copy_to_user(user_prog_tags[i], 4196 prog->aux->func[i]->tag, 4197 BPF_TAG_SIZE)) 4198 return -EFAULT; 4199 } 4200 } else { 4201 if (copy_to_user(user_prog_tags[0], 4202 prog->tag, BPF_TAG_SIZE)) 4203 return -EFAULT; 4204 } 4205 } 4206 4207 done: 4208 if (copy_to_user(uinfo, &info, info_len) || 4209 put_user(info_len, &uattr->info.info_len)) 4210 return -EFAULT; 4211 4212 return 0; 4213 } 4214 4215 static int bpf_map_get_info_by_fd(struct file *file, 4216 struct bpf_map *map, 4217 const union bpf_attr *attr, 4218 union bpf_attr __user *uattr) 4219 { 4220 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); 4221 struct bpf_map_info info; 4222 u32 info_len = attr->info.info_len; 4223 int err; 4224 4225 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 4226 if (err) 4227 return err; 4228 info_len = min_t(u32, sizeof(info), info_len); 4229 4230 memset(&info, 0, sizeof(info)); 4231 info.type = map->map_type; 4232 info.id = map->id; 4233 info.key_size = map->key_size; 4234 info.value_size = map->value_size; 4235 info.max_entries = map->max_entries; 4236 info.map_flags = map->map_flags; 4237 info.map_extra = map->map_extra; 4238 memcpy(info.name, map->name, sizeof(map->name)); 4239 4240 if (map->btf) { 4241 info.btf_id = btf_obj_id(map->btf); 4242 info.btf_key_type_id = map->btf_key_type_id; 4243 info.btf_value_type_id = map->btf_value_type_id; 4244 } 4245 info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 4246 4247 if (bpf_map_is_offloaded(map)) { 4248 err = bpf_map_offload_info_fill(&info, map); 4249 if (err) 4250 return err; 4251 } 4252 4253 if (copy_to_user(uinfo, &info, info_len) || 4254 put_user(info_len, &uattr->info.info_len)) 4255 return -EFAULT; 4256 4257 return 0; 4258 } 4259 4260 static int bpf_btf_get_info_by_fd(struct file *file, 4261 struct btf *btf, 4262 const union bpf_attr *attr, 4263 union bpf_attr __user *uattr) 4264 { 4265 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); 4266 u32 info_len = attr->info.info_len; 4267 int err; 4268 4269 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 4270 if (err) 4271 return err; 4272 4273 return btf_get_info_by_fd(btf, attr, uattr); 4274 } 4275 4276 static int bpf_link_get_info_by_fd(struct file *file, 4277 struct bpf_link *link, 4278 const union bpf_attr *attr, 4279 union bpf_attr __user *uattr) 4280 { 4281 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); 4282 struct bpf_link_info info; 4283 u32 info_len = attr->info.info_len; 4284 int err; 4285 4286 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 4287 if (err) 4288 return err; 4289 info_len = min_t(u32, sizeof(info), info_len); 4290 4291 memset(&info, 0, sizeof(info)); 4292 if (copy_from_user(&info, uinfo, info_len)) 4293 return -EFAULT; 4294 4295 info.type = link->type; 4296 info.id = link->id; 4297 info.prog_id = link->prog->aux->id; 4298 4299 if (link->ops->fill_link_info) { 4300 err = link->ops->fill_link_info(link, &info); 4301 if (err) 4302 return err; 4303 } 4304 4305 if (copy_to_user(uinfo, &info, info_len) || 4306 put_user(info_len, &uattr->info.info_len)) 4307 return -EFAULT; 4308 4309 return 0; 4310 } 4311 4312 4313 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD 
info.info 4314 4315 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, 4316 union bpf_attr __user *uattr) 4317 { 4318 int ufd = attr->info.bpf_fd; 4319 struct fd f; 4320 int err; 4321 4322 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) 4323 return -EINVAL; 4324 4325 f = fdget(ufd); 4326 if (!f.file) 4327 return -EBADFD; 4328 4329 if (f.file->f_op == &bpf_prog_fops) 4330 err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr, 4331 uattr); 4332 else if (f.file->f_op == &bpf_map_fops) 4333 err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr, 4334 uattr); 4335 else if (f.file->f_op == &btf_fops) 4336 err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); 4337 else if (f.file->f_op == &bpf_link_fops) 4338 err = bpf_link_get_info_by_fd(f.file, f.file->private_data, 4339 attr, uattr); 4340 else 4341 err = -EINVAL; 4342 4343 fdput(f); 4344 return err; 4345 } 4346 4347 #define BPF_BTF_LOAD_LAST_FIELD btf_log_level 4348 4349 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr) 4350 { 4351 if (CHECK_ATTR(BPF_BTF_LOAD)) 4352 return -EINVAL; 4353 4354 if (!bpf_capable()) 4355 return -EPERM; 4356 4357 return btf_new_fd(attr, uattr); 4358 } 4359 4360 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id 4361 4362 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) 4363 { 4364 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) 4365 return -EINVAL; 4366 4367 if (!capable(CAP_SYS_ADMIN)) 4368 return -EPERM; 4369 4370 return btf_get_fd_by_id(attr->btf_id); 4371 } 4372 4373 static int bpf_task_fd_query_copy(const union bpf_attr *attr, 4374 union bpf_attr __user *uattr, 4375 u32 prog_id, u32 fd_type, 4376 const char *buf, u64 probe_offset, 4377 u64 probe_addr) 4378 { 4379 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf); 4380 u32 len = buf ? strlen(buf) : 0, input_len; 4381 int err = 0; 4382 4383 if (put_user(len, &uattr->task_fd_query.buf_len)) 4384 return -EFAULT; 4385 input_len = attr->task_fd_query.buf_len; 4386 if (input_len && ubuf) { 4387 if (!len) { 4388 /* nothing to copy, just make ubuf NULL terminated */ 4389 char zero = '\0'; 4390 4391 if (put_user(zero, ubuf)) 4392 return -EFAULT; 4393 } else if (input_len >= len + 1) { 4394 /* ubuf can hold the string with NULL terminator */ 4395 if (copy_to_user(ubuf, buf, len + 1)) 4396 return -EFAULT; 4397 } else { 4398 /* ubuf cannot hold the string with NULL terminator, 4399 * do a partial copy with NULL terminator. 
4400 */ 4401 char zero = '\0'; 4402 4403 err = -ENOSPC; 4404 if (copy_to_user(ubuf, buf, input_len - 1)) 4405 return -EFAULT; 4406 if (put_user(zero, ubuf + input_len - 1)) 4407 return -EFAULT; 4408 } 4409 } 4410 4411 if (put_user(prog_id, &uattr->task_fd_query.prog_id) || 4412 put_user(fd_type, &uattr->task_fd_query.fd_type) || 4413 put_user(probe_offset, &uattr->task_fd_query.probe_offset) || 4414 put_user(probe_addr, &uattr->task_fd_query.probe_addr)) 4415 return -EFAULT; 4416 4417 return err; 4418 } 4419 4420 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr 4421 4422 static int bpf_task_fd_query(const union bpf_attr *attr, 4423 union bpf_attr __user *uattr) 4424 { 4425 pid_t pid = attr->task_fd_query.pid; 4426 u32 fd = attr->task_fd_query.fd; 4427 const struct perf_event *event; 4428 struct task_struct *task; 4429 struct file *file; 4430 int err; 4431 4432 if (CHECK_ATTR(BPF_TASK_FD_QUERY)) 4433 return -EINVAL; 4434 4435 if (!capable(CAP_SYS_ADMIN)) 4436 return -EPERM; 4437 4438 if (attr->task_fd_query.flags != 0) 4439 return -EINVAL; 4440 4441 rcu_read_lock(); 4442 task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 4443 rcu_read_unlock(); 4444 if (!task) 4445 return -ENOENT; 4446 4447 err = 0; 4448 file = fget_task(task, fd); 4449 put_task_struct(task); 4450 if (!file) 4451 return -EBADF; 4452 4453 if (file->f_op == &bpf_link_fops) { 4454 struct bpf_link *link = file->private_data; 4455 4456 if (link->ops == &bpf_raw_tp_link_lops) { 4457 struct bpf_raw_tp_link *raw_tp = 4458 container_of(link, struct bpf_raw_tp_link, link); 4459 struct bpf_raw_event_map *btp = raw_tp->btp; 4460 4461 err = bpf_task_fd_query_copy(attr, uattr, 4462 raw_tp->link.prog->aux->id, 4463 BPF_FD_TYPE_RAW_TRACEPOINT, 4464 btp->tp->name, 0, 0); 4465 goto put_file; 4466 } 4467 goto out_not_supp; 4468 } 4469 4470 event = perf_get_event(file); 4471 if (!IS_ERR(event)) { 4472 u64 probe_offset, probe_addr; 4473 u32 prog_id, fd_type; 4474 const char *buf; 4475 4476 err = bpf_get_perf_event_info(event, &prog_id, &fd_type, 4477 &buf, &probe_offset, 4478 &probe_addr); 4479 if (!err) 4480 err = bpf_task_fd_query_copy(attr, uattr, prog_id, 4481 fd_type, buf, 4482 probe_offset, 4483 probe_addr); 4484 goto put_file; 4485 } 4486 4487 out_not_supp: 4488 err = -ENOTSUPP; 4489 put_file: 4490 fput(file); 4491 return err; 4492 } 4493 4494 #define BPF_MAP_BATCH_LAST_FIELD batch.flags 4495 4496 #define BPF_DO_BATCH(fn, ...) 
\ 4497 do { \ 4498 if (!fn) { \ 4499 err = -ENOTSUPP; \ 4500 goto err_put; \ 4501 } \ 4502 err = fn(__VA_ARGS__); \ 4503 } while (0) 4504 4505 static int bpf_map_do_batch(const union bpf_attr *attr, 4506 union bpf_attr __user *uattr, 4507 int cmd) 4508 { 4509 bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || 4510 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; 4511 bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; 4512 struct bpf_map *map; 4513 int err, ufd; 4514 struct fd f; 4515 4516 if (CHECK_ATTR(BPF_MAP_BATCH)) 4517 return -EINVAL; 4518 4519 ufd = attr->batch.map_fd; 4520 f = fdget(ufd); 4521 map = __bpf_map_get(f); 4522 if (IS_ERR(map)) 4523 return PTR_ERR(map); 4524 if (has_write) 4525 bpf_map_write_active_inc(map); 4526 if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 4527 err = -EPERM; 4528 goto err_put; 4529 } 4530 if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 4531 err = -EPERM; 4532 goto err_put; 4533 } 4534 4535 if (cmd == BPF_MAP_LOOKUP_BATCH) 4536 BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr); 4537 else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) 4538 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr); 4539 else if (cmd == BPF_MAP_UPDATE_BATCH) 4540 BPF_DO_BATCH(map->ops->map_update_batch, map, f.file, attr, uattr); 4541 else 4542 BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr); 4543 err_put: 4544 if (has_write) 4545 bpf_map_write_active_dec(map); 4546 fdput(f); 4547 return err; 4548 } 4549 4550 #define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies 4551 static int link_create(union bpf_attr *attr, bpfptr_t uattr) 4552 { 4553 enum bpf_prog_type ptype; 4554 struct bpf_prog *prog; 4555 int ret; 4556 4557 if (CHECK_ATTR(BPF_LINK_CREATE)) 4558 return -EINVAL; 4559 4560 prog = bpf_prog_get(attr->link_create.prog_fd); 4561 if (IS_ERR(prog)) 4562 return PTR_ERR(prog); 4563 4564 ret = bpf_prog_attach_check_attach_type(prog, 4565 attr->link_create.attach_type); 4566 if (ret) 4567 goto out; 4568 4569 switch (prog->type) { 4570 case BPF_PROG_TYPE_EXT: 4571 break; 4572 case BPF_PROG_TYPE_PERF_EVENT: 4573 case BPF_PROG_TYPE_TRACEPOINT: 4574 if (attr->link_create.attach_type != BPF_PERF_EVENT) { 4575 ret = -EINVAL; 4576 goto out; 4577 } 4578 break; 4579 case BPF_PROG_TYPE_KPROBE: 4580 if (attr->link_create.attach_type != BPF_PERF_EVENT && 4581 attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) { 4582 ret = -EINVAL; 4583 goto out; 4584 } 4585 break; 4586 default: 4587 ptype = attach_type_to_prog_type(attr->link_create.attach_type); 4588 if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) { 4589 ret = -EINVAL; 4590 goto out; 4591 } 4592 break; 4593 } 4594 4595 switch (prog->type) { 4596 case BPF_PROG_TYPE_CGROUP_SKB: 4597 case BPF_PROG_TYPE_CGROUP_SOCK: 4598 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4599 case BPF_PROG_TYPE_SOCK_OPS: 4600 case BPF_PROG_TYPE_CGROUP_DEVICE: 4601 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4602 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4603 ret = cgroup_bpf_link_attach(attr, prog); 4604 break; 4605 case BPF_PROG_TYPE_EXT: 4606 ret = bpf_tracing_prog_attach(prog, 4607 attr->link_create.target_fd, 4608 attr->link_create.target_btf_id, 4609 attr->link_create.tracing.cookie); 4610 break; 4611 case BPF_PROG_TYPE_LSM: 4612 case BPF_PROG_TYPE_TRACING: 4613 if (attr->link_create.attach_type != prog->expected_attach_type) { 4614 ret = -EINVAL; 4615 goto out; 4616 } 4617 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) 4618 ret = bpf_raw_tp_link_attach(prog, NULL); 4619 else if 
(prog->expected_attach_type == BPF_TRACE_ITER) 4620 ret = bpf_iter_link_attach(attr, uattr, prog); 4621 else if (prog->expected_attach_type == BPF_LSM_CGROUP) 4622 ret = cgroup_bpf_link_attach(attr, prog); 4623 else 4624 ret = bpf_tracing_prog_attach(prog, 4625 attr->link_create.target_fd, 4626 attr->link_create.target_btf_id, 4627 attr->link_create.tracing.cookie); 4628 break; 4629 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4630 case BPF_PROG_TYPE_SK_LOOKUP: 4631 ret = netns_bpf_link_create(attr, prog); 4632 break; 4633 #ifdef CONFIG_NET 4634 case BPF_PROG_TYPE_XDP: 4635 ret = bpf_xdp_link_attach(attr, prog); 4636 break; 4637 #endif 4638 case BPF_PROG_TYPE_PERF_EVENT: 4639 case BPF_PROG_TYPE_TRACEPOINT: 4640 ret = bpf_perf_link_attach(attr, prog); 4641 break; 4642 case BPF_PROG_TYPE_KPROBE: 4643 if (attr->link_create.attach_type == BPF_PERF_EVENT) 4644 ret = bpf_perf_link_attach(attr, prog); 4645 else 4646 ret = bpf_kprobe_multi_link_attach(attr, prog); 4647 break; 4648 default: 4649 ret = -EINVAL; 4650 } 4651 4652 out: 4653 if (ret < 0) 4654 bpf_prog_put(prog); 4655 return ret; 4656 } 4657 4658 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd 4659 4660 static int link_update(union bpf_attr *attr) 4661 { 4662 struct bpf_prog *old_prog = NULL, *new_prog; 4663 struct bpf_link *link; 4664 u32 flags; 4665 int ret; 4666 4667 if (CHECK_ATTR(BPF_LINK_UPDATE)) 4668 return -EINVAL; 4669 4670 flags = attr->link_update.flags; 4671 if (flags & ~BPF_F_REPLACE) 4672 return -EINVAL; 4673 4674 link = bpf_link_get_from_fd(attr->link_update.link_fd); 4675 if (IS_ERR(link)) 4676 return PTR_ERR(link); 4677 4678 new_prog = bpf_prog_get(attr->link_update.new_prog_fd); 4679 if (IS_ERR(new_prog)) { 4680 ret = PTR_ERR(new_prog); 4681 goto out_put_link; 4682 } 4683 4684 if (flags & BPF_F_REPLACE) { 4685 old_prog = bpf_prog_get(attr->link_update.old_prog_fd); 4686 if (IS_ERR(old_prog)) { 4687 ret = PTR_ERR(old_prog); 4688 old_prog = NULL; 4689 goto out_put_progs; 4690 } 4691 } else if (attr->link_update.old_prog_fd) { 4692 ret = -EINVAL; 4693 goto out_put_progs; 4694 } 4695 4696 if (link->ops->update_prog) 4697 ret = link->ops->update_prog(link, new_prog, old_prog); 4698 else 4699 ret = -EINVAL; 4700 4701 out_put_progs: 4702 if (old_prog) 4703 bpf_prog_put(old_prog); 4704 if (ret) 4705 bpf_prog_put(new_prog); 4706 out_put_link: 4707 bpf_link_put(link); 4708 return ret; 4709 } 4710 4711 #define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd 4712 4713 static int link_detach(union bpf_attr *attr) 4714 { 4715 struct bpf_link *link; 4716 int ret; 4717 4718 if (CHECK_ATTR(BPF_LINK_DETACH)) 4719 return -EINVAL; 4720 4721 link = bpf_link_get_from_fd(attr->link_detach.link_fd); 4722 if (IS_ERR(link)) 4723 return PTR_ERR(link); 4724 4725 if (link->ops->detach) 4726 ret = link->ops->detach(link); 4727 else 4728 ret = -EOPNOTSUPP; 4729 4730 bpf_link_put(link); 4731 return ret; 4732 } 4733 4734 static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) 4735 { 4736 return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? 
link : ERR_PTR(-ENOENT); 4737 } 4738 4739 struct bpf_link *bpf_link_by_id(u32 id) 4740 { 4741 struct bpf_link *link; 4742 4743 if (!id) 4744 return ERR_PTR(-ENOENT); 4745 4746 spin_lock_bh(&link_idr_lock); 4747 /* before link is "settled", ID is 0, pretend it doesn't exist yet */ 4748 link = idr_find(&link_idr, id); 4749 if (link) { 4750 if (link->id) 4751 link = bpf_link_inc_not_zero(link); 4752 else 4753 link = ERR_PTR(-EAGAIN); 4754 } else { 4755 link = ERR_PTR(-ENOENT); 4756 } 4757 spin_unlock_bh(&link_idr_lock); 4758 return link; 4759 } 4760 4761 struct bpf_link *bpf_link_get_curr_or_next(u32 *id) 4762 { 4763 struct bpf_link *link; 4764 4765 spin_lock_bh(&link_idr_lock); 4766 again: 4767 link = idr_get_next(&link_idr, id); 4768 if (link) { 4769 link = bpf_link_inc_not_zero(link); 4770 if (IS_ERR(link)) { 4771 (*id)++; 4772 goto again; 4773 } 4774 } 4775 spin_unlock_bh(&link_idr_lock); 4776 4777 return link; 4778 } 4779 4780 #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id 4781 4782 static int bpf_link_get_fd_by_id(const union bpf_attr *attr) 4783 { 4784 struct bpf_link *link; 4785 u32 id = attr->link_id; 4786 int fd; 4787 4788 if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) 4789 return -EINVAL; 4790 4791 if (!capable(CAP_SYS_ADMIN)) 4792 return -EPERM; 4793 4794 link = bpf_link_by_id(id); 4795 if (IS_ERR(link)) 4796 return PTR_ERR(link); 4797 4798 fd = bpf_link_new_fd(link); 4799 if (fd < 0) 4800 bpf_link_put(link); 4801 4802 return fd; 4803 } 4804 4805 DEFINE_MUTEX(bpf_stats_enabled_mutex); 4806 4807 static int bpf_stats_release(struct inode *inode, struct file *file) 4808 { 4809 mutex_lock(&bpf_stats_enabled_mutex); 4810 static_key_slow_dec(&bpf_stats_enabled_key.key); 4811 mutex_unlock(&bpf_stats_enabled_mutex); 4812 return 0; 4813 } 4814 4815 static const struct file_operations bpf_stats_fops = { 4816 .release = bpf_stats_release, 4817 }; 4818 4819 static int bpf_enable_runtime_stats(void) 4820 { 4821 int fd; 4822 4823 mutex_lock(&bpf_stats_enabled_mutex); 4824 4825 /* Set a very high limit to avoid overflow */ 4826 if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) { 4827 mutex_unlock(&bpf_stats_enabled_mutex); 4828 return -EBUSY; 4829 } 4830 4831 fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); 4832 if (fd >= 0) 4833 static_key_slow_inc(&bpf_stats_enabled_key.key); 4834 4835 mutex_unlock(&bpf_stats_enabled_mutex); 4836 return fd; 4837 } 4838 4839 #define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type 4840 4841 static int bpf_enable_stats(union bpf_attr *attr) 4842 { 4843 4844 if (CHECK_ATTR(BPF_ENABLE_STATS)) 4845 return -EINVAL; 4846 4847 if (!capable(CAP_SYS_ADMIN)) 4848 return -EPERM; 4849 4850 switch (attr->enable_stats.type) { 4851 case BPF_STATS_RUN_TIME: 4852 return bpf_enable_runtime_stats(); 4853 default: 4854 break; 4855 } 4856 return -EINVAL; 4857 } 4858 4859 #define BPF_ITER_CREATE_LAST_FIELD iter_create.flags 4860 4861 static int bpf_iter_create(union bpf_attr *attr) 4862 { 4863 struct bpf_link *link; 4864 int err; 4865 4866 if (CHECK_ATTR(BPF_ITER_CREATE)) 4867 return -EINVAL; 4868 4869 if (attr->iter_create.flags) 4870 return -EINVAL; 4871 4872 link = bpf_link_get_from_fd(attr->iter_create.link_fd); 4873 if (IS_ERR(link)) 4874 return PTR_ERR(link); 4875 4876 err = bpf_iter_new_fd(link); 4877 bpf_link_put(link); 4878 4879 return err; 4880 } 4881 4882 #define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags 4883 4884 static int bpf_prog_bind_map(union bpf_attr *attr) 4885 { 4886 struct bpf_prog *prog; 4887 struct bpf_map *map; 4888 struct 
bpf_map **used_maps_old, **used_maps_new; 4889 int i, ret = 0; 4890 4891 if (CHECK_ATTR(BPF_PROG_BIND_MAP)) 4892 return -EINVAL; 4893 4894 if (attr->prog_bind_map.flags) 4895 return -EINVAL; 4896 4897 prog = bpf_prog_get(attr->prog_bind_map.prog_fd); 4898 if (IS_ERR(prog)) 4899 return PTR_ERR(prog); 4900 4901 map = bpf_map_get(attr->prog_bind_map.map_fd); 4902 if (IS_ERR(map)) { 4903 ret = PTR_ERR(map); 4904 goto out_prog_put; 4905 } 4906 4907 mutex_lock(&prog->aux->used_maps_mutex); 4908 4909 used_maps_old = prog->aux->used_maps; 4910 4911 for (i = 0; i < prog->aux->used_map_cnt; i++) 4912 if (used_maps_old[i] == map) { 4913 bpf_map_put(map); 4914 goto out_unlock; 4915 } 4916 4917 used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, 4918 sizeof(used_maps_new[0]), 4919 GFP_KERNEL); 4920 if (!used_maps_new) { 4921 ret = -ENOMEM; 4922 goto out_unlock; 4923 } 4924 4925 memcpy(used_maps_new, used_maps_old, 4926 sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); 4927 used_maps_new[prog->aux->used_map_cnt] = map; 4928 4929 prog->aux->used_map_cnt++; 4930 prog->aux->used_maps = used_maps_new; 4931 4932 kfree(used_maps_old); 4933 4934 out_unlock: 4935 mutex_unlock(&prog->aux->used_maps_mutex); 4936 4937 if (ret) 4938 bpf_map_put(map); 4939 out_prog_put: 4940 bpf_prog_put(prog); 4941 return ret; 4942 } 4943 4944 static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) 4945 { 4946 union bpf_attr attr; 4947 bool capable; 4948 int err; 4949 4950 capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled; 4951 4952 /* Intent here is for unprivileged_bpf_disabled to block key object 4953 * creation commands for unprivileged users; other actions depend 4954 * on fd availability and access to bpffs, so are dependent on 4955 * object creation success. Capabilities are later verified for 4956 * operations such as load and map create, so even with unprivileged 4957 * BPF disabled, capability checks are still carried out for these 4958 * and other operations. 
4959 */ 4960 if (!capable && 4961 (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD)) 4962 return -EPERM; 4963 4964 err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); 4965 if (err) 4966 return err; 4967 size = min_t(u32, size, sizeof(attr)); 4968 4969 /* copy attributes from user space, may be less than sizeof(bpf_attr) */ 4970 memset(&attr, 0, sizeof(attr)); 4971 if (copy_from_bpfptr(&attr, uattr, size) != 0) 4972 return -EFAULT; 4973 4974 err = security_bpf(cmd, &attr, size); 4975 if (err < 0) 4976 return err; 4977 4978 switch (cmd) { 4979 case BPF_MAP_CREATE: 4980 err = map_create(&attr); 4981 break; 4982 case BPF_MAP_LOOKUP_ELEM: 4983 err = map_lookup_elem(&attr); 4984 break; 4985 case BPF_MAP_UPDATE_ELEM: 4986 err = map_update_elem(&attr, uattr); 4987 break; 4988 case BPF_MAP_DELETE_ELEM: 4989 err = map_delete_elem(&attr, uattr); 4990 break; 4991 case BPF_MAP_GET_NEXT_KEY: 4992 err = map_get_next_key(&attr); 4993 break; 4994 case BPF_MAP_FREEZE: 4995 err = map_freeze(&attr); 4996 break; 4997 case BPF_PROG_LOAD: 4998 err = bpf_prog_load(&attr, uattr); 4999 break; 5000 case BPF_OBJ_PIN: 5001 err = bpf_obj_pin(&attr); 5002 break; 5003 case BPF_OBJ_GET: 5004 err = bpf_obj_get(&attr); 5005 break; 5006 case BPF_PROG_ATTACH: 5007 err = bpf_prog_attach(&attr); 5008 break; 5009 case BPF_PROG_DETACH: 5010 err = bpf_prog_detach(&attr); 5011 break; 5012 case BPF_PROG_QUERY: 5013 err = bpf_prog_query(&attr, uattr.user); 5014 break; 5015 case BPF_PROG_TEST_RUN: 5016 err = bpf_prog_test_run(&attr, uattr.user); 5017 break; 5018 case BPF_PROG_GET_NEXT_ID: 5019 err = bpf_obj_get_next_id(&attr, uattr.user, 5020 &prog_idr, &prog_idr_lock); 5021 break; 5022 case BPF_MAP_GET_NEXT_ID: 5023 err = bpf_obj_get_next_id(&attr, uattr.user, 5024 &map_idr, &map_idr_lock); 5025 break; 5026 case BPF_BTF_GET_NEXT_ID: 5027 err = bpf_obj_get_next_id(&attr, uattr.user, 5028 &btf_idr, &btf_idr_lock); 5029 break; 5030 case BPF_PROG_GET_FD_BY_ID: 5031 err = bpf_prog_get_fd_by_id(&attr); 5032 break; 5033 case BPF_MAP_GET_FD_BY_ID: 5034 err = bpf_map_get_fd_by_id(&attr); 5035 break; 5036 case BPF_OBJ_GET_INFO_BY_FD: 5037 err = bpf_obj_get_info_by_fd(&attr, uattr.user); 5038 break; 5039 case BPF_RAW_TRACEPOINT_OPEN: 5040 err = bpf_raw_tracepoint_open(&attr); 5041 break; 5042 case BPF_BTF_LOAD: 5043 err = bpf_btf_load(&attr, uattr); 5044 break; 5045 case BPF_BTF_GET_FD_BY_ID: 5046 err = bpf_btf_get_fd_by_id(&attr); 5047 break; 5048 case BPF_TASK_FD_QUERY: 5049 err = bpf_task_fd_query(&attr, uattr.user); 5050 break; 5051 case BPF_MAP_LOOKUP_AND_DELETE_ELEM: 5052 err = map_lookup_and_delete_elem(&attr); 5053 break; 5054 case BPF_MAP_LOOKUP_BATCH: 5055 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH); 5056 break; 5057 case BPF_MAP_LOOKUP_AND_DELETE_BATCH: 5058 err = bpf_map_do_batch(&attr, uattr.user, 5059 BPF_MAP_LOOKUP_AND_DELETE_BATCH); 5060 break; 5061 case BPF_MAP_UPDATE_BATCH: 5062 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH); 5063 break; 5064 case BPF_MAP_DELETE_BATCH: 5065 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH); 5066 break; 5067 case BPF_LINK_CREATE: 5068 err = link_create(&attr, uattr); 5069 break; 5070 case BPF_LINK_UPDATE: 5071 err = link_update(&attr); 5072 break; 5073 case BPF_LINK_GET_FD_BY_ID: 5074 err = bpf_link_get_fd_by_id(&attr); 5075 break; 5076 case BPF_LINK_GET_NEXT_ID: 5077 err = bpf_obj_get_next_id(&attr, uattr.user, 5078 &link_idr, &link_idr_lock); 5079 break; 5080 case BPF_ENABLE_STATS: 5081 err = bpf_enable_stats(&attr); 5082 break; 5083 
case BPF_ITER_CREATE: 5084 err = bpf_iter_create(&attr); 5085 break; 5086 case BPF_LINK_DETACH: 5087 err = link_detach(&attr); 5088 break; 5089 case BPF_PROG_BIND_MAP: 5090 err = bpf_prog_bind_map(&attr); 5091 break; 5092 default: 5093 err = -EINVAL; 5094 break; 5095 } 5096 5097 return err; 5098 } 5099 5100 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 5101 { 5102 return __sys_bpf(cmd, USER_BPFPTR(uattr), size); 5103 } 5104 5105 static bool syscall_prog_is_valid_access(int off, int size, 5106 enum bpf_access_type type, 5107 const struct bpf_prog *prog, 5108 struct bpf_insn_access_aux *info) 5109 { 5110 if (off < 0 || off >= U16_MAX) 5111 return false; 5112 if (off % size != 0) 5113 return false; 5114 return true; 5115 } 5116 5117 BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size) 5118 { 5119 switch (cmd) { 5120 case BPF_MAP_CREATE: 5121 case BPF_MAP_DELETE_ELEM: 5122 case BPF_MAP_UPDATE_ELEM: 5123 case BPF_MAP_FREEZE: 5124 case BPF_MAP_GET_FD_BY_ID: 5125 case BPF_PROG_LOAD: 5126 case BPF_BTF_LOAD: 5127 case BPF_LINK_CREATE: 5128 case BPF_RAW_TRACEPOINT_OPEN: 5129 break; 5130 default: 5131 return -EINVAL; 5132 } 5133 return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size); 5134 } 5135 5136 5137 /* To shut up -Wmissing-prototypes. 5138 * This function is used by the kernel light skeleton 5139 * to load bpf programs when modules are loaded or during kernel boot. 5140 * See tools/lib/bpf/skel_internal.h 5141 */ 5142 int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); 5143 5144 int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size) 5145 { 5146 struct bpf_prog * __maybe_unused prog; 5147 struct bpf_tramp_run_ctx __maybe_unused run_ctx; 5148 5149 switch (cmd) { 5150 #ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */ 5151 case BPF_PROG_TEST_RUN: 5152 if (attr->test.data_in || attr->test.data_out || 5153 attr->test.ctx_out || attr->test.duration || 5154 attr->test.repeat || attr->test.flags) 5155 return -EINVAL; 5156 5157 prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL); 5158 if (IS_ERR(prog)) 5159 return PTR_ERR(prog); 5160 5161 if (attr->test.ctx_size_in < prog->aux->max_ctx_offset || 5162 attr->test.ctx_size_in > U16_MAX) { 5163 bpf_prog_put(prog); 5164 return -EINVAL; 5165 } 5166 5167 run_ctx.bpf_cookie = 0; 5168 run_ctx.saved_run_ctx = NULL; 5169 if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) { 5170 /* recursion detected */ 5171 bpf_prog_put(prog); 5172 return -EBUSY; 5173 } 5174 attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in); 5175 __bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */, 5176 &run_ctx); 5177 bpf_prog_put(prog); 5178 return 0; 5179 #endif 5180 default: 5181 return ____bpf_sys_bpf(cmd, attr, size); 5182 } 5183 } 5184 EXPORT_SYMBOL(kern_sys_bpf); 5185 5186 static const struct bpf_func_proto bpf_sys_bpf_proto = { 5187 .func = bpf_sys_bpf, 5188 .gpl_only = false, 5189 .ret_type = RET_INTEGER, 5190 .arg1_type = ARG_ANYTHING, 5191 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 5192 .arg3_type = ARG_CONST_SIZE, 5193 }; 5194 5195 const struct bpf_func_proto * __weak 5196 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5197 { 5198 return bpf_base_func_proto(func_id); 5199 } 5200 5201 BPF_CALL_1(bpf_sys_close, u32, fd) 5202 { 5203 /* When bpf program calls this helper there should not be 5204 * an fdget() without matching completed fdput(). 
5205 * This helper is allowed in the following callchain only: 5206 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close 5207 */ 5208 return close_fd(fd); 5209 } 5210 5211 static const struct bpf_func_proto bpf_sys_close_proto = { 5212 .func = bpf_sys_close, 5213 .gpl_only = false, 5214 .ret_type = RET_INTEGER, 5215 .arg1_type = ARG_ANYTHING, 5216 }; 5217 5218 BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res) 5219 { 5220 if (flags) 5221 return -EINVAL; 5222 5223 if (name_sz <= 1 || name[name_sz - 1]) 5224 return -EINVAL; 5225 5226 if (!bpf_dump_raw_ok(current_cred())) 5227 return -EPERM; 5228 5229 *res = kallsyms_lookup_name(name); 5230 return *res ? 0 : -ENOENT; 5231 } 5232 5233 static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { 5234 .func = bpf_kallsyms_lookup_name, 5235 .gpl_only = false, 5236 .ret_type = RET_INTEGER, 5237 .arg1_type = ARG_PTR_TO_MEM, 5238 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 5239 .arg3_type = ARG_ANYTHING, 5240 .arg4_type = ARG_PTR_TO_LONG, 5241 }; 5242 5243 static const struct bpf_func_proto * 5244 syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5245 { 5246 switch (func_id) { 5247 case BPF_FUNC_sys_bpf: 5248 return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto; 5249 case BPF_FUNC_btf_find_by_name_kind: 5250 return &bpf_btf_find_by_name_kind_proto; 5251 case BPF_FUNC_sys_close: 5252 return &bpf_sys_close_proto; 5253 case BPF_FUNC_kallsyms_lookup_name: 5254 return &bpf_kallsyms_lookup_name_proto; 5255 default: 5256 return tracing_prog_func_proto(func_id, prog); 5257 } 5258 } 5259 5260 const struct bpf_verifier_ops bpf_syscall_verifier_ops = { 5261 .get_func_proto = syscall_prog_func_proto, 5262 .is_valid_access = syscall_prog_is_valid_access, 5263 }; 5264 5265 const struct bpf_prog_ops bpf_syscall_prog_ops = { 5266 .test_run = bpf_prog_test_run_syscall, 5267 }; 5268 5269 #ifdef CONFIG_SYSCTL 5270 static int bpf_stats_handler(struct ctl_table *table, int write, 5271 void *buffer, size_t *lenp, loff_t *ppos) 5272 { 5273 struct static_key *key = (struct static_key *)table->data; 5274 static int saved_val; 5275 int val, ret; 5276 struct ctl_table tmp = { 5277 .data = &val, 5278 .maxlen = sizeof(val), 5279 .mode = table->mode, 5280 .extra1 = SYSCTL_ZERO, 5281 .extra2 = SYSCTL_ONE, 5282 }; 5283 5284 if (write && !capable(CAP_SYS_ADMIN)) 5285 return -EPERM; 5286 5287 mutex_lock(&bpf_stats_enabled_mutex); 5288 val = saved_val; 5289 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 5290 if (write && !ret && val != saved_val) { 5291 if (val) 5292 static_key_slow_inc(key); 5293 else 5294 static_key_slow_dec(key); 5295 saved_val = val; 5296 } 5297 mutex_unlock(&bpf_stats_enabled_mutex); 5298 return ret; 5299 } 5300 5301 void __weak unpriv_ebpf_notify(int new_state) 5302 { 5303 } 5304 5305 static int bpf_unpriv_handler(struct ctl_table *table, int write, 5306 void *buffer, size_t *lenp, loff_t *ppos) 5307 { 5308 int ret, unpriv_enable = *(int *)table->data; 5309 bool locked_state = unpriv_enable == 1; 5310 struct ctl_table tmp = *table; 5311 5312 if (write && !capable(CAP_SYS_ADMIN)) 5313 return -EPERM; 5314 5315 tmp.data = &unpriv_enable; 5316 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 5317 if (write && !ret) { 5318 if (locked_state && unpriv_enable != 1) 5319 return -EPERM; 5320 *(int *)table->data = unpriv_enable; 5321 } 5322 5323 unpriv_ebpf_notify(unpriv_enable); 5324 5325 return ret; 5326 } 5327 5328 static struct ctl_table bpf_syscall_table[] = { 5329 { 
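		/* Accepted values, as enforced by bpf_unpriv_handler() above and
		 * the extra1/extra2 bounds below:
		 *   0 - unprivileged calls to bpf() are allowed (per-command
		 *       capability checks still apply)
		 *   1 - unprivileged bpf() is disabled and locked; the handler
		 *       rejects any further change away from 1
		 *   2 - unprivileged bpf() is disabled, but a CAP_SYS_ADMIN
		 *       writer may still change the value at runtime
		 */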
5330 .procname = "unprivileged_bpf_disabled", 5331 .data = &sysctl_unprivileged_bpf_disabled, 5332 .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), 5333 .mode = 0644, 5334 .proc_handler = bpf_unpriv_handler, 5335 .extra1 = SYSCTL_ZERO, 5336 .extra2 = SYSCTL_TWO, 5337 }, 5338 { 5339 .procname = "bpf_stats_enabled", 5340 .data = &bpf_stats_enabled_key.key, 5341 .mode = 0644, 5342 .proc_handler = bpf_stats_handler, 5343 }, 5344 { } 5345 }; 5346 5347 static int __init bpf_syscall_sysctl_init(void) 5348 { 5349 register_sysctl_init("kernel", bpf_syscall_table); 5350 return 0; 5351 } 5352 late_initcall(bpf_syscall_sysctl_init); 5353 #endif /* CONFIG_SYSCTL */ 5354
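/*
 * Usage sketch (kept inside a comment; not part of this file's build): a
 * minimal illustration of how userspace drives the syscall implemented
 * above. It creates an array map with BPF_MAP_CREATE and reads it back via
 * BPF_OBJ_GET_INFO_BY_FD, relying on the info_len handshake and the
 * zero-tail rule checked by bpf_check_uarg_tail_zero(). The sys_bpf()
 * wrapper and example() below are ad-hoc names for this sketch only, not
 * kernel or libbpf APIs; real callers would normally use libbpf, and error
 * handling here is trimmed to the bare minimum.
 *
 *	#include <linux/bpf.h>
 *	#include <string.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
 *	{
 *		return syscall(__NR_bpf, cmd, attr, size);
 *	}
 *
 *	int example(void)
 *	{
 *		union bpf_attr attr;
 *		struct bpf_map_info info;
 *		int map_fd;
 *
 *		// BPF_MAP_CREATE: unknown tail of attr must stay zeroed
 *		memset(&attr, 0, sizeof(attr));
 *		attr.map_type = BPF_MAP_TYPE_ARRAY;
 *		attr.key_size = 4;
 *		attr.value_size = 8;
 *		attr.max_entries = 16;
 *		map_fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 *		if (map_fd < 0)
 *			return -1;
 *
 *		// BPF_OBJ_GET_INFO_BY_FD: kernel fills info and writes back
 *		// the length it actually copied into attr.info.info_len
 *		memset(&attr, 0, sizeof(attr));
 *		memset(&info, 0, sizeof(info));
 *		attr.info.bpf_fd = map_fd;
 *		attr.info.info_len = sizeof(info);
 *		attr.info.info = (__u64)(unsigned long)&info;
 *		if (sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
 *			return -1;
 *
 *		// info.type, info.id, info.max_entries etc. are now valid
 *		return info.id;
 *	}
 */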