1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 */ 4 #include <linux/bpf.h> 5 #include <linux/bpf-cgroup.h> 6 #include <linux/bpf_trace.h> 7 #include <linux/bpf_lirc.h> 8 #include <linux/bpf_verifier.h> 9 #include <linux/bsearch.h> 10 #include <linux/btf.h> 11 #include <linux/syscalls.h> 12 #include <linux/slab.h> 13 #include <linux/sched/signal.h> 14 #include <linux/vmalloc.h> 15 #include <linux/mmzone.h> 16 #include <linux/anon_inodes.h> 17 #include <linux/fdtable.h> 18 #include <linux/file.h> 19 #include <linux/fs.h> 20 #include <linux/license.h> 21 #include <linux/filter.h> 22 #include <linux/kernel.h> 23 #include <linux/idr.h> 24 #include <linux/cred.h> 25 #include <linux/timekeeping.h> 26 #include <linux/ctype.h> 27 #include <linux/nospec.h> 28 #include <linux/audit.h> 29 #include <uapi/linux/btf.h> 30 #include <linux/pgtable.h> 31 #include <linux/bpf_lsm.h> 32 #include <linux/poll.h> 33 #include <linux/sort.h> 34 #include <linux/bpf-netns.h> 35 #include <linux/rcupdate_trace.h> 36 #include <linux/memcontrol.h> 37 #include <linux/trace_events.h> 38 39 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ 40 (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ 41 (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 42 #define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY) 43 #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) 44 #define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \ 45 IS_FD_HASH(map)) 46 47 #define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) 48 49 DEFINE_PER_CPU(int, bpf_prog_active); 50 static DEFINE_IDR(prog_idr); 51 static DEFINE_SPINLOCK(prog_idr_lock); 52 static DEFINE_IDR(map_idr); 53 static DEFINE_SPINLOCK(map_idr_lock); 54 static DEFINE_IDR(link_idr); 55 static DEFINE_SPINLOCK(link_idr_lock); 56 57 int sysctl_unprivileged_bpf_disabled __read_mostly = 58 IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; 59 60 static const struct bpf_map_ops * const bpf_map_types[] = { 61 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 62 #define BPF_MAP_TYPE(_id, _ops) \ 63 [_id] = &_ops, 64 #define BPF_LINK_TYPE(_id, _name) 65 #include <linux/bpf_types.h> 66 #undef BPF_PROG_TYPE 67 #undef BPF_MAP_TYPE 68 #undef BPF_LINK_TYPE 69 }; 70 71 /* 72 * If we're handed a bigger struct than we know of, ensure all the unknown bits 73 * are 0 - i.e. new user-space does not rely on any kernel feature extensions 74 * we don't know about yet. 75 * 76 * There is a ToCToU between this function call and the following 77 * copy_from_user() call. However, this is not a concern since this function is 78 * meant to be a future-proofing of bits. 79 */ 80 int bpf_check_uarg_tail_zero(bpfptr_t uaddr, 81 size_t expected_size, 82 size_t actual_size) 83 { 84 int res; 85 86 if (unlikely(actual_size > PAGE_SIZE)) /* silly large */ 87 return -E2BIG; 88 89 if (actual_size <= expected_size) 90 return 0; 91 92 if (uaddr.is_kernel) 93 res = memchr_inv(uaddr.kernel + expected_size, 0, 94 actual_size - expected_size) == NULL; 95 else 96 res = check_zeroed_user(uaddr.user + expected_size, 97 actual_size - expected_size); 98 if (res < 0) 99 return res; 100 return res ? 0 : -E2BIG; 101 } 102 103 const struct bpf_map_ops bpf_map_offload_ops = { 104 .map_meta_equal = bpf_map_meta_equal, 105 .map_alloc = bpf_map_offload_map_alloc, 106 .map_free = bpf_map_offload_map_free, 107 .map_check_btf = map_check_no_btf, 108 .map_mem_usage = bpf_map_offload_map_mem_usage, 109 }; 110 111 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 112 { 113 const struct bpf_map_ops *ops; 114 u32 type = attr->map_type; 115 struct bpf_map *map; 116 int err; 117 118 if (type >= ARRAY_SIZE(bpf_map_types)) 119 return ERR_PTR(-EINVAL); 120 type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types)); 121 ops = bpf_map_types[type]; 122 if (!ops) 123 return ERR_PTR(-EINVAL); 124 125 if (ops->map_alloc_check) { 126 err = ops->map_alloc_check(attr); 127 if (err) 128 return ERR_PTR(err); 129 } 130 if (attr->map_ifindex) 131 ops = &bpf_map_offload_ops; 132 if (!ops->map_mem_usage) 133 return ERR_PTR(-EINVAL); 134 map = ops->map_alloc(attr); 135 if (IS_ERR(map)) 136 return map; 137 map->ops = ops; 138 map->map_type = type; 139 return map; 140 } 141 142 static void bpf_map_write_active_inc(struct bpf_map *map) 143 { 144 atomic64_inc(&map->writecnt); 145 } 146 147 static void bpf_map_write_active_dec(struct bpf_map *map) 148 { 149 atomic64_dec(&map->writecnt); 150 } 151 152 bool bpf_map_write_active(const struct bpf_map *map) 153 { 154 return atomic64_read(&map->writecnt) != 0; 155 } 156 157 static u32 bpf_map_value_size(const struct bpf_map *map) 158 { 159 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 160 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 161 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || 162 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 163 return round_up(map->value_size, 8) * num_possible_cpus(); 164 else if (IS_FD_MAP(map)) 165 return sizeof(u32); 166 else 167 return map->value_size; 168 } 169 170 static void maybe_wait_bpf_programs(struct bpf_map *map) 171 { 172 /* Wait for any running BPF programs to complete so that 173 * userspace, when we return to it, knows that all programs 174 * that could be running use the new map value. 175 */ 176 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || 177 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 178 synchronize_rcu(); 179 } 180 181 static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, 182 void *key, void *value, __u64 flags) 183 { 184 int err; 185 186 /* Need to create a kthread, thus must support schedule */ 187 if (bpf_map_is_offloaded(map)) { 188 return bpf_map_offload_update_elem(map, key, value, flags); 189 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || 190 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 191 return map->ops->map_update_elem(map, key, value, flags); 192 } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || 193 map->map_type == BPF_MAP_TYPE_SOCKMAP) { 194 return sock_map_update_elem_sys(map, key, value, flags); 195 } else if (IS_FD_PROG_ARRAY(map)) { 196 return bpf_fd_array_map_update_elem(map, map_file, key, value, 197 flags); 198 } 199 200 bpf_disable_instrumentation(); 201 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 202 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 203 err = bpf_percpu_hash_update(map, key, value, flags); 204 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 205 err = bpf_percpu_array_update(map, key, value, flags); 206 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 207 err = bpf_percpu_cgroup_storage_update(map, key, value, 208 flags); 209 } else if (IS_FD_ARRAY(map)) { 210 rcu_read_lock(); 211 err = bpf_fd_array_map_update_elem(map, map_file, key, value, 212 flags); 213 rcu_read_unlock(); 214 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 215 rcu_read_lock(); 216 err = bpf_fd_htab_map_update_elem(map, map_file, key, value, 217 flags); 218 rcu_read_unlock(); 219 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 220 /* rcu_read_lock() is not needed */ 221 err = bpf_fd_reuseport_array_update_elem(map, key, value, 222 flags); 223 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 224 map->map_type == BPF_MAP_TYPE_STACK || 225 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 226 err = map->ops->map_push_elem(map, value, flags); 227 } else { 228 rcu_read_lock(); 229 err = map->ops->map_update_elem(map, key, value, flags); 230 rcu_read_unlock(); 231 } 232 bpf_enable_instrumentation(); 233 maybe_wait_bpf_programs(map); 234 235 return err; 236 } 237 238 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, 239 __u64 flags) 240 { 241 void *ptr; 242 int err; 243 244 if (bpf_map_is_offloaded(map)) 245 return bpf_map_offload_lookup_elem(map, key, value); 246 247 bpf_disable_instrumentation(); 248 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 249 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 250 err = bpf_percpu_hash_copy(map, key, value); 251 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 252 err = bpf_percpu_array_copy(map, key, value); 253 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 254 err = bpf_percpu_cgroup_storage_copy(map, key, value); 255 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 256 err = bpf_stackmap_copy(map, key, value); 257 } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { 258 err = bpf_fd_array_map_lookup_elem(map, key, value); 259 } else if (IS_FD_HASH(map)) { 260 err = bpf_fd_htab_map_lookup_elem(map, key, value); 261 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 262 err = bpf_fd_reuseport_array_lookup_elem(map, key, value); 263 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 264 map->map_type == BPF_MAP_TYPE_STACK || 265 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 266 err = map->ops->map_peek_elem(map, value); 267 } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 268 /* struct_ops map requires directly updating "value" */ 269 err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); 270 } else { 271 rcu_read_lock(); 272 if (map->ops->map_lookup_elem_sys_only) 273 ptr = map->ops->map_lookup_elem_sys_only(map, key); 274 else 275 ptr = map->ops->map_lookup_elem(map, key); 276 if (IS_ERR(ptr)) { 277 err = PTR_ERR(ptr); 278 } else if (!ptr) { 279 err = -ENOENT; 280 } else { 281 err = 0; 282 if (flags & BPF_F_LOCK) 283 /* lock 'ptr' and copy everything but lock */ 284 copy_map_value_locked(map, value, ptr, true); 285 else 286 copy_map_value(map, value, ptr); 287 /* mask lock and timer, since value wasn't zero inited */ 288 check_and_init_map_value(map, value); 289 } 290 rcu_read_unlock(); 291 } 292 293 bpf_enable_instrumentation(); 294 maybe_wait_bpf_programs(map); 295 296 return err; 297 } 298 299 /* Please, do not use this function outside from the map creation path 300 * (e.g. in map update path) without taking care of setting the active 301 * memory cgroup (see at bpf_map_kmalloc_node() for example). 302 */ 303 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) 304 { 305 /* We really just want to fail instead of triggering OOM killer 306 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, 307 * which is used for lower order allocation requests. 308 * 309 * It has been observed that higher order allocation requests done by 310 * vmalloc with __GFP_NORETRY being set might fail due to not trying 311 * to reclaim memory from the page cache, thus we set 312 * __GFP_RETRY_MAYFAIL to avoid such situations. 313 */ 314 315 gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO); 316 unsigned int flags = 0; 317 unsigned long align = 1; 318 void *area; 319 320 if (size >= SIZE_MAX) 321 return NULL; 322 323 /* kmalloc()'ed memory can't be mmap()'ed */ 324 if (mmapable) { 325 BUG_ON(!PAGE_ALIGNED(size)); 326 align = SHMLBA; 327 flags = VM_USERMAP; 328 } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 329 area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, 330 numa_node); 331 if (area != NULL) 332 return area; 333 } 334 335 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, 336 gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL, 337 flags, numa_node, __builtin_return_address(0)); 338 } 339 340 void *bpf_map_area_alloc(u64 size, int numa_node) 341 { 342 return __bpf_map_area_alloc(size, numa_node, false); 343 } 344 345 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node) 346 { 347 return __bpf_map_area_alloc(size, numa_node, true); 348 } 349 350 void bpf_map_area_free(void *area) 351 { 352 kvfree(area); 353 } 354 355 static u32 bpf_map_flags_retain_permanent(u32 flags) 356 { 357 /* Some map creation flags are not tied to the map object but 358 * rather to the map fd instead, so they have no meaning upon 359 * map object inspection since multiple file descriptors with 360 * different (access) properties can exist here. Thus, given 361 * this has zero meaning for the map itself, lets clear these 362 * from here. 363 */ 364 return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); 365 } 366 367 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) 368 { 369 map->map_type = attr->map_type; 370 map->key_size = attr->key_size; 371 map->value_size = attr->value_size; 372 map->max_entries = attr->max_entries; 373 map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); 374 map->numa_node = bpf_map_attr_numa_node(attr); 375 map->map_extra = attr->map_extra; 376 } 377 378 static int bpf_map_alloc_id(struct bpf_map *map) 379 { 380 int id; 381 382 idr_preload(GFP_KERNEL); 383 spin_lock_bh(&map_idr_lock); 384 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); 385 if (id > 0) 386 map->id = id; 387 spin_unlock_bh(&map_idr_lock); 388 idr_preload_end(); 389 390 if (WARN_ON_ONCE(!id)) 391 return -ENOSPC; 392 393 return id > 0 ? 0 : id; 394 } 395 396 void bpf_map_free_id(struct bpf_map *map) 397 { 398 unsigned long flags; 399 400 /* Offloaded maps are removed from the IDR store when their device 401 * disappears - even if someone holds an fd to them they are unusable, 402 * the memory is gone, all ops will fail; they are simply waiting for 403 * refcnt to drop to be freed. 404 */ 405 if (!map->id) 406 return; 407 408 spin_lock_irqsave(&map_idr_lock, flags); 409 410 idr_remove(&map_idr, map->id); 411 map->id = 0; 412 413 spin_unlock_irqrestore(&map_idr_lock, flags); 414 } 415 416 #ifdef CONFIG_MEMCG_KMEM 417 static void bpf_map_save_memcg(struct bpf_map *map) 418 { 419 /* Currently if a map is created by a process belonging to the root 420 * memory cgroup, get_obj_cgroup_from_current() will return NULL. 421 * So we have to check map->objcg for being NULL each time it's 422 * being used. 423 */ 424 if (memcg_bpf_enabled()) 425 map->objcg = get_obj_cgroup_from_current(); 426 } 427 428 static void bpf_map_release_memcg(struct bpf_map *map) 429 { 430 if (map->objcg) 431 obj_cgroup_put(map->objcg); 432 } 433 434 static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map) 435 { 436 if (map->objcg) 437 return get_mem_cgroup_from_objcg(map->objcg); 438 439 return root_mem_cgroup; 440 } 441 442 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, 443 int node) 444 { 445 struct mem_cgroup *memcg, *old_memcg; 446 void *ptr; 447 448 memcg = bpf_map_get_memcg(map); 449 old_memcg = set_active_memcg(memcg); 450 ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); 451 set_active_memcg(old_memcg); 452 mem_cgroup_put(memcg); 453 454 return ptr; 455 } 456 457 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) 458 { 459 struct mem_cgroup *memcg, *old_memcg; 460 void *ptr; 461 462 memcg = bpf_map_get_memcg(map); 463 old_memcg = set_active_memcg(memcg); 464 ptr = kzalloc(size, flags | __GFP_ACCOUNT); 465 set_active_memcg(old_memcg); 466 mem_cgroup_put(memcg); 467 468 return ptr; 469 } 470 471 void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, 472 gfp_t flags) 473 { 474 struct mem_cgroup *memcg, *old_memcg; 475 void *ptr; 476 477 memcg = bpf_map_get_memcg(map); 478 old_memcg = set_active_memcg(memcg); 479 ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT); 480 set_active_memcg(old_memcg); 481 mem_cgroup_put(memcg); 482 483 return ptr; 484 } 485 486 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, 487 size_t align, gfp_t flags) 488 { 489 struct mem_cgroup *memcg, *old_memcg; 490 void __percpu *ptr; 491 492 memcg = bpf_map_get_memcg(map); 493 old_memcg = set_active_memcg(memcg); 494 ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT); 495 set_active_memcg(old_memcg); 496 mem_cgroup_put(memcg); 497 498 return ptr; 499 } 500 501 #else 502 static void bpf_map_save_memcg(struct bpf_map *map) 503 { 504 } 505 506 static void bpf_map_release_memcg(struct bpf_map *map) 507 { 508 } 509 #endif 510 511 static int btf_field_cmp(const void *a, const void *b) 512 { 513 const struct btf_field *f1 = a, *f2 = b; 514 515 if (f1->offset < f2->offset) 516 return -1; 517 else if (f1->offset > f2->offset) 518 return 1; 519 return 0; 520 } 521 522 struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset, 523 u32 field_mask) 524 { 525 struct btf_field *field; 526 527 if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & field_mask)) 528 return NULL; 529 field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp); 530 if (!field || !(field->type & field_mask)) 531 return NULL; 532 return field; 533 } 534 535 void btf_record_free(struct btf_record *rec) 536 { 537 int i; 538 539 if (IS_ERR_OR_NULL(rec)) 540 return; 541 for (i = 0; i < rec->cnt; i++) { 542 switch (rec->fields[i].type) { 543 case BPF_KPTR_UNREF: 544 case BPF_KPTR_REF: 545 if (rec->fields[i].kptr.module) 546 module_put(rec->fields[i].kptr.module); 547 btf_put(rec->fields[i].kptr.btf); 548 break; 549 case BPF_LIST_HEAD: 550 case BPF_LIST_NODE: 551 case BPF_RB_ROOT: 552 case BPF_RB_NODE: 553 case BPF_SPIN_LOCK: 554 case BPF_TIMER: 555 /* Nothing to release */ 556 break; 557 default: 558 WARN_ON_ONCE(1); 559 continue; 560 } 561 } 562 kfree(rec); 563 } 564 565 void bpf_map_free_record(struct bpf_map *map) 566 { 567 btf_record_free(map->record); 568 map->record = NULL; 569 } 570 571 struct btf_record *btf_record_dup(const struct btf_record *rec) 572 { 573 const struct btf_field *fields; 574 struct btf_record *new_rec; 575 int ret, size, i; 576 577 if (IS_ERR_OR_NULL(rec)) 578 return NULL; 579 size = offsetof(struct btf_record, fields[rec->cnt]); 580 new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN); 581 if (!new_rec) 582 return ERR_PTR(-ENOMEM); 583 /* Do a deep copy of the btf_record */ 584 fields = rec->fields; 585 new_rec->cnt = 0; 586 for (i = 0; i < rec->cnt; i++) { 587 switch (fields[i].type) { 588 case BPF_KPTR_UNREF: 589 case BPF_KPTR_REF: 590 btf_get(fields[i].kptr.btf); 591 if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { 592 ret = -ENXIO; 593 goto free; 594 } 595 break; 596 case BPF_LIST_HEAD: 597 case BPF_LIST_NODE: 598 case BPF_RB_ROOT: 599 case BPF_RB_NODE: 600 case BPF_SPIN_LOCK: 601 case BPF_TIMER: 602 /* Nothing to acquire */ 603 break; 604 default: 605 ret = -EFAULT; 606 WARN_ON_ONCE(1); 607 goto free; 608 } 609 new_rec->cnt++; 610 } 611 return new_rec; 612 free: 613 btf_record_free(new_rec); 614 return ERR_PTR(ret); 615 } 616 617 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b) 618 { 619 bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b); 620 int size; 621 622 if (!a_has_fields && !b_has_fields) 623 return true; 624 if (a_has_fields != b_has_fields) 625 return false; 626 if (rec_a->cnt != rec_b->cnt) 627 return false; 628 size = offsetof(struct btf_record, fields[rec_a->cnt]); 629 /* btf_parse_fields uses kzalloc to allocate a btf_record, so unused 630 * members are zeroed out. So memcmp is safe to do without worrying 631 * about padding/unused fields. 632 * 633 * While spin_lock, timer, and kptr have no relation to map BTF, 634 * list_head metadata is specific to map BTF, the btf and value_rec 635 * members in particular. btf is the map BTF, while value_rec points to 636 * btf_record in that map BTF. 637 * 638 * So while by default, we don't rely on the map BTF (which the records 639 * were parsed from) matching for both records, which is not backwards 640 * compatible, in case list_head is part of it, we implicitly rely on 641 * that by way of depending on memcmp succeeding for it. 642 */ 643 return !memcmp(rec_a, rec_b, size); 644 } 645 646 void bpf_obj_free_timer(const struct btf_record *rec, void *obj) 647 { 648 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER))) 649 return; 650 bpf_timer_cancel_and_free(obj + rec->timer_off); 651 } 652 653 extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); 654 655 void bpf_obj_free_fields(const struct btf_record *rec, void *obj) 656 { 657 const struct btf_field *fields; 658 int i; 659 660 if (IS_ERR_OR_NULL(rec)) 661 return; 662 fields = rec->fields; 663 for (i = 0; i < rec->cnt; i++) { 664 struct btf_struct_meta *pointee_struct_meta; 665 const struct btf_field *field = &fields[i]; 666 void *field_ptr = obj + field->offset; 667 void *xchgd_field; 668 669 switch (fields[i].type) { 670 case BPF_SPIN_LOCK: 671 break; 672 case BPF_TIMER: 673 bpf_timer_cancel_and_free(field_ptr); 674 break; 675 case BPF_KPTR_UNREF: 676 WRITE_ONCE(*(u64 *)field_ptr, 0); 677 break; 678 case BPF_KPTR_REF: 679 xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0); 680 if (!xchgd_field) 681 break; 682 683 if (!btf_is_kernel(field->kptr.btf)) { 684 pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, 685 field->kptr.btf_id); 686 WARN_ON_ONCE(!pointee_struct_meta); 687 migrate_disable(); 688 __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? 689 pointee_struct_meta->record : 690 NULL); 691 migrate_enable(); 692 } else { 693 field->kptr.dtor(xchgd_field); 694 } 695 break; 696 case BPF_LIST_HEAD: 697 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 698 continue; 699 bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off); 700 break; 701 case BPF_RB_ROOT: 702 if (WARN_ON_ONCE(rec->spin_lock_off < 0)) 703 continue; 704 bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off); 705 break; 706 case BPF_LIST_NODE: 707 case BPF_RB_NODE: 708 break; 709 default: 710 WARN_ON_ONCE(1); 711 continue; 712 } 713 } 714 } 715 716 /* called from workqueue */ 717 static void bpf_map_free_deferred(struct work_struct *work) 718 { 719 struct bpf_map *map = container_of(work, struct bpf_map, work); 720 struct btf_field_offs *foffs = map->field_offs; 721 struct btf_record *rec = map->record; 722 723 security_bpf_map_free(map); 724 bpf_map_release_memcg(map); 725 /* implementation dependent freeing */ 726 map->ops->map_free(map); 727 /* Delay freeing of field_offs and btf_record for maps, as map_free 728 * callback usually needs access to them. It is better to do it here 729 * than require each callback to do the free itself manually. 730 * 731 * Note that the btf_record stashed in map->inner_map_meta->record was 732 * already freed using the map_free callback for map in map case which 733 * eventually calls bpf_map_free_meta, since inner_map_meta is only a 734 * template bpf_map struct used during verification. 735 */ 736 kfree(foffs); 737 btf_record_free(rec); 738 } 739 740 static void bpf_map_put_uref(struct bpf_map *map) 741 { 742 if (atomic64_dec_and_test(&map->usercnt)) { 743 if (map->ops->map_release_uref) 744 map->ops->map_release_uref(map); 745 } 746 } 747 748 /* decrement map refcnt and schedule it for freeing via workqueue 749 * (underlying map implementation ops->map_free() might sleep) 750 */ 751 void bpf_map_put(struct bpf_map *map) 752 { 753 if (atomic64_dec_and_test(&map->refcnt)) { 754 /* bpf_map_free_id() must be called first */ 755 bpf_map_free_id(map); 756 btf_put(map->btf); 757 INIT_WORK(&map->work, bpf_map_free_deferred); 758 /* Avoid spawning kworkers, since they all might contend 759 * for the same mutex like slab_mutex. 760 */ 761 queue_work(system_unbound_wq, &map->work); 762 } 763 } 764 EXPORT_SYMBOL_GPL(bpf_map_put); 765 766 void bpf_map_put_with_uref(struct bpf_map *map) 767 { 768 bpf_map_put_uref(map); 769 bpf_map_put(map); 770 } 771 772 static int bpf_map_release(struct inode *inode, struct file *filp) 773 { 774 struct bpf_map *map = filp->private_data; 775 776 if (map->ops->map_release) 777 map->ops->map_release(map, filp); 778 779 bpf_map_put_with_uref(map); 780 return 0; 781 } 782 783 static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) 784 { 785 fmode_t mode = f.file->f_mode; 786 787 /* Our file permissions may have been overridden by global 788 * map permissions facing syscall side. 789 */ 790 if (READ_ONCE(map->frozen)) 791 mode &= ~FMODE_CAN_WRITE; 792 return mode; 793 } 794 795 #ifdef CONFIG_PROC_FS 796 /* Show the memory usage of a bpf map */ 797 static u64 bpf_map_memory_usage(const struct bpf_map *map) 798 { 799 return map->ops->map_mem_usage(map); 800 } 801 802 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 803 { 804 struct bpf_map *map = filp->private_data; 805 u32 type = 0, jited = 0; 806 807 if (map_type_contains_progs(map)) { 808 spin_lock(&map->owner.lock); 809 type = map->owner.type; 810 jited = map->owner.jited; 811 spin_unlock(&map->owner.lock); 812 } 813 814 seq_printf(m, 815 "map_type:\t%u\n" 816 "key_size:\t%u\n" 817 "value_size:\t%u\n" 818 "max_entries:\t%u\n" 819 "map_flags:\t%#x\n" 820 "map_extra:\t%#llx\n" 821 "memlock:\t%llu\n" 822 "map_id:\t%u\n" 823 "frozen:\t%u\n", 824 map->map_type, 825 map->key_size, 826 map->value_size, 827 map->max_entries, 828 map->map_flags, 829 (unsigned long long)map->map_extra, 830 bpf_map_memory_usage(map), 831 map->id, 832 READ_ONCE(map->frozen)); 833 if (type) { 834 seq_printf(m, "owner_prog_type:\t%u\n", type); 835 seq_printf(m, "owner_jited:\t%u\n", jited); 836 } 837 } 838 #endif 839 840 static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, 841 loff_t *ppos) 842 { 843 /* We need this handler such that alloc_file() enables 844 * f_mode with FMODE_CAN_READ. 845 */ 846 return -EINVAL; 847 } 848 849 static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, 850 size_t siz, loff_t *ppos) 851 { 852 /* We need this handler such that alloc_file() enables 853 * f_mode with FMODE_CAN_WRITE. 854 */ 855 return -EINVAL; 856 } 857 858 /* called for any extra memory-mapped regions (except initial) */ 859 static void bpf_map_mmap_open(struct vm_area_struct *vma) 860 { 861 struct bpf_map *map = vma->vm_file->private_data; 862 863 if (vma->vm_flags & VM_MAYWRITE) 864 bpf_map_write_active_inc(map); 865 } 866 867 /* called for all unmapped memory region (including initial) */ 868 static void bpf_map_mmap_close(struct vm_area_struct *vma) 869 { 870 struct bpf_map *map = vma->vm_file->private_data; 871 872 if (vma->vm_flags & VM_MAYWRITE) 873 bpf_map_write_active_dec(map); 874 } 875 876 static const struct vm_operations_struct bpf_map_default_vmops = { 877 .open = bpf_map_mmap_open, 878 .close = bpf_map_mmap_close, 879 }; 880 881 static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) 882 { 883 struct bpf_map *map = filp->private_data; 884 int err; 885 886 if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record)) 887 return -ENOTSUPP; 888 889 if (!(vma->vm_flags & VM_SHARED)) 890 return -EINVAL; 891 892 mutex_lock(&map->freeze_mutex); 893 894 if (vma->vm_flags & VM_WRITE) { 895 if (map->frozen) { 896 err = -EPERM; 897 goto out; 898 } 899 /* map is meant to be read-only, so do not allow mapping as 900 * writable, because it's possible to leak a writable page 901 * reference and allows user-space to still modify it after 902 * freezing, while verifier will assume contents do not change 903 */ 904 if (map->map_flags & BPF_F_RDONLY_PROG) { 905 err = -EACCES; 906 goto out; 907 } 908 } 909 910 /* set default open/close callbacks */ 911 vma->vm_ops = &bpf_map_default_vmops; 912 vma->vm_private_data = map; 913 vm_flags_clear(vma, VM_MAYEXEC); 914 if (!(vma->vm_flags & VM_WRITE)) 915 /* disallow re-mapping with PROT_WRITE */ 916 vm_flags_clear(vma, VM_MAYWRITE); 917 918 err = map->ops->map_mmap(map, vma); 919 if (err) 920 goto out; 921 922 if (vma->vm_flags & VM_MAYWRITE) 923 bpf_map_write_active_inc(map); 924 out: 925 mutex_unlock(&map->freeze_mutex); 926 return err; 927 } 928 929 static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts) 930 { 931 struct bpf_map *map = filp->private_data; 932 933 if (map->ops->map_poll) 934 return map->ops->map_poll(map, filp, pts); 935 936 return EPOLLERR; 937 } 938 939 const struct file_operations bpf_map_fops = { 940 #ifdef CONFIG_PROC_FS 941 .show_fdinfo = bpf_map_show_fdinfo, 942 #endif 943 .release = bpf_map_release, 944 .read = bpf_dummy_read, 945 .write = bpf_dummy_write, 946 .mmap = bpf_map_mmap, 947 .poll = bpf_map_poll, 948 }; 949 950 int bpf_map_new_fd(struct bpf_map *map, int flags) 951 { 952 int ret; 953 954 ret = security_bpf_map(map, OPEN_FMODE(flags)); 955 if (ret < 0) 956 return ret; 957 958 return anon_inode_getfd("bpf-map", &bpf_map_fops, map, 959 flags | O_CLOEXEC); 960 } 961 962 int bpf_get_file_flag(int flags) 963 { 964 if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) 965 return -EINVAL; 966 if (flags & BPF_F_RDONLY) 967 return O_RDONLY; 968 if (flags & BPF_F_WRONLY) 969 return O_WRONLY; 970 return O_RDWR; 971 } 972 973 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 974 #define CHECK_ATTR(CMD) \ 975 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 976 sizeof(attr->CMD##_LAST_FIELD), 0, \ 977 sizeof(*attr) - \ 978 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 979 sizeof(attr->CMD##_LAST_FIELD)) != NULL 980 981 /* dst and src must have at least "size" number of bytes. 982 * Return strlen on success and < 0 on error. 983 */ 984 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size) 985 { 986 const char *end = src + size; 987 const char *orig_src = src; 988 989 memset(dst, 0, size); 990 /* Copy all isalnum(), '_' and '.' chars. */ 991 while (src < end && *src) { 992 if (!isalnum(*src) && 993 *src != '_' && *src != '.') 994 return -EINVAL; 995 *dst++ = *src++; 996 } 997 998 /* No '\0' found in "size" number of bytes */ 999 if (src == end) 1000 return -EINVAL; 1001 1002 return src - orig_src; 1003 } 1004 1005 int map_check_no_btf(const struct bpf_map *map, 1006 const struct btf *btf, 1007 const struct btf_type *key_type, 1008 const struct btf_type *value_type) 1009 { 1010 return -ENOTSUPP; 1011 } 1012 1013 static int map_check_btf(struct bpf_map *map, const struct btf *btf, 1014 u32 btf_key_id, u32 btf_value_id) 1015 { 1016 const struct btf_type *key_type, *value_type; 1017 u32 key_size, value_size; 1018 int ret = 0; 1019 1020 /* Some maps allow key to be unspecified. */ 1021 if (btf_key_id) { 1022 key_type = btf_type_id_size(btf, &btf_key_id, &key_size); 1023 if (!key_type || key_size != map->key_size) 1024 return -EINVAL; 1025 } else { 1026 key_type = btf_type_by_id(btf, 0); 1027 if (!map->ops->map_check_btf) 1028 return -EINVAL; 1029 } 1030 1031 value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 1032 if (!value_type || value_size != map->value_size) 1033 return -EINVAL; 1034 1035 map->record = btf_parse_fields(btf, value_type, 1036 BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | 1037 BPF_RB_ROOT, 1038 map->value_size); 1039 if (!IS_ERR_OR_NULL(map->record)) { 1040 int i; 1041 1042 if (!bpf_capable()) { 1043 ret = -EPERM; 1044 goto free_map_tab; 1045 } 1046 if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) { 1047 ret = -EACCES; 1048 goto free_map_tab; 1049 } 1050 for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) { 1051 switch (map->record->field_mask & (1 << i)) { 1052 case 0: 1053 continue; 1054 case BPF_SPIN_LOCK: 1055 if (map->map_type != BPF_MAP_TYPE_HASH && 1056 map->map_type != BPF_MAP_TYPE_ARRAY && 1057 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 1058 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1059 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1060 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1061 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1062 ret = -EOPNOTSUPP; 1063 goto free_map_tab; 1064 } 1065 break; 1066 case BPF_TIMER: 1067 if (map->map_type != BPF_MAP_TYPE_HASH && 1068 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1069 map->map_type != BPF_MAP_TYPE_ARRAY) { 1070 ret = -EOPNOTSUPP; 1071 goto free_map_tab; 1072 } 1073 break; 1074 case BPF_KPTR_UNREF: 1075 case BPF_KPTR_REF: 1076 if (map->map_type != BPF_MAP_TYPE_HASH && 1077 map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 1078 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1079 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH && 1080 map->map_type != BPF_MAP_TYPE_ARRAY && 1081 map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && 1082 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1083 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1084 map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1085 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1086 ret = -EOPNOTSUPP; 1087 goto free_map_tab; 1088 } 1089 break; 1090 case BPF_LIST_HEAD: 1091 case BPF_RB_ROOT: 1092 if (map->map_type != BPF_MAP_TYPE_HASH && 1093 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1094 map->map_type != BPF_MAP_TYPE_ARRAY) { 1095 ret = -EOPNOTSUPP; 1096 goto free_map_tab; 1097 } 1098 break; 1099 default: 1100 /* Fail if map_type checks are missing for a field type */ 1101 ret = -EOPNOTSUPP; 1102 goto free_map_tab; 1103 } 1104 } 1105 } 1106 1107 ret = btf_check_and_fixup_fields(btf, map->record); 1108 if (ret < 0) 1109 goto free_map_tab; 1110 1111 if (map->ops->map_check_btf) { 1112 ret = map->ops->map_check_btf(map, btf, key_type, value_type); 1113 if (ret < 0) 1114 goto free_map_tab; 1115 } 1116 1117 return ret; 1118 free_map_tab: 1119 bpf_map_free_record(map); 1120 return ret; 1121 } 1122 1123 #define BPF_MAP_CREATE_LAST_FIELD map_extra 1124 /* called via syscall */ 1125 static int map_create(union bpf_attr *attr) 1126 { 1127 int numa_node = bpf_map_attr_numa_node(attr); 1128 struct btf_field_offs *foffs; 1129 struct bpf_map *map; 1130 int f_flags; 1131 int err; 1132 1133 err = CHECK_ATTR(BPF_MAP_CREATE); 1134 if (err) 1135 return -EINVAL; 1136 1137 if (attr->btf_vmlinux_value_type_id) { 1138 if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || 1139 attr->btf_key_type_id || attr->btf_value_type_id) 1140 return -EINVAL; 1141 } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { 1142 return -EINVAL; 1143 } 1144 1145 if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && 1146 attr->map_extra != 0) 1147 return -EINVAL; 1148 1149 f_flags = bpf_get_file_flag(attr->map_flags); 1150 if (f_flags < 0) 1151 return f_flags; 1152 1153 if (numa_node != NUMA_NO_NODE && 1154 ((unsigned int)numa_node >= nr_node_ids || 1155 !node_online(numa_node))) 1156 return -EINVAL; 1157 1158 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 1159 map = find_and_alloc_map(attr); 1160 if (IS_ERR(map)) 1161 return PTR_ERR(map); 1162 1163 err = bpf_obj_name_cpy(map->name, attr->map_name, 1164 sizeof(attr->map_name)); 1165 if (err < 0) 1166 goto free_map; 1167 1168 atomic64_set(&map->refcnt, 1); 1169 atomic64_set(&map->usercnt, 1); 1170 mutex_init(&map->freeze_mutex); 1171 spin_lock_init(&map->owner.lock); 1172 1173 if (attr->btf_key_type_id || attr->btf_value_type_id || 1174 /* Even the map's value is a kernel's struct, 1175 * the bpf_prog.o must have BTF to begin with 1176 * to figure out the corresponding kernel's 1177 * counter part. Thus, attr->btf_fd has 1178 * to be valid also. 1179 */ 1180 attr->btf_vmlinux_value_type_id) { 1181 struct btf *btf; 1182 1183 btf = btf_get_by_fd(attr->btf_fd); 1184 if (IS_ERR(btf)) { 1185 err = PTR_ERR(btf); 1186 goto free_map; 1187 } 1188 if (btf_is_kernel(btf)) { 1189 btf_put(btf); 1190 err = -EACCES; 1191 goto free_map; 1192 } 1193 map->btf = btf; 1194 1195 if (attr->btf_value_type_id) { 1196 err = map_check_btf(map, btf, attr->btf_key_type_id, 1197 attr->btf_value_type_id); 1198 if (err) 1199 goto free_map; 1200 } 1201 1202 map->btf_key_type_id = attr->btf_key_type_id; 1203 map->btf_value_type_id = attr->btf_value_type_id; 1204 map->btf_vmlinux_value_type_id = 1205 attr->btf_vmlinux_value_type_id; 1206 } 1207 1208 1209 foffs = btf_parse_field_offs(map->record); 1210 if (IS_ERR(foffs)) { 1211 err = PTR_ERR(foffs); 1212 goto free_map; 1213 } 1214 map->field_offs = foffs; 1215 1216 err = security_bpf_map_alloc(map); 1217 if (err) 1218 goto free_map_field_offs; 1219 1220 err = bpf_map_alloc_id(map); 1221 if (err) 1222 goto free_map_sec; 1223 1224 bpf_map_save_memcg(map); 1225 1226 err = bpf_map_new_fd(map, f_flags); 1227 if (err < 0) { 1228 /* failed to allocate fd. 1229 * bpf_map_put_with_uref() is needed because the above 1230 * bpf_map_alloc_id() has published the map 1231 * to the userspace and the userspace may 1232 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 1233 */ 1234 bpf_map_put_with_uref(map); 1235 return err; 1236 } 1237 1238 return err; 1239 1240 free_map_sec: 1241 security_bpf_map_free(map); 1242 free_map_field_offs: 1243 kfree(map->field_offs); 1244 free_map: 1245 btf_put(map->btf); 1246 map->ops->map_free(map); 1247 return err; 1248 } 1249 1250 /* if error is returned, fd is released. 1251 * On success caller should complete fd access with matching fdput() 1252 */ 1253 struct bpf_map *__bpf_map_get(struct fd f) 1254 { 1255 if (!f.file) 1256 return ERR_PTR(-EBADF); 1257 if (f.file->f_op != &bpf_map_fops) { 1258 fdput(f); 1259 return ERR_PTR(-EINVAL); 1260 } 1261 1262 return f.file->private_data; 1263 } 1264 1265 void bpf_map_inc(struct bpf_map *map) 1266 { 1267 atomic64_inc(&map->refcnt); 1268 } 1269 EXPORT_SYMBOL_GPL(bpf_map_inc); 1270 1271 void bpf_map_inc_with_uref(struct bpf_map *map) 1272 { 1273 atomic64_inc(&map->refcnt); 1274 atomic64_inc(&map->usercnt); 1275 } 1276 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); 1277 1278 struct bpf_map *bpf_map_get(u32 ufd) 1279 { 1280 struct fd f = fdget(ufd); 1281 struct bpf_map *map; 1282 1283 map = __bpf_map_get(f); 1284 if (IS_ERR(map)) 1285 return map; 1286 1287 bpf_map_inc(map); 1288 fdput(f); 1289 1290 return map; 1291 } 1292 EXPORT_SYMBOL(bpf_map_get); 1293 1294 struct bpf_map *bpf_map_get_with_uref(u32 ufd) 1295 { 1296 struct fd f = fdget(ufd); 1297 struct bpf_map *map; 1298 1299 map = __bpf_map_get(f); 1300 if (IS_ERR(map)) 1301 return map; 1302 1303 bpf_map_inc_with_uref(map); 1304 fdput(f); 1305 1306 return map; 1307 } 1308 1309 /* map_idr_lock should have been held or the map should have been 1310 * protected by rcu read lock. 1311 */ 1312 struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) 1313 { 1314 int refold; 1315 1316 refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); 1317 if (!refold) 1318 return ERR_PTR(-ENOENT); 1319 if (uref) 1320 atomic64_inc(&map->usercnt); 1321 1322 return map; 1323 } 1324 1325 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) 1326 { 1327 spin_lock_bh(&map_idr_lock); 1328 map = __bpf_map_inc_not_zero(map, false); 1329 spin_unlock_bh(&map_idr_lock); 1330 1331 return map; 1332 } 1333 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); 1334 1335 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) 1336 { 1337 return -ENOTSUPP; 1338 } 1339 1340 static void *__bpf_copy_key(void __user *ukey, u64 key_size) 1341 { 1342 if (key_size) 1343 return vmemdup_user(ukey, key_size); 1344 1345 if (ukey) 1346 return ERR_PTR(-EINVAL); 1347 1348 return NULL; 1349 } 1350 1351 static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size) 1352 { 1353 if (key_size) 1354 return kvmemdup_bpfptr(ukey, key_size); 1355 1356 if (!bpfptr_is_null(ukey)) 1357 return ERR_PTR(-EINVAL); 1358 1359 return NULL; 1360 } 1361 1362 /* last field in 'union bpf_attr' used by this command */ 1363 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags 1364 1365 static int map_lookup_elem(union bpf_attr *attr) 1366 { 1367 void __user *ukey = u64_to_user_ptr(attr->key); 1368 void __user *uvalue = u64_to_user_ptr(attr->value); 1369 int ufd = attr->map_fd; 1370 struct bpf_map *map; 1371 void *key, *value; 1372 u32 value_size; 1373 struct fd f; 1374 int err; 1375 1376 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 1377 return -EINVAL; 1378 1379 if (attr->flags & ~BPF_F_LOCK) 1380 return -EINVAL; 1381 1382 f = fdget(ufd); 1383 map = __bpf_map_get(f); 1384 if (IS_ERR(map)) 1385 return PTR_ERR(map); 1386 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 1387 err = -EPERM; 1388 goto err_put; 1389 } 1390 1391 if ((attr->flags & BPF_F_LOCK) && 1392 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1393 err = -EINVAL; 1394 goto err_put; 1395 } 1396 1397 key = __bpf_copy_key(ukey, map->key_size); 1398 if (IS_ERR(key)) { 1399 err = PTR_ERR(key); 1400 goto err_put; 1401 } 1402 1403 value_size = bpf_map_value_size(map); 1404 1405 err = -ENOMEM; 1406 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1407 if (!value) 1408 goto free_key; 1409 1410 if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 1411 if (copy_from_user(value, uvalue, value_size)) 1412 err = -EFAULT; 1413 else 1414 err = bpf_map_copy_value(map, key, value, attr->flags); 1415 goto free_value; 1416 } 1417 1418 err = bpf_map_copy_value(map, key, value, attr->flags); 1419 if (err) 1420 goto free_value; 1421 1422 err = -EFAULT; 1423 if (copy_to_user(uvalue, value, value_size) != 0) 1424 goto free_value; 1425 1426 err = 0; 1427 1428 free_value: 1429 kvfree(value); 1430 free_key: 1431 kvfree(key); 1432 err_put: 1433 fdput(f); 1434 return err; 1435 } 1436 1437 1438 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 1439 1440 static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) 1441 { 1442 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1443 bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel); 1444 int ufd = attr->map_fd; 1445 struct bpf_map *map; 1446 void *key, *value; 1447 u32 value_size; 1448 struct fd f; 1449 int err; 1450 1451 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 1452 return -EINVAL; 1453 1454 f = fdget(ufd); 1455 map = __bpf_map_get(f); 1456 if (IS_ERR(map)) 1457 return PTR_ERR(map); 1458 bpf_map_write_active_inc(map); 1459 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1460 err = -EPERM; 1461 goto err_put; 1462 } 1463 1464 if ((attr->flags & BPF_F_LOCK) && 1465 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1466 err = -EINVAL; 1467 goto err_put; 1468 } 1469 1470 key = ___bpf_copy_key(ukey, map->key_size); 1471 if (IS_ERR(key)) { 1472 err = PTR_ERR(key); 1473 goto err_put; 1474 } 1475 1476 value_size = bpf_map_value_size(map); 1477 value = kvmemdup_bpfptr(uvalue, value_size); 1478 if (IS_ERR(value)) { 1479 err = PTR_ERR(value); 1480 goto free_key; 1481 } 1482 1483 err = bpf_map_update_value(map, f.file, key, value, attr->flags); 1484 1485 kvfree(value); 1486 free_key: 1487 kvfree(key); 1488 err_put: 1489 bpf_map_write_active_dec(map); 1490 fdput(f); 1491 return err; 1492 } 1493 1494 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 1495 1496 static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr) 1497 { 1498 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1499 int ufd = attr->map_fd; 1500 struct bpf_map *map; 1501 struct fd f; 1502 void *key; 1503 int err; 1504 1505 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 1506 return -EINVAL; 1507 1508 f = fdget(ufd); 1509 map = __bpf_map_get(f); 1510 if (IS_ERR(map)) 1511 return PTR_ERR(map); 1512 bpf_map_write_active_inc(map); 1513 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1514 err = -EPERM; 1515 goto err_put; 1516 } 1517 1518 key = ___bpf_copy_key(ukey, map->key_size); 1519 if (IS_ERR(key)) { 1520 err = PTR_ERR(key); 1521 goto err_put; 1522 } 1523 1524 if (bpf_map_is_offloaded(map)) { 1525 err = bpf_map_offload_delete_elem(map, key); 1526 goto out; 1527 } else if (IS_FD_PROG_ARRAY(map) || 1528 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 1529 /* These maps require sleepable context */ 1530 err = map->ops->map_delete_elem(map, key); 1531 goto out; 1532 } 1533 1534 bpf_disable_instrumentation(); 1535 rcu_read_lock(); 1536 err = map->ops->map_delete_elem(map, key); 1537 rcu_read_unlock(); 1538 bpf_enable_instrumentation(); 1539 maybe_wait_bpf_programs(map); 1540 out: 1541 kvfree(key); 1542 err_put: 1543 bpf_map_write_active_dec(map); 1544 fdput(f); 1545 return err; 1546 } 1547 1548 /* last field in 'union bpf_attr' used by this command */ 1549 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 1550 1551 static int map_get_next_key(union bpf_attr *attr) 1552 { 1553 void __user *ukey = u64_to_user_ptr(attr->key); 1554 void __user *unext_key = u64_to_user_ptr(attr->next_key); 1555 int ufd = attr->map_fd; 1556 struct bpf_map *map; 1557 void *key, *next_key; 1558 struct fd f; 1559 int err; 1560 1561 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 1562 return -EINVAL; 1563 1564 f = fdget(ufd); 1565 map = __bpf_map_get(f); 1566 if (IS_ERR(map)) 1567 return PTR_ERR(map); 1568 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 1569 err = -EPERM; 1570 goto err_put; 1571 } 1572 1573 if (ukey) { 1574 key = __bpf_copy_key(ukey, map->key_size); 1575 if (IS_ERR(key)) { 1576 err = PTR_ERR(key); 1577 goto err_put; 1578 } 1579 } else { 1580 key = NULL; 1581 } 1582 1583 err = -ENOMEM; 1584 next_key = kvmalloc(map->key_size, GFP_USER); 1585 if (!next_key) 1586 goto free_key; 1587 1588 if (bpf_map_is_offloaded(map)) { 1589 err = bpf_map_offload_get_next_key(map, key, next_key); 1590 goto out; 1591 } 1592 1593 rcu_read_lock(); 1594 err = map->ops->map_get_next_key(map, key, next_key); 1595 rcu_read_unlock(); 1596 out: 1597 if (err) 1598 goto free_next_key; 1599 1600 err = -EFAULT; 1601 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 1602 goto free_next_key; 1603 1604 err = 0; 1605 1606 free_next_key: 1607 kvfree(next_key); 1608 free_key: 1609 kvfree(key); 1610 err_put: 1611 fdput(f); 1612 return err; 1613 } 1614 1615 int generic_map_delete_batch(struct bpf_map *map, 1616 const union bpf_attr *attr, 1617 union bpf_attr __user *uattr) 1618 { 1619 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1620 u32 cp, max_count; 1621 int err = 0; 1622 void *key; 1623 1624 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1625 return -EINVAL; 1626 1627 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1628 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1629 return -EINVAL; 1630 } 1631 1632 max_count = attr->batch.count; 1633 if (!max_count) 1634 return 0; 1635 1636 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1637 if (!key) 1638 return -ENOMEM; 1639 1640 for (cp = 0; cp < max_count; cp++) { 1641 err = -EFAULT; 1642 if (copy_from_user(key, keys + cp * map->key_size, 1643 map->key_size)) 1644 break; 1645 1646 if (bpf_map_is_offloaded(map)) { 1647 err = bpf_map_offload_delete_elem(map, key); 1648 break; 1649 } 1650 1651 bpf_disable_instrumentation(); 1652 rcu_read_lock(); 1653 err = map->ops->map_delete_elem(map, key); 1654 rcu_read_unlock(); 1655 bpf_enable_instrumentation(); 1656 if (err) 1657 break; 1658 cond_resched(); 1659 } 1660 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1661 err = -EFAULT; 1662 1663 kvfree(key); 1664 1665 maybe_wait_bpf_programs(map); 1666 return err; 1667 } 1668 1669 int generic_map_update_batch(struct bpf_map *map, struct file *map_file, 1670 const union bpf_attr *attr, 1671 union bpf_attr __user *uattr) 1672 { 1673 void __user *values = u64_to_user_ptr(attr->batch.values); 1674 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1675 u32 value_size, cp, max_count; 1676 void *key, *value; 1677 int err = 0; 1678 1679 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1680 return -EINVAL; 1681 1682 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1683 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1684 return -EINVAL; 1685 } 1686 1687 value_size = bpf_map_value_size(map); 1688 1689 max_count = attr->batch.count; 1690 if (!max_count) 1691 return 0; 1692 1693 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1694 if (!key) 1695 return -ENOMEM; 1696 1697 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1698 if (!value) { 1699 kvfree(key); 1700 return -ENOMEM; 1701 } 1702 1703 for (cp = 0; cp < max_count; cp++) { 1704 err = -EFAULT; 1705 if (copy_from_user(key, keys + cp * map->key_size, 1706 map->key_size) || 1707 copy_from_user(value, values + cp * value_size, value_size)) 1708 break; 1709 1710 err = bpf_map_update_value(map, map_file, key, value, 1711 attr->batch.elem_flags); 1712 1713 if (err) 1714 break; 1715 cond_resched(); 1716 } 1717 1718 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1719 err = -EFAULT; 1720 1721 kvfree(value); 1722 kvfree(key); 1723 return err; 1724 } 1725 1726 #define MAP_LOOKUP_RETRIES 3 1727 1728 int generic_map_lookup_batch(struct bpf_map *map, 1729 const union bpf_attr *attr, 1730 union bpf_attr __user *uattr) 1731 { 1732 void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); 1733 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 1734 void __user *values = u64_to_user_ptr(attr->batch.values); 1735 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1736 void *buf, *buf_prevkey, *prev_key, *key, *value; 1737 int err, retry = MAP_LOOKUP_RETRIES; 1738 u32 value_size, cp, max_count; 1739 1740 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1741 return -EINVAL; 1742 1743 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1744 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) 1745 return -EINVAL; 1746 1747 value_size = bpf_map_value_size(map); 1748 1749 max_count = attr->batch.count; 1750 if (!max_count) 1751 return 0; 1752 1753 if (put_user(0, &uattr->batch.count)) 1754 return -EFAULT; 1755 1756 buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1757 if (!buf_prevkey) 1758 return -ENOMEM; 1759 1760 buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN); 1761 if (!buf) { 1762 kvfree(buf_prevkey); 1763 return -ENOMEM; 1764 } 1765 1766 err = -EFAULT; 1767 prev_key = NULL; 1768 if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) 1769 goto free_buf; 1770 key = buf; 1771 value = key + map->key_size; 1772 if (ubatch) 1773 prev_key = buf_prevkey; 1774 1775 for (cp = 0; cp < max_count;) { 1776 rcu_read_lock(); 1777 err = map->ops->map_get_next_key(map, prev_key, key); 1778 rcu_read_unlock(); 1779 if (err) 1780 break; 1781 err = bpf_map_copy_value(map, key, value, 1782 attr->batch.elem_flags); 1783 1784 if (err == -ENOENT) { 1785 if (retry) { 1786 retry--; 1787 continue; 1788 } 1789 err = -EINTR; 1790 break; 1791 } 1792 1793 if (err) 1794 goto free_buf; 1795 1796 if (copy_to_user(keys + cp * map->key_size, key, 1797 map->key_size)) { 1798 err = -EFAULT; 1799 goto free_buf; 1800 } 1801 if (copy_to_user(values + cp * value_size, value, value_size)) { 1802 err = -EFAULT; 1803 goto free_buf; 1804 } 1805 1806 if (!prev_key) 1807 prev_key = buf_prevkey; 1808 1809 swap(prev_key, key); 1810 retry = MAP_LOOKUP_RETRIES; 1811 cp++; 1812 cond_resched(); 1813 } 1814 1815 if (err == -EFAULT) 1816 goto free_buf; 1817 1818 if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || 1819 (cp && copy_to_user(uobatch, prev_key, map->key_size)))) 1820 err = -EFAULT; 1821 1822 free_buf: 1823 kvfree(buf_prevkey); 1824 kvfree(buf); 1825 return err; 1826 } 1827 1828 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags 1829 1830 static int map_lookup_and_delete_elem(union bpf_attr *attr) 1831 { 1832 void __user *ukey = u64_to_user_ptr(attr->key); 1833 void __user *uvalue = u64_to_user_ptr(attr->value); 1834 int ufd = attr->map_fd; 1835 struct bpf_map *map; 1836 void *key, *value; 1837 u32 value_size; 1838 struct fd f; 1839 int err; 1840 1841 if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) 1842 return -EINVAL; 1843 1844 if (attr->flags & ~BPF_F_LOCK) 1845 return -EINVAL; 1846 1847 f = fdget(ufd); 1848 map = __bpf_map_get(f); 1849 if (IS_ERR(map)) 1850 return PTR_ERR(map); 1851 bpf_map_write_active_inc(map); 1852 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || 1853 !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1854 err = -EPERM; 1855 goto err_put; 1856 } 1857 1858 if (attr->flags && 1859 (map->map_type == BPF_MAP_TYPE_QUEUE || 1860 map->map_type == BPF_MAP_TYPE_STACK)) { 1861 err = -EINVAL; 1862 goto err_put; 1863 } 1864 1865 if ((attr->flags & BPF_F_LOCK) && 1866 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 1867 err = -EINVAL; 1868 goto err_put; 1869 } 1870 1871 key = __bpf_copy_key(ukey, map->key_size); 1872 if (IS_ERR(key)) { 1873 err = PTR_ERR(key); 1874 goto err_put; 1875 } 1876 1877 value_size = bpf_map_value_size(map); 1878 1879 err = -ENOMEM; 1880 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1881 if (!value) 1882 goto free_key; 1883 1884 err = -ENOTSUPP; 1885 if (map->map_type == BPF_MAP_TYPE_QUEUE || 1886 map->map_type == BPF_MAP_TYPE_STACK) { 1887 err = map->ops->map_pop_elem(map, value); 1888 } else if (map->map_type == BPF_MAP_TYPE_HASH || 1889 map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 1890 map->map_type == BPF_MAP_TYPE_LRU_HASH || 1891 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 1892 if (!bpf_map_is_offloaded(map)) { 1893 bpf_disable_instrumentation(); 1894 rcu_read_lock(); 1895 err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags); 1896 rcu_read_unlock(); 1897 bpf_enable_instrumentation(); 1898 } 1899 } 1900 1901 if (err) 1902 goto free_value; 1903 1904 if (copy_to_user(uvalue, value, value_size) != 0) { 1905 err = -EFAULT; 1906 goto free_value; 1907 } 1908 1909 err = 0; 1910 1911 free_value: 1912 kvfree(value); 1913 free_key: 1914 kvfree(key); 1915 err_put: 1916 bpf_map_write_active_dec(map); 1917 fdput(f); 1918 return err; 1919 } 1920 1921 #define BPF_MAP_FREEZE_LAST_FIELD map_fd 1922 1923 static int map_freeze(const union bpf_attr *attr) 1924 { 1925 int err = 0, ufd = attr->map_fd; 1926 struct bpf_map *map; 1927 struct fd f; 1928 1929 if (CHECK_ATTR(BPF_MAP_FREEZE)) 1930 return -EINVAL; 1931 1932 f = fdget(ufd); 1933 map = __bpf_map_get(f); 1934 if (IS_ERR(map)) 1935 return PTR_ERR(map); 1936 1937 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) { 1938 fdput(f); 1939 return -ENOTSUPP; 1940 } 1941 1942 mutex_lock(&map->freeze_mutex); 1943 if (bpf_map_write_active(map)) { 1944 err = -EBUSY; 1945 goto err_put; 1946 } 1947 if (READ_ONCE(map->frozen)) { 1948 err = -EBUSY; 1949 goto err_put; 1950 } 1951 if (!bpf_capable()) { 1952 err = -EPERM; 1953 goto err_put; 1954 } 1955 1956 WRITE_ONCE(map->frozen, true); 1957 err_put: 1958 mutex_unlock(&map->freeze_mutex); 1959 fdput(f); 1960 return err; 1961 } 1962 1963 static const struct bpf_prog_ops * const bpf_prog_types[] = { 1964 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 1965 [_id] = & _name ## _prog_ops, 1966 #define BPF_MAP_TYPE(_id, _ops) 1967 #define BPF_LINK_TYPE(_id, _name) 1968 #include <linux/bpf_types.h> 1969 #undef BPF_PROG_TYPE 1970 #undef BPF_MAP_TYPE 1971 #undef BPF_LINK_TYPE 1972 }; 1973 1974 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 1975 { 1976 const struct bpf_prog_ops *ops; 1977 1978 if (type >= ARRAY_SIZE(bpf_prog_types)) 1979 return -EINVAL; 1980 type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types)); 1981 ops = bpf_prog_types[type]; 1982 if (!ops) 1983 return -EINVAL; 1984 1985 if (!bpf_prog_is_offloaded(prog->aux)) 1986 prog->aux->ops = ops; 1987 else 1988 prog->aux->ops = &bpf_offload_prog_ops; 1989 prog->type = type; 1990 return 0; 1991 } 1992 1993 enum bpf_audit { 1994 BPF_AUDIT_LOAD, 1995 BPF_AUDIT_UNLOAD, 1996 BPF_AUDIT_MAX, 1997 }; 1998 1999 static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { 2000 [BPF_AUDIT_LOAD] = "LOAD", 2001 [BPF_AUDIT_UNLOAD] = "UNLOAD", 2002 }; 2003 2004 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) 2005 { 2006 struct audit_context *ctx = NULL; 2007 struct audit_buffer *ab; 2008 2009 if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) 2010 return; 2011 if (audit_enabled == AUDIT_OFF) 2012 return; 2013 if (!in_irq() && !irqs_disabled()) 2014 ctx = audit_context(); 2015 ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); 2016 if (unlikely(!ab)) 2017 return; 2018 audit_log_format(ab, "prog-id=%u op=%s", 2019 prog->aux->id, bpf_audit_str[op]); 2020 audit_log_end(ab); 2021 } 2022 2023 static int bpf_prog_alloc_id(struct bpf_prog *prog) 2024 { 2025 int id; 2026 2027 idr_preload(GFP_KERNEL); 2028 spin_lock_bh(&prog_idr_lock); 2029 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 2030 if (id > 0) 2031 prog->aux->id = id; 2032 spin_unlock_bh(&prog_idr_lock); 2033 idr_preload_end(); 2034 2035 /* id is in [1, INT_MAX) */ 2036 if (WARN_ON_ONCE(!id)) 2037 return -ENOSPC; 2038 2039 return id > 0 ? 0 : id; 2040 } 2041 2042 void bpf_prog_free_id(struct bpf_prog *prog) 2043 { 2044 unsigned long flags; 2045 2046 /* cBPF to eBPF migrations are currently not in the idr store. 2047 * Offloaded programs are removed from the store when their device 2048 * disappears - even if someone grabs an fd to them they are unusable, 2049 * simply waiting for refcnt to drop to be freed. 2050 */ 2051 if (!prog->aux->id) 2052 return; 2053 2054 spin_lock_irqsave(&prog_idr_lock, flags); 2055 idr_remove(&prog_idr, prog->aux->id); 2056 prog->aux->id = 0; 2057 spin_unlock_irqrestore(&prog_idr_lock, flags); 2058 } 2059 2060 static void __bpf_prog_put_rcu(struct rcu_head *rcu) 2061 { 2062 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 2063 2064 kvfree(aux->func_info); 2065 kfree(aux->func_info_aux); 2066 free_uid(aux->user); 2067 security_bpf_prog_free(aux); 2068 bpf_prog_free(aux->prog); 2069 } 2070 2071 static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) 2072 { 2073 bpf_prog_kallsyms_del_all(prog); 2074 btf_put(prog->aux->btf); 2075 module_put(prog->aux->mod); 2076 kvfree(prog->aux->jited_linfo); 2077 kvfree(prog->aux->linfo); 2078 kfree(prog->aux->kfunc_tab); 2079 if (prog->aux->attach_btf) 2080 btf_put(prog->aux->attach_btf); 2081 2082 if (deferred) { 2083 if (prog->aux->sleepable) 2084 call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); 2085 else 2086 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 2087 } else { 2088 __bpf_prog_put_rcu(&prog->aux->rcu); 2089 } 2090 } 2091 2092 static void bpf_prog_put_deferred(struct work_struct *work) 2093 { 2094 struct bpf_prog_aux *aux; 2095 struct bpf_prog *prog; 2096 2097 aux = container_of(work, struct bpf_prog_aux, work); 2098 prog = aux->prog; 2099 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 2100 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); 2101 bpf_prog_free_id(prog); 2102 __bpf_prog_put_noref(prog, true); 2103 } 2104 2105 static void __bpf_prog_put(struct bpf_prog *prog) 2106 { 2107 struct bpf_prog_aux *aux = prog->aux; 2108 2109 if (atomic64_dec_and_test(&aux->refcnt)) { 2110 if (in_irq() || irqs_disabled()) { 2111 INIT_WORK(&aux->work, bpf_prog_put_deferred); 2112 schedule_work(&aux->work); 2113 } else { 2114 bpf_prog_put_deferred(&aux->work); 2115 } 2116 } 2117 } 2118 2119 void bpf_prog_put(struct bpf_prog *prog) 2120 { 2121 __bpf_prog_put(prog); 2122 } 2123 EXPORT_SYMBOL_GPL(bpf_prog_put); 2124 2125 static int bpf_prog_release(struct inode *inode, struct file *filp) 2126 { 2127 struct bpf_prog *prog = filp->private_data; 2128 2129 bpf_prog_put(prog); 2130 return 0; 2131 } 2132 2133 struct bpf_prog_kstats { 2134 u64 nsecs; 2135 u64 cnt; 2136 u64 misses; 2137 }; 2138 2139 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog) 2140 { 2141 struct bpf_prog_stats *stats; 2142 unsigned int flags; 2143 2144 stats = this_cpu_ptr(prog->stats); 2145 flags = u64_stats_update_begin_irqsave(&stats->syncp); 2146 u64_stats_inc(&stats->misses); 2147 u64_stats_update_end_irqrestore(&stats->syncp, flags); 2148 } 2149 2150 static void bpf_prog_get_stats(const struct bpf_prog *prog, 2151 struct bpf_prog_kstats *stats) 2152 { 2153 u64 nsecs = 0, cnt = 0, misses = 0; 2154 int cpu; 2155 2156 for_each_possible_cpu(cpu) { 2157 const struct bpf_prog_stats *st; 2158 unsigned int start; 2159 u64 tnsecs, tcnt, tmisses; 2160 2161 st = per_cpu_ptr(prog->stats, cpu); 2162 do { 2163 start = u64_stats_fetch_begin(&st->syncp); 2164 tnsecs = u64_stats_read(&st->nsecs); 2165 tcnt = u64_stats_read(&st->cnt); 2166 tmisses = u64_stats_read(&st->misses); 2167 } while (u64_stats_fetch_retry(&st->syncp, start)); 2168 nsecs += tnsecs; 2169 cnt += tcnt; 2170 misses += tmisses; 2171 } 2172 stats->nsecs = nsecs; 2173 stats->cnt = cnt; 2174 stats->misses = misses; 2175 } 2176 2177 #ifdef CONFIG_PROC_FS 2178 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 2179 { 2180 const struct bpf_prog *prog = filp->private_data; 2181 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2182 struct bpf_prog_kstats stats; 2183 2184 bpf_prog_get_stats(prog, &stats); 2185 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2186 seq_printf(m, 2187 "prog_type:\t%u\n" 2188 "prog_jited:\t%u\n" 2189 "prog_tag:\t%s\n" 2190 "memlock:\t%llu\n" 2191 "prog_id:\t%u\n" 2192 "run_time_ns:\t%llu\n" 2193 "run_cnt:\t%llu\n" 2194 "recursion_misses:\t%llu\n" 2195 "verified_insns:\t%u\n", 2196 prog->type, 2197 prog->jited, 2198 prog_tag, 2199 prog->pages * 1ULL << PAGE_SHIFT, 2200 prog->aux->id, 2201 stats.nsecs, 2202 stats.cnt, 2203 stats.misses, 2204 prog->aux->verified_insns); 2205 } 2206 #endif 2207 2208 const struct file_operations bpf_prog_fops = { 2209 #ifdef CONFIG_PROC_FS 2210 .show_fdinfo = bpf_prog_show_fdinfo, 2211 #endif 2212 .release = bpf_prog_release, 2213 .read = bpf_dummy_read, 2214 .write = bpf_dummy_write, 2215 }; 2216 2217 int bpf_prog_new_fd(struct bpf_prog *prog) 2218 { 2219 int ret; 2220 2221 ret = security_bpf_prog(prog); 2222 if (ret < 0) 2223 return ret; 2224 2225 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 2226 O_RDWR | O_CLOEXEC); 2227 } 2228 2229 static struct bpf_prog *____bpf_prog_get(struct fd f) 2230 { 2231 if (!f.file) 2232 return ERR_PTR(-EBADF); 2233 if (f.file->f_op != &bpf_prog_fops) { 2234 fdput(f); 2235 return ERR_PTR(-EINVAL); 2236 } 2237 2238 return f.file->private_data; 2239 } 2240 2241 void bpf_prog_add(struct bpf_prog *prog, int i) 2242 { 2243 atomic64_add(i, &prog->aux->refcnt); 2244 } 2245 EXPORT_SYMBOL_GPL(bpf_prog_add); 2246 2247 void bpf_prog_sub(struct bpf_prog *prog, int i) 2248 { 2249 /* Only to be used for undoing previous bpf_prog_add() in some 2250 * error path. We still know that another entity in our call 2251 * path holds a reference to the program, thus atomic_sub() can 2252 * be safely used in such cases! 2253 */ 2254 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 2255 } 2256 EXPORT_SYMBOL_GPL(bpf_prog_sub); 2257 2258 void bpf_prog_inc(struct bpf_prog *prog) 2259 { 2260 atomic64_inc(&prog->aux->refcnt); 2261 } 2262 EXPORT_SYMBOL_GPL(bpf_prog_inc); 2263 2264 /* prog_idr_lock should have been held */ 2265 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 2266 { 2267 int refold; 2268 2269 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 2270 2271 if (!refold) 2272 return ERR_PTR(-ENOENT); 2273 2274 return prog; 2275 } 2276 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 2277 2278 bool bpf_prog_get_ok(struct bpf_prog *prog, 2279 enum bpf_prog_type *attach_type, bool attach_drv) 2280 { 2281 /* not an attachment, just a refcount inc, always allow */ 2282 if (!attach_type) 2283 return true; 2284 2285 if (prog->type != *attach_type) 2286 return false; 2287 if (bpf_prog_is_offloaded(prog->aux) && !attach_drv) 2288 return false; 2289 2290 return true; 2291 } 2292 2293 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, 2294 bool attach_drv) 2295 { 2296 struct fd f = fdget(ufd); 2297 struct bpf_prog *prog; 2298 2299 prog = ____bpf_prog_get(f); 2300 if (IS_ERR(prog)) 2301 return prog; 2302 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { 2303 prog = ERR_PTR(-EINVAL); 2304 goto out; 2305 } 2306 2307 bpf_prog_inc(prog); 2308 out: 2309 fdput(f); 2310 return prog; 2311 } 2312 2313 struct bpf_prog *bpf_prog_get(u32 ufd) 2314 { 2315 return __bpf_prog_get(ufd, NULL, false); 2316 } 2317 2318 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, 2319 bool attach_drv) 2320 { 2321 return __bpf_prog_get(ufd, &type, attach_drv); 2322 } 2323 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 2324 2325 /* Initially all BPF programs could be loaded w/o specifying 2326 * expected_attach_type. Later for some of them specifying expected_attach_type 2327 * at load time became required so that program could be validated properly. 2328 * Programs of types that are allowed to be loaded both w/ and w/o (for 2329 * backward compatibility) expected_attach_type, should have the default attach 2330 * type assigned to expected_attach_type for the latter case, so that it can be 2331 * validated later at attach time. 2332 * 2333 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if 2334 * prog type requires it but has some attach types that have to be backward 2335 * compatible. 2336 */ 2337 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) 2338 { 2339 switch (attr->prog_type) { 2340 case BPF_PROG_TYPE_CGROUP_SOCK: 2341 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't 2342 * exist so checking for non-zero is the way to go here. 2343 */ 2344 if (!attr->expected_attach_type) 2345 attr->expected_attach_type = 2346 BPF_CGROUP_INET_SOCK_CREATE; 2347 break; 2348 case BPF_PROG_TYPE_SK_REUSEPORT: 2349 if (!attr->expected_attach_type) 2350 attr->expected_attach_type = 2351 BPF_SK_REUSEPORT_SELECT; 2352 break; 2353 } 2354 } 2355 2356 static int 2357 bpf_prog_load_check_attach(enum bpf_prog_type prog_type, 2358 enum bpf_attach_type expected_attach_type, 2359 struct btf *attach_btf, u32 btf_id, 2360 struct bpf_prog *dst_prog) 2361 { 2362 if (btf_id) { 2363 if (btf_id > BTF_MAX_TYPE) 2364 return -EINVAL; 2365 2366 if (!attach_btf && !dst_prog) 2367 return -EINVAL; 2368 2369 switch (prog_type) { 2370 case BPF_PROG_TYPE_TRACING: 2371 case BPF_PROG_TYPE_LSM: 2372 case BPF_PROG_TYPE_STRUCT_OPS: 2373 case BPF_PROG_TYPE_EXT: 2374 break; 2375 default: 2376 return -EINVAL; 2377 } 2378 } 2379 2380 if (attach_btf && (!btf_id || dst_prog)) 2381 return -EINVAL; 2382 2383 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && 2384 prog_type != BPF_PROG_TYPE_EXT) 2385 return -EINVAL; 2386 2387 switch (prog_type) { 2388 case BPF_PROG_TYPE_CGROUP_SOCK: 2389 switch (expected_attach_type) { 2390 case BPF_CGROUP_INET_SOCK_CREATE: 2391 case BPF_CGROUP_INET_SOCK_RELEASE: 2392 case BPF_CGROUP_INET4_POST_BIND: 2393 case BPF_CGROUP_INET6_POST_BIND: 2394 return 0; 2395 default: 2396 return -EINVAL; 2397 } 2398 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2399 switch (expected_attach_type) { 2400 case BPF_CGROUP_INET4_BIND: 2401 case BPF_CGROUP_INET6_BIND: 2402 case BPF_CGROUP_INET4_CONNECT: 2403 case BPF_CGROUP_INET6_CONNECT: 2404 case BPF_CGROUP_INET4_GETPEERNAME: 2405 case BPF_CGROUP_INET6_GETPEERNAME: 2406 case BPF_CGROUP_INET4_GETSOCKNAME: 2407 case BPF_CGROUP_INET6_GETSOCKNAME: 2408 case BPF_CGROUP_UDP4_SENDMSG: 2409 case BPF_CGROUP_UDP6_SENDMSG: 2410 case BPF_CGROUP_UDP4_RECVMSG: 2411 case BPF_CGROUP_UDP6_RECVMSG: 2412 return 0; 2413 default: 2414 return -EINVAL; 2415 } 2416 case BPF_PROG_TYPE_CGROUP_SKB: 2417 switch (expected_attach_type) { 2418 case BPF_CGROUP_INET_INGRESS: 2419 case BPF_CGROUP_INET_EGRESS: 2420 return 0; 2421 default: 2422 return -EINVAL; 2423 } 2424 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2425 switch (expected_attach_type) { 2426 case BPF_CGROUP_SETSOCKOPT: 2427 case BPF_CGROUP_GETSOCKOPT: 2428 return 0; 2429 default: 2430 return -EINVAL; 2431 } 2432 case BPF_PROG_TYPE_SK_LOOKUP: 2433 if (expected_attach_type == BPF_SK_LOOKUP) 2434 return 0; 2435 return -EINVAL; 2436 case BPF_PROG_TYPE_SK_REUSEPORT: 2437 switch (expected_attach_type) { 2438 case BPF_SK_REUSEPORT_SELECT: 2439 case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: 2440 return 0; 2441 default: 2442 return -EINVAL; 2443 } 2444 case BPF_PROG_TYPE_SYSCALL: 2445 case BPF_PROG_TYPE_EXT: 2446 if (expected_attach_type) 2447 return -EINVAL; 2448 fallthrough; 2449 default: 2450 return 0; 2451 } 2452 } 2453 2454 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) 2455 { 2456 switch (prog_type) { 2457 case BPF_PROG_TYPE_SCHED_CLS: 2458 case BPF_PROG_TYPE_SCHED_ACT: 2459 case BPF_PROG_TYPE_XDP: 2460 case BPF_PROG_TYPE_LWT_IN: 2461 case BPF_PROG_TYPE_LWT_OUT: 2462 case BPF_PROG_TYPE_LWT_XMIT: 2463 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2464 case BPF_PROG_TYPE_SK_SKB: 2465 case BPF_PROG_TYPE_SK_MSG: 2466 case BPF_PROG_TYPE_LIRC_MODE2: 2467 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2468 case BPF_PROG_TYPE_CGROUP_DEVICE: 2469 case BPF_PROG_TYPE_CGROUP_SOCK: 2470 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2471 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2472 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2473 case BPF_PROG_TYPE_SOCK_OPS: 2474 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2475 return true; 2476 case BPF_PROG_TYPE_CGROUP_SKB: 2477 /* always unpriv */ 2478 case BPF_PROG_TYPE_SK_REUSEPORT: 2479 /* equivalent to SOCKET_FILTER. need CAP_BPF only */ 2480 default: 2481 return false; 2482 } 2483 } 2484 2485 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) 2486 { 2487 switch (prog_type) { 2488 case BPF_PROG_TYPE_KPROBE: 2489 case BPF_PROG_TYPE_TRACEPOINT: 2490 case BPF_PROG_TYPE_PERF_EVENT: 2491 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2492 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2493 case BPF_PROG_TYPE_TRACING: 2494 case BPF_PROG_TYPE_LSM: 2495 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ 2496 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2497 return true; 2498 default: 2499 return false; 2500 } 2501 } 2502 2503 /* last field in 'union bpf_attr' used by this command */ 2504 #define BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size 2505 2506 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) 2507 { 2508 enum bpf_prog_type type = attr->prog_type; 2509 struct bpf_prog *prog, *dst_prog = NULL; 2510 struct btf *attach_btf = NULL; 2511 int err; 2512 char license[128]; 2513 bool is_gpl; 2514 2515 if (CHECK_ATTR(BPF_PROG_LOAD)) 2516 return -EINVAL; 2517 2518 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | 2519 BPF_F_ANY_ALIGNMENT | 2520 BPF_F_TEST_STATE_FREQ | 2521 BPF_F_SLEEPABLE | 2522 BPF_F_TEST_RND_HI32 | 2523 BPF_F_XDP_HAS_FRAGS | 2524 BPF_F_XDP_DEV_BOUND_ONLY)) 2525 return -EINVAL; 2526 2527 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 2528 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && 2529 !bpf_capable()) 2530 return -EPERM; 2531 2532 /* copy eBPF program license from user space */ 2533 if (strncpy_from_bpfptr(license, 2534 make_bpfptr(attr->license, uattr.is_kernel), 2535 sizeof(license) - 1) < 0) 2536 return -EFAULT; 2537 license[sizeof(license) - 1] = 0; 2538 2539 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 2540 is_gpl = license_is_gpl_compatible(license); 2541 2542 if (attr->insn_cnt == 0 || 2543 attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) 2544 return -E2BIG; 2545 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 2546 type != BPF_PROG_TYPE_CGROUP_SKB && 2547 !bpf_capable()) 2548 return -EPERM; 2549 2550 if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) 2551 return -EPERM; 2552 if (is_perfmon_prog_type(type) && !perfmon_capable()) 2553 return -EPERM; 2554 2555 /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog 2556 * or btf, we need to check which one it is 2557 */ 2558 if (attr->attach_prog_fd) { 2559 dst_prog = bpf_prog_get(attr->attach_prog_fd); 2560 if (IS_ERR(dst_prog)) { 2561 dst_prog = NULL; 2562 attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); 2563 if (IS_ERR(attach_btf)) 2564 return -EINVAL; 2565 if (!btf_is_kernel(attach_btf)) { 2566 /* attaching through specifying bpf_prog's BTF 2567 * objects directly might be supported eventually 2568 */ 2569 btf_put(attach_btf); 2570 return -ENOTSUPP; 2571 } 2572 } 2573 } else if (attr->attach_btf_id) { 2574 /* fall back to vmlinux BTF, if BTF type ID is specified */ 2575 attach_btf = bpf_get_btf_vmlinux(); 2576 if (IS_ERR(attach_btf)) 2577 return PTR_ERR(attach_btf); 2578 if (!attach_btf) 2579 return -EINVAL; 2580 btf_get(attach_btf); 2581 } 2582 2583 bpf_prog_load_fixup_attach_type(attr); 2584 if (bpf_prog_load_check_attach(type, attr->expected_attach_type, 2585 attach_btf, attr->attach_btf_id, 2586 dst_prog)) { 2587 if (dst_prog) 2588 bpf_prog_put(dst_prog); 2589 if (attach_btf) 2590 btf_put(attach_btf); 2591 return -EINVAL; 2592 } 2593 2594 /* plain bpf_prog allocation */ 2595 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 2596 if (!prog) { 2597 if (dst_prog) 2598 bpf_prog_put(dst_prog); 2599 if (attach_btf) 2600 btf_put(attach_btf); 2601 return -ENOMEM; 2602 } 2603 2604 prog->expected_attach_type = attr->expected_attach_type; 2605 prog->aux->attach_btf = attach_btf; 2606 prog->aux->attach_btf_id = attr->attach_btf_id; 2607 prog->aux->dst_prog = dst_prog; 2608 prog->aux->dev_bound = !!attr->prog_ifindex; 2609 prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; 2610 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; 2611 2612 err = security_bpf_prog_alloc(prog->aux); 2613 if (err) 2614 goto free_prog; 2615 2616 prog->aux->user = get_current_user(); 2617 prog->len = attr->insn_cnt; 2618 2619 err = -EFAULT; 2620 if (copy_from_bpfptr(prog->insns, 2621 make_bpfptr(attr->insns, uattr.is_kernel), 2622 bpf_prog_insn_size(prog)) != 0) 2623 goto free_prog_sec; 2624 2625 prog->orig_prog = NULL; 2626 prog->jited = 0; 2627 2628 atomic64_set(&prog->aux->refcnt, 1); 2629 prog->gpl_compatible = is_gpl ? 1 : 0; 2630 2631 if (bpf_prog_is_dev_bound(prog->aux)) { 2632 err = bpf_prog_dev_bound_init(prog, attr); 2633 if (err) 2634 goto free_prog_sec; 2635 } 2636 2637 if (type == BPF_PROG_TYPE_EXT && dst_prog && 2638 bpf_prog_is_dev_bound(dst_prog->aux)) { 2639 err = bpf_prog_dev_bound_inherit(prog, dst_prog); 2640 if (err) 2641 goto free_prog_sec; 2642 } 2643 2644 /* find program type: socket_filter vs tracing_filter */ 2645 err = find_prog_type(type, prog); 2646 if (err < 0) 2647 goto free_prog_sec; 2648 2649 prog->aux->load_time = ktime_get_boottime_ns(); 2650 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, 2651 sizeof(attr->prog_name)); 2652 if (err < 0) 2653 goto free_prog_sec; 2654 2655 /* run eBPF verifier */ 2656 err = bpf_check(&prog, attr, uattr); 2657 if (err < 0) 2658 goto free_used_maps; 2659 2660 prog = bpf_prog_select_runtime(prog, &err); 2661 if (err < 0) 2662 goto free_used_maps; 2663 2664 err = bpf_prog_alloc_id(prog); 2665 if (err) 2666 goto free_used_maps; 2667 2668 /* Upon success of bpf_prog_alloc_id(), the BPF prog is 2669 * effectively publicly exposed. However, retrieving via 2670 * bpf_prog_get_fd_by_id() will take another reference, 2671 * therefore it cannot be gone underneath us. 2672 * 2673 * Only for the time /after/ successful bpf_prog_new_fd() 2674 * and before returning to userspace, we might just hold 2675 * one reference and any parallel close on that fd could 2676 * rip everything out. Hence, below notifications must 2677 * happen before bpf_prog_new_fd(). 2678 * 2679 * Also, any failure handling from this point onwards must 2680 * be using bpf_prog_put() given the program is exposed. 2681 */ 2682 bpf_prog_kallsyms_add(prog); 2683 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); 2684 bpf_audit_prog(prog, BPF_AUDIT_LOAD); 2685 2686 err = bpf_prog_new_fd(prog); 2687 if (err < 0) 2688 bpf_prog_put(prog); 2689 return err; 2690 2691 free_used_maps: 2692 /* In case we have subprogs, we need to wait for a grace 2693 * period before we can tear down JIT memory since symbols 2694 * are already exposed under kallsyms. 2695 */ 2696 __bpf_prog_put_noref(prog, prog->aux->func_cnt); 2697 return err; 2698 free_prog_sec: 2699 free_uid(prog->aux->user); 2700 security_bpf_prog_free(prog->aux); 2701 free_prog: 2702 if (prog->aux->attach_btf) 2703 btf_put(prog->aux->attach_btf); 2704 bpf_prog_free(prog); 2705 return err; 2706 } 2707 2708 #define BPF_OBJ_LAST_FIELD file_flags 2709 2710 static int bpf_obj_pin(const union bpf_attr *attr) 2711 { 2712 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) 2713 return -EINVAL; 2714 2715 return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); 2716 } 2717 2718 static int bpf_obj_get(const union bpf_attr *attr) 2719 { 2720 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 2721 attr->file_flags & ~BPF_OBJ_FLAG_MASK) 2722 return -EINVAL; 2723 2724 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), 2725 attr->file_flags); 2726 } 2727 2728 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, 2729 const struct bpf_link_ops *ops, struct bpf_prog *prog) 2730 { 2731 atomic64_set(&link->refcnt, 1); 2732 link->type = type; 2733 link->id = 0; 2734 link->ops = ops; 2735 link->prog = prog; 2736 } 2737 2738 static void bpf_link_free_id(int id) 2739 { 2740 if (!id) 2741 return; 2742 2743 spin_lock_bh(&link_idr_lock); 2744 idr_remove(&link_idr, id); 2745 spin_unlock_bh(&link_idr_lock); 2746 } 2747 2748 /* Clean up bpf_link and corresponding anon_inode file and FD. After 2749 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 2750 * anon_inode's release() call. This helper marksbpf_link as 2751 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 2752 * is not decremented, it's the responsibility of a calling code that failed 2753 * to complete bpf_link initialization. 2754 */ 2755 void bpf_link_cleanup(struct bpf_link_primer *primer) 2756 { 2757 primer->link->prog = NULL; 2758 bpf_link_free_id(primer->id); 2759 fput(primer->file); 2760 put_unused_fd(primer->fd); 2761 } 2762 2763 void bpf_link_inc(struct bpf_link *link) 2764 { 2765 atomic64_inc(&link->refcnt); 2766 } 2767 2768 /* bpf_link_free is guaranteed to be called from process context */ 2769 static void bpf_link_free(struct bpf_link *link) 2770 { 2771 bpf_link_free_id(link->id); 2772 if (link->prog) { 2773 /* detach BPF program, clean up used resources */ 2774 link->ops->release(link); 2775 bpf_prog_put(link->prog); 2776 } 2777 /* free bpf_link and its containing memory */ 2778 link->ops->dealloc(link); 2779 } 2780 2781 static void bpf_link_put_deferred(struct work_struct *work) 2782 { 2783 struct bpf_link *link = container_of(work, struct bpf_link, work); 2784 2785 bpf_link_free(link); 2786 } 2787 2788 /* bpf_link_put can be called from atomic context, but ensures that resources 2789 * are freed from process context 2790 */ 2791 void bpf_link_put(struct bpf_link *link) 2792 { 2793 if (!atomic64_dec_and_test(&link->refcnt)) 2794 return; 2795 2796 if (in_atomic()) { 2797 INIT_WORK(&link->work, bpf_link_put_deferred); 2798 schedule_work(&link->work); 2799 } else { 2800 bpf_link_free(link); 2801 } 2802 } 2803 EXPORT_SYMBOL(bpf_link_put); 2804 2805 static int bpf_link_release(struct inode *inode, struct file *filp) 2806 { 2807 struct bpf_link *link = filp->private_data; 2808 2809 bpf_link_put(link); 2810 return 0; 2811 } 2812 2813 #ifdef CONFIG_PROC_FS 2814 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 2815 #define BPF_MAP_TYPE(_id, _ops) 2816 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name, 2817 static const char *bpf_link_type_strs[] = { 2818 [BPF_LINK_TYPE_UNSPEC] = "<invalid>", 2819 #include <linux/bpf_types.h> 2820 }; 2821 #undef BPF_PROG_TYPE 2822 #undef BPF_MAP_TYPE 2823 #undef BPF_LINK_TYPE 2824 2825 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) 2826 { 2827 const struct bpf_link *link = filp->private_data; 2828 const struct bpf_prog *prog = link->prog; 2829 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2830 2831 seq_printf(m, 2832 "link_type:\t%s\n" 2833 "link_id:\t%u\n", 2834 bpf_link_type_strs[link->type], 2835 link->id); 2836 if (prog) { 2837 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2838 seq_printf(m, 2839 "prog_tag:\t%s\n" 2840 "prog_id:\t%u\n", 2841 prog_tag, 2842 prog->aux->id); 2843 } 2844 if (link->ops->show_fdinfo) 2845 link->ops->show_fdinfo(link, m); 2846 } 2847 #endif 2848 2849 static const struct file_operations bpf_link_fops = { 2850 #ifdef CONFIG_PROC_FS 2851 .show_fdinfo = bpf_link_show_fdinfo, 2852 #endif 2853 .release = bpf_link_release, 2854 .read = bpf_dummy_read, 2855 .write = bpf_dummy_write, 2856 }; 2857 2858 static int bpf_link_alloc_id(struct bpf_link *link) 2859 { 2860 int id; 2861 2862 idr_preload(GFP_KERNEL); 2863 spin_lock_bh(&link_idr_lock); 2864 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); 2865 spin_unlock_bh(&link_idr_lock); 2866 idr_preload_end(); 2867 2868 return id; 2869 } 2870 2871 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, 2872 * reserving unused FD and allocating ID from link_idr. This is to be paired 2873 * with bpf_link_settle() to install FD and ID and expose bpf_link to 2874 * user-space, if bpf_link is successfully attached. If not, bpf_link and 2875 * pre-allocated resources are to be freed with bpf_cleanup() call. All the 2876 * transient state is passed around in struct bpf_link_primer. 2877 * This is preferred way to create and initialize bpf_link, especially when 2878 * there are complicated and expensive operations in between creating bpf_link 2879 * itself and attaching it to BPF hook. By using bpf_link_prime() and 2880 * bpf_link_settle() kernel code using bpf_link doesn't have to perform 2881 * expensive (and potentially failing) roll back operations in a rare case 2882 * that file, FD, or ID can't be allocated. 2883 */ 2884 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) 2885 { 2886 struct file *file; 2887 int fd, id; 2888 2889 fd = get_unused_fd_flags(O_CLOEXEC); 2890 if (fd < 0) 2891 return fd; 2892 2893 2894 id = bpf_link_alloc_id(link); 2895 if (id < 0) { 2896 put_unused_fd(fd); 2897 return id; 2898 } 2899 2900 file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC); 2901 if (IS_ERR(file)) { 2902 bpf_link_free_id(id); 2903 put_unused_fd(fd); 2904 return PTR_ERR(file); 2905 } 2906 2907 primer->link = link; 2908 primer->file = file; 2909 primer->fd = fd; 2910 primer->id = id; 2911 return 0; 2912 } 2913 2914 int bpf_link_settle(struct bpf_link_primer *primer) 2915 { 2916 /* make bpf_link fetchable by ID */ 2917 spin_lock_bh(&link_idr_lock); 2918 primer->link->id = primer->id; 2919 spin_unlock_bh(&link_idr_lock); 2920 /* make bpf_link fetchable by FD */ 2921 fd_install(primer->fd, primer->file); 2922 /* pass through installed FD */ 2923 return primer->fd; 2924 } 2925 2926 int bpf_link_new_fd(struct bpf_link *link) 2927 { 2928 return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); 2929 } 2930 2931 struct bpf_link *bpf_link_get_from_fd(u32 ufd) 2932 { 2933 struct fd f = fdget(ufd); 2934 struct bpf_link *link; 2935 2936 if (!f.file) 2937 return ERR_PTR(-EBADF); 2938 if (f.file->f_op != &bpf_link_fops) { 2939 fdput(f); 2940 return ERR_PTR(-EINVAL); 2941 } 2942 2943 link = f.file->private_data; 2944 bpf_link_inc(link); 2945 fdput(f); 2946 2947 return link; 2948 } 2949 EXPORT_SYMBOL(bpf_link_get_from_fd); 2950 2951 static void bpf_tracing_link_release(struct bpf_link *link) 2952 { 2953 struct bpf_tracing_link *tr_link = 2954 container_of(link, struct bpf_tracing_link, link.link); 2955 2956 WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, 2957 tr_link->trampoline)); 2958 2959 bpf_trampoline_put(tr_link->trampoline); 2960 2961 /* tgt_prog is NULL if target is a kernel function */ 2962 if (tr_link->tgt_prog) 2963 bpf_prog_put(tr_link->tgt_prog); 2964 } 2965 2966 static void bpf_tracing_link_dealloc(struct bpf_link *link) 2967 { 2968 struct bpf_tracing_link *tr_link = 2969 container_of(link, struct bpf_tracing_link, link.link); 2970 2971 kfree(tr_link); 2972 } 2973 2974 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, 2975 struct seq_file *seq) 2976 { 2977 struct bpf_tracing_link *tr_link = 2978 container_of(link, struct bpf_tracing_link, link.link); 2979 2980 seq_printf(seq, 2981 "attach_type:\t%d\n", 2982 tr_link->attach_type); 2983 } 2984 2985 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, 2986 struct bpf_link_info *info) 2987 { 2988 struct bpf_tracing_link *tr_link = 2989 container_of(link, struct bpf_tracing_link, link.link); 2990 2991 info->tracing.attach_type = tr_link->attach_type; 2992 bpf_trampoline_unpack_key(tr_link->trampoline->key, 2993 &info->tracing.target_obj_id, 2994 &info->tracing.target_btf_id); 2995 2996 return 0; 2997 } 2998 2999 static const struct bpf_link_ops bpf_tracing_link_lops = { 3000 .release = bpf_tracing_link_release, 3001 .dealloc = bpf_tracing_link_dealloc, 3002 .show_fdinfo = bpf_tracing_link_show_fdinfo, 3003 .fill_link_info = bpf_tracing_link_fill_link_info, 3004 }; 3005 3006 static int bpf_tracing_prog_attach(struct bpf_prog *prog, 3007 int tgt_prog_fd, 3008 u32 btf_id, 3009 u64 bpf_cookie) 3010 { 3011 struct bpf_link_primer link_primer; 3012 struct bpf_prog *tgt_prog = NULL; 3013 struct bpf_trampoline *tr = NULL; 3014 struct bpf_tracing_link *link; 3015 u64 key = 0; 3016 int err; 3017 3018 switch (prog->type) { 3019 case BPF_PROG_TYPE_TRACING: 3020 if (prog->expected_attach_type != BPF_TRACE_FENTRY && 3021 prog->expected_attach_type != BPF_TRACE_FEXIT && 3022 prog->expected_attach_type != BPF_MODIFY_RETURN) { 3023 err = -EINVAL; 3024 goto out_put_prog; 3025 } 3026 break; 3027 case BPF_PROG_TYPE_EXT: 3028 if (prog->expected_attach_type != 0) { 3029 err = -EINVAL; 3030 goto out_put_prog; 3031 } 3032 break; 3033 case BPF_PROG_TYPE_LSM: 3034 if (prog->expected_attach_type != BPF_LSM_MAC) { 3035 err = -EINVAL; 3036 goto out_put_prog; 3037 } 3038 break; 3039 default: 3040 err = -EINVAL; 3041 goto out_put_prog; 3042 } 3043 3044 if (!!tgt_prog_fd != !!btf_id) { 3045 err = -EINVAL; 3046 goto out_put_prog; 3047 } 3048 3049 if (tgt_prog_fd) { 3050 /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ 3051 if (prog->type != BPF_PROG_TYPE_EXT) { 3052 err = -EINVAL; 3053 goto out_put_prog; 3054 } 3055 3056 tgt_prog = bpf_prog_get(tgt_prog_fd); 3057 if (IS_ERR(tgt_prog)) { 3058 err = PTR_ERR(tgt_prog); 3059 tgt_prog = NULL; 3060 goto out_put_prog; 3061 } 3062 3063 key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id); 3064 } 3065 3066 link = kzalloc(sizeof(*link), GFP_USER); 3067 if (!link) { 3068 err = -ENOMEM; 3069 goto out_put_prog; 3070 } 3071 bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, 3072 &bpf_tracing_link_lops, prog); 3073 link->attach_type = prog->expected_attach_type; 3074 link->link.cookie = bpf_cookie; 3075 3076 mutex_lock(&prog->aux->dst_mutex); 3077 3078 /* There are a few possible cases here: 3079 * 3080 * - if prog->aux->dst_trampoline is set, the program was just loaded 3081 * and not yet attached to anything, so we can use the values stored 3082 * in prog->aux 3083 * 3084 * - if prog->aux->dst_trampoline is NULL, the program has already been 3085 * attached to a target and its initial target was cleared (below) 3086 * 3087 * - if tgt_prog != NULL, the caller specified tgt_prog_fd + 3088 * target_btf_id using the link_create API. 3089 * 3090 * - if tgt_prog == NULL when this function was called using the old 3091 * raw_tracepoint_open API, and we need a target from prog->aux 3092 * 3093 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program 3094 * was detached and is going for re-attachment. 3095 */ 3096 if (!prog->aux->dst_trampoline && !tgt_prog) { 3097 /* 3098 * Allow re-attach for TRACING and LSM programs. If it's 3099 * currently linked, bpf_trampoline_link_prog will fail. 3100 * EXT programs need to specify tgt_prog_fd, so they 3101 * re-attach in separate code path. 3102 */ 3103 if (prog->type != BPF_PROG_TYPE_TRACING && 3104 prog->type != BPF_PROG_TYPE_LSM) { 3105 err = -EINVAL; 3106 goto out_unlock; 3107 } 3108 btf_id = prog->aux->attach_btf_id; 3109 key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id); 3110 } 3111 3112 if (!prog->aux->dst_trampoline || 3113 (key && key != prog->aux->dst_trampoline->key)) { 3114 /* If there is no saved target, or the specified target is 3115 * different from the destination specified at load time, we 3116 * need a new trampoline and a check for compatibility 3117 */ 3118 struct bpf_attach_target_info tgt_info = {}; 3119 3120 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, 3121 &tgt_info); 3122 if (err) 3123 goto out_unlock; 3124 3125 if (tgt_info.tgt_mod) { 3126 module_put(prog->aux->mod); 3127 prog->aux->mod = tgt_info.tgt_mod; 3128 } 3129 3130 tr = bpf_trampoline_get(key, &tgt_info); 3131 if (!tr) { 3132 err = -ENOMEM; 3133 goto out_unlock; 3134 } 3135 } else { 3136 /* The caller didn't specify a target, or the target was the 3137 * same as the destination supplied during program load. This 3138 * means we can reuse the trampoline and reference from program 3139 * load time, and there is no need to allocate a new one. This 3140 * can only happen once for any program, as the saved values in 3141 * prog->aux are cleared below. 3142 */ 3143 tr = prog->aux->dst_trampoline; 3144 tgt_prog = prog->aux->dst_prog; 3145 } 3146 3147 err = bpf_link_prime(&link->link.link, &link_primer); 3148 if (err) 3149 goto out_unlock; 3150 3151 err = bpf_trampoline_link_prog(&link->link, tr); 3152 if (err) { 3153 bpf_link_cleanup(&link_primer); 3154 link = NULL; 3155 goto out_unlock; 3156 } 3157 3158 link->tgt_prog = tgt_prog; 3159 link->trampoline = tr; 3160 3161 /* Always clear the trampoline and target prog from prog->aux to make 3162 * sure the original attach destination is not kept alive after a 3163 * program is (re-)attached to another target. 3164 */ 3165 if (prog->aux->dst_prog && 3166 (tgt_prog_fd || tr != prog->aux->dst_trampoline)) 3167 /* got extra prog ref from syscall, or attaching to different prog */ 3168 bpf_prog_put(prog->aux->dst_prog); 3169 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) 3170 /* we allocated a new trampoline, so free the old one */ 3171 bpf_trampoline_put(prog->aux->dst_trampoline); 3172 3173 prog->aux->dst_prog = NULL; 3174 prog->aux->dst_trampoline = NULL; 3175 mutex_unlock(&prog->aux->dst_mutex); 3176 3177 return bpf_link_settle(&link_primer); 3178 out_unlock: 3179 if (tr && tr != prog->aux->dst_trampoline) 3180 bpf_trampoline_put(tr); 3181 mutex_unlock(&prog->aux->dst_mutex); 3182 kfree(link); 3183 out_put_prog: 3184 if (tgt_prog_fd && tgt_prog) 3185 bpf_prog_put(tgt_prog); 3186 return err; 3187 } 3188 3189 struct bpf_raw_tp_link { 3190 struct bpf_link link; 3191 struct bpf_raw_event_map *btp; 3192 }; 3193 3194 static void bpf_raw_tp_link_release(struct bpf_link *link) 3195 { 3196 struct bpf_raw_tp_link *raw_tp = 3197 container_of(link, struct bpf_raw_tp_link, link); 3198 3199 bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog); 3200 bpf_put_raw_tracepoint(raw_tp->btp); 3201 } 3202 3203 static void bpf_raw_tp_link_dealloc(struct bpf_link *link) 3204 { 3205 struct bpf_raw_tp_link *raw_tp = 3206 container_of(link, struct bpf_raw_tp_link, link); 3207 3208 kfree(raw_tp); 3209 } 3210 3211 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, 3212 struct seq_file *seq) 3213 { 3214 struct bpf_raw_tp_link *raw_tp_link = 3215 container_of(link, struct bpf_raw_tp_link, link); 3216 3217 seq_printf(seq, 3218 "tp_name:\t%s\n", 3219 raw_tp_link->btp->tp->name); 3220 } 3221 3222 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, 3223 struct bpf_link_info *info) 3224 { 3225 struct bpf_raw_tp_link *raw_tp_link = 3226 container_of(link, struct bpf_raw_tp_link, link); 3227 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); 3228 const char *tp_name = raw_tp_link->btp->tp->name; 3229 u32 ulen = info->raw_tracepoint.tp_name_len; 3230 size_t tp_len = strlen(tp_name); 3231 3232 if (!ulen ^ !ubuf) 3233 return -EINVAL; 3234 3235 info->raw_tracepoint.tp_name_len = tp_len + 1; 3236 3237 if (!ubuf) 3238 return 0; 3239 3240 if (ulen >= tp_len + 1) { 3241 if (copy_to_user(ubuf, tp_name, tp_len + 1)) 3242 return -EFAULT; 3243 } else { 3244 char zero = '\0'; 3245 3246 if (copy_to_user(ubuf, tp_name, ulen - 1)) 3247 return -EFAULT; 3248 if (put_user(zero, ubuf + ulen - 1)) 3249 return -EFAULT; 3250 return -ENOSPC; 3251 } 3252 3253 return 0; 3254 } 3255 3256 static const struct bpf_link_ops bpf_raw_tp_link_lops = { 3257 .release = bpf_raw_tp_link_release, 3258 .dealloc = bpf_raw_tp_link_dealloc, 3259 .show_fdinfo = bpf_raw_tp_link_show_fdinfo, 3260 .fill_link_info = bpf_raw_tp_link_fill_link_info, 3261 }; 3262 3263 #ifdef CONFIG_PERF_EVENTS 3264 struct bpf_perf_link { 3265 struct bpf_link link; 3266 struct file *perf_file; 3267 }; 3268 3269 static void bpf_perf_link_release(struct bpf_link *link) 3270 { 3271 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3272 struct perf_event *event = perf_link->perf_file->private_data; 3273 3274 perf_event_free_bpf_prog(event); 3275 fput(perf_link->perf_file); 3276 } 3277 3278 static void bpf_perf_link_dealloc(struct bpf_link *link) 3279 { 3280 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3281 3282 kfree(perf_link); 3283 } 3284 3285 static const struct bpf_link_ops bpf_perf_link_lops = { 3286 .release = bpf_perf_link_release, 3287 .dealloc = bpf_perf_link_dealloc, 3288 }; 3289 3290 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3291 { 3292 struct bpf_link_primer link_primer; 3293 struct bpf_perf_link *link; 3294 struct perf_event *event; 3295 struct file *perf_file; 3296 int err; 3297 3298 if (attr->link_create.flags) 3299 return -EINVAL; 3300 3301 perf_file = perf_event_get(attr->link_create.target_fd); 3302 if (IS_ERR(perf_file)) 3303 return PTR_ERR(perf_file); 3304 3305 link = kzalloc(sizeof(*link), GFP_USER); 3306 if (!link) { 3307 err = -ENOMEM; 3308 goto out_put_file; 3309 } 3310 bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog); 3311 link->perf_file = perf_file; 3312 3313 err = bpf_link_prime(&link->link, &link_primer); 3314 if (err) { 3315 kfree(link); 3316 goto out_put_file; 3317 } 3318 3319 event = perf_file->private_data; 3320 err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie); 3321 if (err) { 3322 bpf_link_cleanup(&link_primer); 3323 goto out_put_file; 3324 } 3325 /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */ 3326 bpf_prog_inc(prog); 3327 3328 return bpf_link_settle(&link_primer); 3329 3330 out_put_file: 3331 fput(perf_file); 3332 return err; 3333 } 3334 #else 3335 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3336 { 3337 return -EOPNOTSUPP; 3338 } 3339 #endif /* CONFIG_PERF_EVENTS */ 3340 3341 static int bpf_raw_tp_link_attach(struct bpf_prog *prog, 3342 const char __user *user_tp_name) 3343 { 3344 struct bpf_link_primer link_primer; 3345 struct bpf_raw_tp_link *link; 3346 struct bpf_raw_event_map *btp; 3347 const char *tp_name; 3348 char buf[128]; 3349 int err; 3350 3351 switch (prog->type) { 3352 case BPF_PROG_TYPE_TRACING: 3353 case BPF_PROG_TYPE_EXT: 3354 case BPF_PROG_TYPE_LSM: 3355 if (user_tp_name) 3356 /* The attach point for this category of programs 3357 * should be specified via btf_id during program load. 3358 */ 3359 return -EINVAL; 3360 if (prog->type == BPF_PROG_TYPE_TRACING && 3361 prog->expected_attach_type == BPF_TRACE_RAW_TP) { 3362 tp_name = prog->aux->attach_func_name; 3363 break; 3364 } 3365 return bpf_tracing_prog_attach(prog, 0, 0, 0); 3366 case BPF_PROG_TYPE_RAW_TRACEPOINT: 3367 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 3368 if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) 3369 return -EFAULT; 3370 buf[sizeof(buf) - 1] = 0; 3371 tp_name = buf; 3372 break; 3373 default: 3374 return -EINVAL; 3375 } 3376 3377 btp = bpf_get_raw_tracepoint(tp_name); 3378 if (!btp) 3379 return -ENOENT; 3380 3381 link = kzalloc(sizeof(*link), GFP_USER); 3382 if (!link) { 3383 err = -ENOMEM; 3384 goto out_put_btp; 3385 } 3386 bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, 3387 &bpf_raw_tp_link_lops, prog); 3388 link->btp = btp; 3389 3390 err = bpf_link_prime(&link->link, &link_primer); 3391 if (err) { 3392 kfree(link); 3393 goto out_put_btp; 3394 } 3395 3396 err = bpf_probe_register(link->btp, prog); 3397 if (err) { 3398 bpf_link_cleanup(&link_primer); 3399 goto out_put_btp; 3400 } 3401 3402 return bpf_link_settle(&link_primer); 3403 3404 out_put_btp: 3405 bpf_put_raw_tracepoint(btp); 3406 return err; 3407 } 3408 3409 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd 3410 3411 static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 3412 { 3413 struct bpf_prog *prog; 3414 int fd; 3415 3416 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) 3417 return -EINVAL; 3418 3419 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 3420 if (IS_ERR(prog)) 3421 return PTR_ERR(prog); 3422 3423 fd = bpf_raw_tp_link_attach(prog, u64_to_user_ptr(attr->raw_tracepoint.name)); 3424 if (fd < 0) 3425 bpf_prog_put(prog); 3426 return fd; 3427 } 3428 3429 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 3430 enum bpf_attach_type attach_type) 3431 { 3432 switch (prog->type) { 3433 case BPF_PROG_TYPE_CGROUP_SOCK: 3434 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3435 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3436 case BPF_PROG_TYPE_SK_LOOKUP: 3437 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 3438 case BPF_PROG_TYPE_CGROUP_SKB: 3439 if (!capable(CAP_NET_ADMIN)) 3440 /* cg-skb progs can be loaded by unpriv user. 3441 * check permissions at attach time. 3442 */ 3443 return -EPERM; 3444 return prog->enforce_expected_attach_type && 3445 prog->expected_attach_type != attach_type ? 3446 -EINVAL : 0; 3447 default: 3448 return 0; 3449 } 3450 } 3451 3452 static enum bpf_prog_type 3453 attach_type_to_prog_type(enum bpf_attach_type attach_type) 3454 { 3455 switch (attach_type) { 3456 case BPF_CGROUP_INET_INGRESS: 3457 case BPF_CGROUP_INET_EGRESS: 3458 return BPF_PROG_TYPE_CGROUP_SKB; 3459 case BPF_CGROUP_INET_SOCK_CREATE: 3460 case BPF_CGROUP_INET_SOCK_RELEASE: 3461 case BPF_CGROUP_INET4_POST_BIND: 3462 case BPF_CGROUP_INET6_POST_BIND: 3463 return BPF_PROG_TYPE_CGROUP_SOCK; 3464 case BPF_CGROUP_INET4_BIND: 3465 case BPF_CGROUP_INET6_BIND: 3466 case BPF_CGROUP_INET4_CONNECT: 3467 case BPF_CGROUP_INET6_CONNECT: 3468 case BPF_CGROUP_INET4_GETPEERNAME: 3469 case BPF_CGROUP_INET6_GETPEERNAME: 3470 case BPF_CGROUP_INET4_GETSOCKNAME: 3471 case BPF_CGROUP_INET6_GETSOCKNAME: 3472 case BPF_CGROUP_UDP4_SENDMSG: 3473 case BPF_CGROUP_UDP6_SENDMSG: 3474 case BPF_CGROUP_UDP4_RECVMSG: 3475 case BPF_CGROUP_UDP6_RECVMSG: 3476 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 3477 case BPF_CGROUP_SOCK_OPS: 3478 return BPF_PROG_TYPE_SOCK_OPS; 3479 case BPF_CGROUP_DEVICE: 3480 return BPF_PROG_TYPE_CGROUP_DEVICE; 3481 case BPF_SK_MSG_VERDICT: 3482 return BPF_PROG_TYPE_SK_MSG; 3483 case BPF_SK_SKB_STREAM_PARSER: 3484 case BPF_SK_SKB_STREAM_VERDICT: 3485 case BPF_SK_SKB_VERDICT: 3486 return BPF_PROG_TYPE_SK_SKB; 3487 case BPF_LIRC_MODE2: 3488 return BPF_PROG_TYPE_LIRC_MODE2; 3489 case BPF_FLOW_DISSECTOR: 3490 return BPF_PROG_TYPE_FLOW_DISSECTOR; 3491 case BPF_CGROUP_SYSCTL: 3492 return BPF_PROG_TYPE_CGROUP_SYSCTL; 3493 case BPF_CGROUP_GETSOCKOPT: 3494 case BPF_CGROUP_SETSOCKOPT: 3495 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 3496 case BPF_TRACE_ITER: 3497 case BPF_TRACE_RAW_TP: 3498 case BPF_TRACE_FENTRY: 3499 case BPF_TRACE_FEXIT: 3500 case BPF_MODIFY_RETURN: 3501 return BPF_PROG_TYPE_TRACING; 3502 case BPF_LSM_MAC: 3503 return BPF_PROG_TYPE_LSM; 3504 case BPF_SK_LOOKUP: 3505 return BPF_PROG_TYPE_SK_LOOKUP; 3506 case BPF_XDP: 3507 return BPF_PROG_TYPE_XDP; 3508 case BPF_LSM_CGROUP: 3509 return BPF_PROG_TYPE_LSM; 3510 default: 3511 return BPF_PROG_TYPE_UNSPEC; 3512 } 3513 } 3514 3515 #define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd 3516 3517 #define BPF_F_ATTACH_MASK \ 3518 (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE) 3519 3520 static int bpf_prog_attach(const union bpf_attr *attr) 3521 { 3522 enum bpf_prog_type ptype; 3523 struct bpf_prog *prog; 3524 int ret; 3525 3526 if (CHECK_ATTR(BPF_PROG_ATTACH)) 3527 return -EINVAL; 3528 3529 if (attr->attach_flags & ~BPF_F_ATTACH_MASK) 3530 return -EINVAL; 3531 3532 ptype = attach_type_to_prog_type(attr->attach_type); 3533 if (ptype == BPF_PROG_TYPE_UNSPEC) 3534 return -EINVAL; 3535 3536 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 3537 if (IS_ERR(prog)) 3538 return PTR_ERR(prog); 3539 3540 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) { 3541 bpf_prog_put(prog); 3542 return -EINVAL; 3543 } 3544 3545 switch (ptype) { 3546 case BPF_PROG_TYPE_SK_SKB: 3547 case BPF_PROG_TYPE_SK_MSG: 3548 ret = sock_map_get_from_fd(attr, prog); 3549 break; 3550 case BPF_PROG_TYPE_LIRC_MODE2: 3551 ret = lirc_prog_attach(attr, prog); 3552 break; 3553 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3554 ret = netns_bpf_prog_attach(attr, prog); 3555 break; 3556 case BPF_PROG_TYPE_CGROUP_DEVICE: 3557 case BPF_PROG_TYPE_CGROUP_SKB: 3558 case BPF_PROG_TYPE_CGROUP_SOCK: 3559 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3560 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3561 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3562 case BPF_PROG_TYPE_SOCK_OPS: 3563 case BPF_PROG_TYPE_LSM: 3564 if (ptype == BPF_PROG_TYPE_LSM && 3565 prog->expected_attach_type != BPF_LSM_CGROUP) 3566 ret = -EINVAL; 3567 else 3568 ret = cgroup_bpf_prog_attach(attr, ptype, prog); 3569 break; 3570 default: 3571 ret = -EINVAL; 3572 } 3573 3574 if (ret) 3575 bpf_prog_put(prog); 3576 return ret; 3577 } 3578 3579 #define BPF_PROG_DETACH_LAST_FIELD attach_type 3580 3581 static int bpf_prog_detach(const union bpf_attr *attr) 3582 { 3583 enum bpf_prog_type ptype; 3584 3585 if (CHECK_ATTR(BPF_PROG_DETACH)) 3586 return -EINVAL; 3587 3588 ptype = attach_type_to_prog_type(attr->attach_type); 3589 3590 switch (ptype) { 3591 case BPF_PROG_TYPE_SK_MSG: 3592 case BPF_PROG_TYPE_SK_SKB: 3593 return sock_map_prog_detach(attr, ptype); 3594 case BPF_PROG_TYPE_LIRC_MODE2: 3595 return lirc_prog_detach(attr); 3596 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3597 return netns_bpf_prog_detach(attr, ptype); 3598 case BPF_PROG_TYPE_CGROUP_DEVICE: 3599 case BPF_PROG_TYPE_CGROUP_SKB: 3600 case BPF_PROG_TYPE_CGROUP_SOCK: 3601 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3602 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3603 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3604 case BPF_PROG_TYPE_SOCK_OPS: 3605 case BPF_PROG_TYPE_LSM: 3606 return cgroup_bpf_prog_detach(attr, ptype); 3607 default: 3608 return -EINVAL; 3609 } 3610 } 3611 3612 #define BPF_PROG_QUERY_LAST_FIELD query.prog_attach_flags 3613 3614 static int bpf_prog_query(const union bpf_attr *attr, 3615 union bpf_attr __user *uattr) 3616 { 3617 if (!capable(CAP_NET_ADMIN)) 3618 return -EPERM; 3619 if (CHECK_ATTR(BPF_PROG_QUERY)) 3620 return -EINVAL; 3621 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) 3622 return -EINVAL; 3623 3624 switch (attr->query.attach_type) { 3625 case BPF_CGROUP_INET_INGRESS: 3626 case BPF_CGROUP_INET_EGRESS: 3627 case BPF_CGROUP_INET_SOCK_CREATE: 3628 case BPF_CGROUP_INET_SOCK_RELEASE: 3629 case BPF_CGROUP_INET4_BIND: 3630 case BPF_CGROUP_INET6_BIND: 3631 case BPF_CGROUP_INET4_POST_BIND: 3632 case BPF_CGROUP_INET6_POST_BIND: 3633 case BPF_CGROUP_INET4_CONNECT: 3634 case BPF_CGROUP_INET6_CONNECT: 3635 case BPF_CGROUP_INET4_GETPEERNAME: 3636 case BPF_CGROUP_INET6_GETPEERNAME: 3637 case BPF_CGROUP_INET4_GETSOCKNAME: 3638 case BPF_CGROUP_INET6_GETSOCKNAME: 3639 case BPF_CGROUP_UDP4_SENDMSG: 3640 case BPF_CGROUP_UDP6_SENDMSG: 3641 case BPF_CGROUP_UDP4_RECVMSG: 3642 case BPF_CGROUP_UDP6_RECVMSG: 3643 case BPF_CGROUP_SOCK_OPS: 3644 case BPF_CGROUP_DEVICE: 3645 case BPF_CGROUP_SYSCTL: 3646 case BPF_CGROUP_GETSOCKOPT: 3647 case BPF_CGROUP_SETSOCKOPT: 3648 case BPF_LSM_CGROUP: 3649 return cgroup_bpf_prog_query(attr, uattr); 3650 case BPF_LIRC_MODE2: 3651 return lirc_prog_query(attr, uattr); 3652 case BPF_FLOW_DISSECTOR: 3653 case BPF_SK_LOOKUP: 3654 return netns_bpf_prog_query(attr, uattr); 3655 case BPF_SK_SKB_STREAM_PARSER: 3656 case BPF_SK_SKB_STREAM_VERDICT: 3657 case BPF_SK_MSG_VERDICT: 3658 case BPF_SK_SKB_VERDICT: 3659 return sock_map_bpf_prog_query(attr, uattr); 3660 default: 3661 return -EINVAL; 3662 } 3663 } 3664 3665 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size 3666 3667 static int bpf_prog_test_run(const union bpf_attr *attr, 3668 union bpf_attr __user *uattr) 3669 { 3670 struct bpf_prog *prog; 3671 int ret = -ENOTSUPP; 3672 3673 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 3674 return -EINVAL; 3675 3676 if ((attr->test.ctx_size_in && !attr->test.ctx_in) || 3677 (!attr->test.ctx_size_in && attr->test.ctx_in)) 3678 return -EINVAL; 3679 3680 if ((attr->test.ctx_size_out && !attr->test.ctx_out) || 3681 (!attr->test.ctx_size_out && attr->test.ctx_out)) 3682 return -EINVAL; 3683 3684 prog = bpf_prog_get(attr->test.prog_fd); 3685 if (IS_ERR(prog)) 3686 return PTR_ERR(prog); 3687 3688 if (prog->aux->ops->test_run) 3689 ret = prog->aux->ops->test_run(prog, attr, uattr); 3690 3691 bpf_prog_put(prog); 3692 return ret; 3693 } 3694 3695 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 3696 3697 static int bpf_obj_get_next_id(const union bpf_attr *attr, 3698 union bpf_attr __user *uattr, 3699 struct idr *idr, 3700 spinlock_t *lock) 3701 { 3702 u32 next_id = attr->start_id; 3703 int err = 0; 3704 3705 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 3706 return -EINVAL; 3707 3708 if (!capable(CAP_SYS_ADMIN)) 3709 return -EPERM; 3710 3711 next_id++; 3712 spin_lock_bh(lock); 3713 if (!idr_get_next(idr, &next_id)) 3714 err = -ENOENT; 3715 spin_unlock_bh(lock); 3716 3717 if (!err) 3718 err = put_user(next_id, &uattr->next_id); 3719 3720 return err; 3721 } 3722 3723 struct bpf_map *bpf_map_get_curr_or_next(u32 *id) 3724 { 3725 struct bpf_map *map; 3726 3727 spin_lock_bh(&map_idr_lock); 3728 again: 3729 map = idr_get_next(&map_idr, id); 3730 if (map) { 3731 map = __bpf_map_inc_not_zero(map, false); 3732 if (IS_ERR(map)) { 3733 (*id)++; 3734 goto again; 3735 } 3736 } 3737 spin_unlock_bh(&map_idr_lock); 3738 3739 return map; 3740 } 3741 3742 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) 3743 { 3744 struct bpf_prog *prog; 3745 3746 spin_lock_bh(&prog_idr_lock); 3747 again: 3748 prog = idr_get_next(&prog_idr, id); 3749 if (prog) { 3750 prog = bpf_prog_inc_not_zero(prog); 3751 if (IS_ERR(prog)) { 3752 (*id)++; 3753 goto again; 3754 } 3755 } 3756 spin_unlock_bh(&prog_idr_lock); 3757 3758 return prog; 3759 } 3760 3761 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 3762 3763 struct bpf_prog *bpf_prog_by_id(u32 id) 3764 { 3765 struct bpf_prog *prog; 3766 3767 if (!id) 3768 return ERR_PTR(-ENOENT); 3769 3770 spin_lock_bh(&prog_idr_lock); 3771 prog = idr_find(&prog_idr, id); 3772 if (prog) 3773 prog = bpf_prog_inc_not_zero(prog); 3774 else 3775 prog = ERR_PTR(-ENOENT); 3776 spin_unlock_bh(&prog_idr_lock); 3777 return prog; 3778 } 3779 3780 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 3781 { 3782 struct bpf_prog *prog; 3783 u32 id = attr->prog_id; 3784 int fd; 3785 3786 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 3787 return -EINVAL; 3788 3789 if (!capable(CAP_SYS_ADMIN)) 3790 return -EPERM; 3791 3792 prog = bpf_prog_by_id(id); 3793 if (IS_ERR(prog)) 3794 return PTR_ERR(prog); 3795 3796 fd = bpf_prog_new_fd(prog); 3797 if (fd < 0) 3798 bpf_prog_put(prog); 3799 3800 return fd; 3801 } 3802 3803 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 3804 3805 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 3806 { 3807 struct bpf_map *map; 3808 u32 id = attr->map_id; 3809 int f_flags; 3810 int fd; 3811 3812 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || 3813 attr->open_flags & ~BPF_OBJ_FLAG_MASK) 3814 return -EINVAL; 3815 3816 if (!capable(CAP_SYS_ADMIN)) 3817 return -EPERM; 3818 3819 f_flags = bpf_get_file_flag(attr->open_flags); 3820 if (f_flags < 0) 3821 return f_flags; 3822 3823 spin_lock_bh(&map_idr_lock); 3824 map = idr_find(&map_idr, id); 3825 if (map) 3826 map = __bpf_map_inc_not_zero(map, true); 3827 else 3828 map = ERR_PTR(-ENOENT); 3829 spin_unlock_bh(&map_idr_lock); 3830 3831 if (IS_ERR(map)) 3832 return PTR_ERR(map); 3833 3834 fd = bpf_map_new_fd(map, f_flags); 3835 if (fd < 0) 3836 bpf_map_put_with_uref(map); 3837 3838 return fd; 3839 } 3840 3841 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 3842 unsigned long addr, u32 *off, 3843 u32 *type) 3844 { 3845 const struct bpf_map *map; 3846 int i; 3847 3848 mutex_lock(&prog->aux->used_maps_mutex); 3849 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { 3850 map = prog->aux->used_maps[i]; 3851 if (map == (void *)addr) { 3852 *type = BPF_PSEUDO_MAP_FD; 3853 goto out; 3854 } 3855 if (!map->ops->map_direct_value_meta) 3856 continue; 3857 if (!map->ops->map_direct_value_meta(map, addr, off)) { 3858 *type = BPF_PSEUDO_MAP_VALUE; 3859 goto out; 3860 } 3861 } 3862 map = NULL; 3863 3864 out: 3865 mutex_unlock(&prog->aux->used_maps_mutex); 3866 return map; 3867 } 3868 3869 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, 3870 const struct cred *f_cred) 3871 { 3872 const struct bpf_map *map; 3873 struct bpf_insn *insns; 3874 u32 off, type; 3875 u64 imm; 3876 u8 code; 3877 int i; 3878 3879 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), 3880 GFP_USER); 3881 if (!insns) 3882 return insns; 3883 3884 for (i = 0; i < prog->len; i++) { 3885 code = insns[i].code; 3886 3887 if (code == (BPF_JMP | BPF_TAIL_CALL)) { 3888 insns[i].code = BPF_JMP | BPF_CALL; 3889 insns[i].imm = BPF_FUNC_tail_call; 3890 /* fall-through */ 3891 } 3892 if (code == (BPF_JMP | BPF_CALL) || 3893 code == (BPF_JMP | BPF_CALL_ARGS)) { 3894 if (code == (BPF_JMP | BPF_CALL_ARGS)) 3895 insns[i].code = BPF_JMP | BPF_CALL; 3896 if (!bpf_dump_raw_ok(f_cred)) 3897 insns[i].imm = 0; 3898 continue; 3899 } 3900 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { 3901 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; 3902 continue; 3903 } 3904 3905 if (code != (BPF_LD | BPF_IMM | BPF_DW)) 3906 continue; 3907 3908 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 3909 map = bpf_map_from_imm(prog, imm, &off, &type); 3910 if (map) { 3911 insns[i].src_reg = type; 3912 insns[i].imm = map->id; 3913 insns[i + 1].imm = off; 3914 continue; 3915 } 3916 } 3917 3918 return insns; 3919 } 3920 3921 static int set_info_rec_size(struct bpf_prog_info *info) 3922 { 3923 /* 3924 * Ensure info.*_rec_size is the same as kernel expected size 3925 * 3926 * or 3927 * 3928 * Only allow zero *_rec_size if both _rec_size and _cnt are 3929 * zero. In this case, the kernel will set the expected 3930 * _rec_size back to the info. 3931 */ 3932 3933 if ((info->nr_func_info || info->func_info_rec_size) && 3934 info->func_info_rec_size != sizeof(struct bpf_func_info)) 3935 return -EINVAL; 3936 3937 if ((info->nr_line_info || info->line_info_rec_size) && 3938 info->line_info_rec_size != sizeof(struct bpf_line_info)) 3939 return -EINVAL; 3940 3941 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && 3942 info->jited_line_info_rec_size != sizeof(__u64)) 3943 return -EINVAL; 3944 3945 info->func_info_rec_size = sizeof(struct bpf_func_info); 3946 info->line_info_rec_size = sizeof(struct bpf_line_info); 3947 info->jited_line_info_rec_size = sizeof(__u64); 3948 3949 return 0; 3950 } 3951 3952 static int bpf_prog_get_info_by_fd(struct file *file, 3953 struct bpf_prog *prog, 3954 const union bpf_attr *attr, 3955 union bpf_attr __user *uattr) 3956 { 3957 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 3958 struct btf *attach_btf = bpf_prog_get_target_btf(prog); 3959 struct bpf_prog_info info; 3960 u32 info_len = attr->info.info_len; 3961 struct bpf_prog_kstats stats; 3962 char __user *uinsns; 3963 u32 ulen; 3964 int err; 3965 3966 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 3967 if (err) 3968 return err; 3969 info_len = min_t(u32, sizeof(info), info_len); 3970 3971 memset(&info, 0, sizeof(info)); 3972 if (copy_from_user(&info, uinfo, info_len)) 3973 return -EFAULT; 3974 3975 info.type = prog->type; 3976 info.id = prog->aux->id; 3977 info.load_time = prog->aux->load_time; 3978 info.created_by_uid = from_kuid_munged(current_user_ns(), 3979 prog->aux->user->uid); 3980 info.gpl_compatible = prog->gpl_compatible; 3981 3982 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 3983 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 3984 3985 mutex_lock(&prog->aux->used_maps_mutex); 3986 ulen = info.nr_map_ids; 3987 info.nr_map_ids = prog->aux->used_map_cnt; 3988 ulen = min_t(u32, info.nr_map_ids, ulen); 3989 if (ulen) { 3990 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 3991 u32 i; 3992 3993 for (i = 0; i < ulen; i++) 3994 if (put_user(prog->aux->used_maps[i]->id, 3995 &user_map_ids[i])) { 3996 mutex_unlock(&prog->aux->used_maps_mutex); 3997 return -EFAULT; 3998 } 3999 } 4000 mutex_unlock(&prog->aux->used_maps_mutex); 4001 4002 err = set_info_rec_size(&info); 4003 if (err) 4004 return err; 4005 4006 bpf_prog_get_stats(prog, &stats); 4007 info.run_time_ns = stats.nsecs; 4008 info.run_cnt = stats.cnt; 4009 info.recursion_misses = stats.misses; 4010 4011 info.verified_insns = prog->aux->verified_insns; 4012 4013 if (!bpf_capable()) { 4014 info.jited_prog_len = 0; 4015 info.xlated_prog_len = 0; 4016 info.nr_jited_ksyms = 0; 4017 info.nr_jited_func_lens = 0; 4018 info.nr_func_info = 0; 4019 info.nr_line_info = 0; 4020 info.nr_jited_line_info = 0; 4021 goto done; 4022 } 4023 4024 ulen = info.xlated_prog_len; 4025 info.xlated_prog_len = bpf_prog_insn_size(prog); 4026 if (info.xlated_prog_len && ulen) { 4027 struct bpf_insn *insns_sanitized; 4028 bool fault; 4029 4030 if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) { 4031 info.xlated_prog_insns = 0; 4032 goto done; 4033 } 4034 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); 4035 if (!insns_sanitized) 4036 return -ENOMEM; 4037 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 4038 ulen = min_t(u32, info.xlated_prog_len, ulen); 4039 fault = copy_to_user(uinsns, insns_sanitized, ulen); 4040 kfree(insns_sanitized); 4041 if (fault) 4042 return -EFAULT; 4043 } 4044 4045 if (bpf_prog_is_offloaded(prog->aux)) { 4046 err = bpf_prog_offload_info_fill(&info, prog); 4047 if (err) 4048 return err; 4049 goto done; 4050 } 4051 4052 /* NOTE: the following code is supposed to be skipped for offload. 4053 * bpf_prog_offload_info_fill() is the place to fill similar fields 4054 * for offload. 4055 */ 4056 ulen = info.jited_prog_len; 4057 if (prog->aux->func_cnt) { 4058 u32 i; 4059 4060 info.jited_prog_len = 0; 4061 for (i = 0; i < prog->aux->func_cnt; i++) 4062 info.jited_prog_len += prog->aux->func[i]->jited_len; 4063 } else { 4064 info.jited_prog_len = prog->jited_len; 4065 } 4066 4067 if (info.jited_prog_len && ulen) { 4068 if (bpf_dump_raw_ok(file->f_cred)) { 4069 uinsns = u64_to_user_ptr(info.jited_prog_insns); 4070 ulen = min_t(u32, info.jited_prog_len, ulen); 4071 4072 /* for multi-function programs, copy the JITed 4073 * instructions for all the functions 4074 */ 4075 if (prog->aux->func_cnt) { 4076 u32 len, free, i; 4077 u8 *img; 4078 4079 free = ulen; 4080 for (i = 0; i < prog->aux->func_cnt; i++) { 4081 len = prog->aux->func[i]->jited_len; 4082 len = min_t(u32, len, free); 4083 img = (u8 *) prog->aux->func[i]->bpf_func; 4084 if (copy_to_user(uinsns, img, len)) 4085 return -EFAULT; 4086 uinsns += len; 4087 free -= len; 4088 if (!free) 4089 break; 4090 } 4091 } else { 4092 if (copy_to_user(uinsns, prog->bpf_func, ulen)) 4093 return -EFAULT; 4094 } 4095 } else { 4096 info.jited_prog_insns = 0; 4097 } 4098 } 4099 4100 ulen = info.nr_jited_ksyms; 4101 info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; 4102 if (ulen) { 4103 if (bpf_dump_raw_ok(file->f_cred)) { 4104 unsigned long ksym_addr; 4105 u64 __user *user_ksyms; 4106 u32 i; 4107 4108 /* copy the address of the kernel symbol 4109 * corresponding to each function 4110 */ 4111 ulen = min_t(u32, info.nr_jited_ksyms, ulen); 4112 user_ksyms = u64_to_user_ptr(info.jited_ksyms); 4113 if (prog->aux->func_cnt) { 4114 for (i = 0; i < ulen; i++) { 4115 ksym_addr = (unsigned long) 4116 prog->aux->func[i]->bpf_func; 4117 if (put_user((u64) ksym_addr, 4118 &user_ksyms[i])) 4119 return -EFAULT; 4120 } 4121 } else { 4122 ksym_addr = (unsigned long) prog->bpf_func; 4123 if (put_user((u64) ksym_addr, &user_ksyms[0])) 4124 return -EFAULT; 4125 } 4126 } else { 4127 info.jited_ksyms = 0; 4128 } 4129 } 4130 4131 ulen = info.nr_jited_func_lens; 4132 info.nr_jited_func_lens = prog->aux->func_cnt ? : 1; 4133 if (ulen) { 4134 if (bpf_dump_raw_ok(file->f_cred)) { 4135 u32 __user *user_lens; 4136 u32 func_len, i; 4137 4138 /* copy the JITed image lengths for each function */ 4139 ulen = min_t(u32, info.nr_jited_func_lens, ulen); 4140 user_lens = u64_to_user_ptr(info.jited_func_lens); 4141 if (prog->aux->func_cnt) { 4142 for (i = 0; i < ulen; i++) { 4143 func_len = 4144 prog->aux->func[i]->jited_len; 4145 if (put_user(func_len, &user_lens[i])) 4146 return -EFAULT; 4147 } 4148 } else { 4149 func_len = prog->jited_len; 4150 if (put_user(func_len, &user_lens[0])) 4151 return -EFAULT; 4152 } 4153 } else { 4154 info.jited_func_lens = 0; 4155 } 4156 } 4157 4158 if (prog->aux->btf) 4159 info.btf_id = btf_obj_id(prog->aux->btf); 4160 info.attach_btf_id = prog->aux->attach_btf_id; 4161 if (attach_btf) 4162 info.attach_btf_obj_id = btf_obj_id(attach_btf); 4163 4164 ulen = info.nr_func_info; 4165 info.nr_func_info = prog->aux->func_info_cnt; 4166 if (info.nr_func_info && ulen) { 4167 char __user *user_finfo; 4168 4169 user_finfo = u64_to_user_ptr(info.func_info); 4170 ulen = min_t(u32, info.nr_func_info, ulen); 4171 if (copy_to_user(user_finfo, prog->aux->func_info, 4172 info.func_info_rec_size * ulen)) 4173 return -EFAULT; 4174 } 4175 4176 ulen = info.nr_line_info; 4177 info.nr_line_info = prog->aux->nr_linfo; 4178 if (info.nr_line_info && ulen) { 4179 __u8 __user *user_linfo; 4180 4181 user_linfo = u64_to_user_ptr(info.line_info); 4182 ulen = min_t(u32, info.nr_line_info, ulen); 4183 if (copy_to_user(user_linfo, prog->aux->linfo, 4184 info.line_info_rec_size * ulen)) 4185 return -EFAULT; 4186 } 4187 4188 ulen = info.nr_jited_line_info; 4189 if (prog->aux->jited_linfo) 4190 info.nr_jited_line_info = prog->aux->nr_linfo; 4191 else 4192 info.nr_jited_line_info = 0; 4193 if (info.nr_jited_line_info && ulen) { 4194 if (bpf_dump_raw_ok(file->f_cred)) { 4195 unsigned long line_addr; 4196 __u64 __user *user_linfo; 4197 u32 i; 4198 4199 user_linfo = u64_to_user_ptr(info.jited_line_info); 4200 ulen = min_t(u32, info.nr_jited_line_info, ulen); 4201 for (i = 0; i < ulen; i++) { 4202 line_addr = (unsigned long)prog->aux->jited_linfo[i]; 4203 if (put_user((__u64)line_addr, &user_linfo[i])) 4204 return -EFAULT; 4205 } 4206 } else { 4207 info.jited_line_info = 0; 4208 } 4209 } 4210 4211 ulen = info.nr_prog_tags; 4212 info.nr_prog_tags = prog->aux->func_cnt ? : 1; 4213 if (ulen) { 4214 __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; 4215 u32 i; 4216 4217 user_prog_tags = u64_to_user_ptr(info.prog_tags); 4218 ulen = min_t(u32, info.nr_prog_tags, ulen); 4219 if (prog->aux->func_cnt) { 4220 for (i = 0; i < ulen; i++) { 4221 if (copy_to_user(user_prog_tags[i], 4222 prog->aux->func[i]->tag, 4223 BPF_TAG_SIZE)) 4224 return -EFAULT; 4225 } 4226 } else { 4227 if (copy_to_user(user_prog_tags[0], 4228 prog->tag, BPF_TAG_SIZE)) 4229 return -EFAULT; 4230 } 4231 } 4232 4233 done: 4234 if (copy_to_user(uinfo, &info, info_len) || 4235 put_user(info_len, &uattr->info.info_len)) 4236 return -EFAULT; 4237 4238 return 0; 4239 } 4240 4241 static int bpf_map_get_info_by_fd(struct file *file, 4242 struct bpf_map *map, 4243 const union bpf_attr *attr, 4244 union bpf_attr __user *uattr) 4245 { 4246 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); 4247 struct bpf_map_info info; 4248 u32 info_len = attr->info.info_len; 4249 int err; 4250 4251 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 4252 if (err) 4253 return err; 4254 info_len = min_t(u32, sizeof(info), info_len); 4255 4256 memset(&info, 0, sizeof(info)); 4257 info.type = map->map_type; 4258 info.id = map->id; 4259 info.key_size = map->key_size; 4260 info.value_size = map->value_size; 4261 info.max_entries = map->max_entries; 4262 info.map_flags = map->map_flags; 4263 info.map_extra = map->map_extra; 4264 memcpy(info.name, map->name, sizeof(map->name)); 4265 4266 if (map->btf) { 4267 info.btf_id = btf_obj_id(map->btf); 4268 info.btf_key_type_id = map->btf_key_type_id; 4269 info.btf_value_type_id = map->btf_value_type_id; 4270 } 4271 info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 4272 4273 if (bpf_map_is_offloaded(map)) { 4274 err = bpf_map_offload_info_fill(&info, map); 4275 if (err) 4276 return err; 4277 } 4278 4279 if (copy_to_user(uinfo, &info, info_len) || 4280 put_user(info_len, &uattr->info.info_len)) 4281 return -EFAULT; 4282 4283 return 0; 4284 } 4285 4286 static int bpf_btf_get_info_by_fd(struct file *file, 4287 struct btf *btf, 4288 const union bpf_attr *attr, 4289 union bpf_attr __user *uattr) 4290 { 4291 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); 4292 u32 info_len = attr->info.info_len; 4293 int err; 4294 4295 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 4296 if (err) 4297 return err; 4298 4299 return btf_get_info_by_fd(btf, attr, uattr); 4300 } 4301 4302 static int bpf_link_get_info_by_fd(struct file *file, 4303 struct bpf_link *link, 4304 const union bpf_attr *attr, 4305 union bpf_attr __user *uattr) 4306 { 4307 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); 4308 struct bpf_link_info info; 4309 u32 info_len = attr->info.info_len; 4310 int err; 4311 4312 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 4313 if (err) 4314 return err; 4315 info_len = min_t(u32, sizeof(info), info_len); 4316 4317 memset(&info, 0, sizeof(info)); 4318 if (copy_from_user(&info, uinfo, info_len)) 4319 return -EFAULT; 4320 4321 info.type = link->type; 4322 info.id = link->id; 4323 if (link->prog) 4324 info.prog_id = link->prog->aux->id; 4325 4326 if (link->ops->fill_link_info) { 4327 err = link->ops->fill_link_info(link, &info); 4328 if (err) 4329 return err; 4330 } 4331 4332 if (copy_to_user(uinfo, &info, info_len) || 4333 put_user(info_len, &uattr->info.info_len)) 4334 return -EFAULT; 4335 4336 return 0; 4337 } 4338 4339 4340 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info 4341 4342 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, 4343 union bpf_attr __user *uattr) 4344 { 4345 int ufd = attr->info.bpf_fd; 4346 struct fd f; 4347 int err; 4348 4349 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) 4350 return -EINVAL; 4351 4352 f = fdget(ufd); 4353 if (!f.file) 4354 return -EBADFD; 4355 4356 if (f.file->f_op == &bpf_prog_fops) 4357 err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr, 4358 uattr); 4359 else if (f.file->f_op == &bpf_map_fops) 4360 err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr, 4361 uattr); 4362 else if (f.file->f_op == &btf_fops) 4363 err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); 4364 else if (f.file->f_op == &bpf_link_fops) 4365 err = bpf_link_get_info_by_fd(f.file, f.file->private_data, 4366 attr, uattr); 4367 else 4368 err = -EINVAL; 4369 4370 fdput(f); 4371 return err; 4372 } 4373 4374 #define BPF_BTF_LOAD_LAST_FIELD btf_log_level 4375 4376 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr) 4377 { 4378 if (CHECK_ATTR(BPF_BTF_LOAD)) 4379 return -EINVAL; 4380 4381 if (!bpf_capable()) 4382 return -EPERM; 4383 4384 return btf_new_fd(attr, uattr); 4385 } 4386 4387 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id 4388 4389 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) 4390 { 4391 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) 4392 return -EINVAL; 4393 4394 if (!capable(CAP_SYS_ADMIN)) 4395 return -EPERM; 4396 4397 return btf_get_fd_by_id(attr->btf_id); 4398 } 4399 4400 static int bpf_task_fd_query_copy(const union bpf_attr *attr, 4401 union bpf_attr __user *uattr, 4402 u32 prog_id, u32 fd_type, 4403 const char *buf, u64 probe_offset, 4404 u64 probe_addr) 4405 { 4406 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf); 4407 u32 len = buf ? strlen(buf) : 0, input_len; 4408 int err = 0; 4409 4410 if (put_user(len, &uattr->task_fd_query.buf_len)) 4411 return -EFAULT; 4412 input_len = attr->task_fd_query.buf_len; 4413 if (input_len && ubuf) { 4414 if (!len) { 4415 /* nothing to copy, just make ubuf NULL terminated */ 4416 char zero = '\0'; 4417 4418 if (put_user(zero, ubuf)) 4419 return -EFAULT; 4420 } else if (input_len >= len + 1) { 4421 /* ubuf can hold the string with NULL terminator */ 4422 if (copy_to_user(ubuf, buf, len + 1)) 4423 return -EFAULT; 4424 } else { 4425 /* ubuf cannot hold the string with NULL terminator, 4426 * do a partial copy with NULL terminator. 4427 */ 4428 char zero = '\0'; 4429 4430 err = -ENOSPC; 4431 if (copy_to_user(ubuf, buf, input_len - 1)) 4432 return -EFAULT; 4433 if (put_user(zero, ubuf + input_len - 1)) 4434 return -EFAULT; 4435 } 4436 } 4437 4438 if (put_user(prog_id, &uattr->task_fd_query.prog_id) || 4439 put_user(fd_type, &uattr->task_fd_query.fd_type) || 4440 put_user(probe_offset, &uattr->task_fd_query.probe_offset) || 4441 put_user(probe_addr, &uattr->task_fd_query.probe_addr)) 4442 return -EFAULT; 4443 4444 return err; 4445 } 4446 4447 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr 4448 4449 static int bpf_task_fd_query(const union bpf_attr *attr, 4450 union bpf_attr __user *uattr) 4451 { 4452 pid_t pid = attr->task_fd_query.pid; 4453 u32 fd = attr->task_fd_query.fd; 4454 const struct perf_event *event; 4455 struct task_struct *task; 4456 struct file *file; 4457 int err; 4458 4459 if (CHECK_ATTR(BPF_TASK_FD_QUERY)) 4460 return -EINVAL; 4461 4462 if (!capable(CAP_SYS_ADMIN)) 4463 return -EPERM; 4464 4465 if (attr->task_fd_query.flags != 0) 4466 return -EINVAL; 4467 4468 rcu_read_lock(); 4469 task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 4470 rcu_read_unlock(); 4471 if (!task) 4472 return -ENOENT; 4473 4474 err = 0; 4475 file = fget_task(task, fd); 4476 put_task_struct(task); 4477 if (!file) 4478 return -EBADF; 4479 4480 if (file->f_op == &bpf_link_fops) { 4481 struct bpf_link *link = file->private_data; 4482 4483 if (link->ops == &bpf_raw_tp_link_lops) { 4484 struct bpf_raw_tp_link *raw_tp = 4485 container_of(link, struct bpf_raw_tp_link, link); 4486 struct bpf_raw_event_map *btp = raw_tp->btp; 4487 4488 err = bpf_task_fd_query_copy(attr, uattr, 4489 raw_tp->link.prog->aux->id, 4490 BPF_FD_TYPE_RAW_TRACEPOINT, 4491 btp->tp->name, 0, 0); 4492 goto put_file; 4493 } 4494 goto out_not_supp; 4495 } 4496 4497 event = perf_get_event(file); 4498 if (!IS_ERR(event)) { 4499 u64 probe_offset, probe_addr; 4500 u32 prog_id, fd_type; 4501 const char *buf; 4502 4503 err = bpf_get_perf_event_info(event, &prog_id, &fd_type, 4504 &buf, &probe_offset, 4505 &probe_addr); 4506 if (!err) 4507 err = bpf_task_fd_query_copy(attr, uattr, prog_id, 4508 fd_type, buf, 4509 probe_offset, 4510 probe_addr); 4511 goto put_file; 4512 } 4513 4514 out_not_supp: 4515 err = -ENOTSUPP; 4516 put_file: 4517 fput(file); 4518 return err; 4519 } 4520 4521 #define BPF_MAP_BATCH_LAST_FIELD batch.flags 4522 4523 #define BPF_DO_BATCH(fn, ...) \ 4524 do { \ 4525 if (!fn) { \ 4526 err = -ENOTSUPP; \ 4527 goto err_put; \ 4528 } \ 4529 err = fn(__VA_ARGS__); \ 4530 } while (0) 4531 4532 static int bpf_map_do_batch(const union bpf_attr *attr, 4533 union bpf_attr __user *uattr, 4534 int cmd) 4535 { 4536 bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || 4537 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; 4538 bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; 4539 struct bpf_map *map; 4540 int err, ufd; 4541 struct fd f; 4542 4543 if (CHECK_ATTR(BPF_MAP_BATCH)) 4544 return -EINVAL; 4545 4546 ufd = attr->batch.map_fd; 4547 f = fdget(ufd); 4548 map = __bpf_map_get(f); 4549 if (IS_ERR(map)) 4550 return PTR_ERR(map); 4551 if (has_write) 4552 bpf_map_write_active_inc(map); 4553 if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 4554 err = -EPERM; 4555 goto err_put; 4556 } 4557 if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 4558 err = -EPERM; 4559 goto err_put; 4560 } 4561 4562 if (cmd == BPF_MAP_LOOKUP_BATCH) 4563 BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr); 4564 else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) 4565 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr); 4566 else if (cmd == BPF_MAP_UPDATE_BATCH) 4567 BPF_DO_BATCH(map->ops->map_update_batch, map, f.file, attr, uattr); 4568 else 4569 BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr); 4570 err_put: 4571 if (has_write) 4572 bpf_map_write_active_dec(map); 4573 fdput(f); 4574 return err; 4575 } 4576 4577 #define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies 4578 static int link_create(union bpf_attr *attr, bpfptr_t uattr) 4579 { 4580 enum bpf_prog_type ptype; 4581 struct bpf_prog *prog; 4582 int ret; 4583 4584 if (CHECK_ATTR(BPF_LINK_CREATE)) 4585 return -EINVAL; 4586 4587 if (attr->link_create.attach_type == BPF_STRUCT_OPS) 4588 return bpf_struct_ops_link_create(attr); 4589 4590 prog = bpf_prog_get(attr->link_create.prog_fd); 4591 if (IS_ERR(prog)) 4592 return PTR_ERR(prog); 4593 4594 ret = bpf_prog_attach_check_attach_type(prog, 4595 attr->link_create.attach_type); 4596 if (ret) 4597 goto out; 4598 4599 switch (prog->type) { 4600 case BPF_PROG_TYPE_EXT: 4601 break; 4602 case BPF_PROG_TYPE_PERF_EVENT: 4603 case BPF_PROG_TYPE_TRACEPOINT: 4604 if (attr->link_create.attach_type != BPF_PERF_EVENT) { 4605 ret = -EINVAL; 4606 goto out; 4607 } 4608 break; 4609 case BPF_PROG_TYPE_KPROBE: 4610 if (attr->link_create.attach_type != BPF_PERF_EVENT && 4611 attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) { 4612 ret = -EINVAL; 4613 goto out; 4614 } 4615 break; 4616 default: 4617 ptype = attach_type_to_prog_type(attr->link_create.attach_type); 4618 if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) { 4619 ret = -EINVAL; 4620 goto out; 4621 } 4622 break; 4623 } 4624 4625 switch (prog->type) { 4626 case BPF_PROG_TYPE_CGROUP_SKB: 4627 case BPF_PROG_TYPE_CGROUP_SOCK: 4628 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4629 case BPF_PROG_TYPE_SOCK_OPS: 4630 case BPF_PROG_TYPE_CGROUP_DEVICE: 4631 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4632 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4633 ret = cgroup_bpf_link_attach(attr, prog); 4634 break; 4635 case BPF_PROG_TYPE_EXT: 4636 ret = bpf_tracing_prog_attach(prog, 4637 attr->link_create.target_fd, 4638 attr->link_create.target_btf_id, 4639 attr->link_create.tracing.cookie); 4640 break; 4641 case BPF_PROG_TYPE_LSM: 4642 case BPF_PROG_TYPE_TRACING: 4643 if (attr->link_create.attach_type != prog->expected_attach_type) { 4644 ret = -EINVAL; 4645 goto out; 4646 } 4647 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) 4648 ret = bpf_raw_tp_link_attach(prog, NULL); 4649 else if (prog->expected_attach_type == BPF_TRACE_ITER) 4650 ret = bpf_iter_link_attach(attr, uattr, prog); 4651 else if (prog->expected_attach_type == BPF_LSM_CGROUP) 4652 ret = cgroup_bpf_link_attach(attr, prog); 4653 else 4654 ret = bpf_tracing_prog_attach(prog, 4655 attr->link_create.target_fd, 4656 attr->link_create.target_btf_id, 4657 attr->link_create.tracing.cookie); 4658 break; 4659 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4660 case BPF_PROG_TYPE_SK_LOOKUP: 4661 ret = netns_bpf_link_create(attr, prog); 4662 break; 4663 #ifdef CONFIG_NET 4664 case BPF_PROG_TYPE_XDP: 4665 ret = bpf_xdp_link_attach(attr, prog); 4666 break; 4667 #endif 4668 case BPF_PROG_TYPE_PERF_EVENT: 4669 case BPF_PROG_TYPE_TRACEPOINT: 4670 ret = bpf_perf_link_attach(attr, prog); 4671 break; 4672 case BPF_PROG_TYPE_KPROBE: 4673 if (attr->link_create.attach_type == BPF_PERF_EVENT) 4674 ret = bpf_perf_link_attach(attr, prog); 4675 else 4676 ret = bpf_kprobe_multi_link_attach(attr, prog); 4677 break; 4678 default: 4679 ret = -EINVAL; 4680 } 4681 4682 out: 4683 if (ret < 0) 4684 bpf_prog_put(prog); 4685 return ret; 4686 } 4687 4688 static int link_update_map(struct bpf_link *link, union bpf_attr *attr) 4689 { 4690 struct bpf_map *new_map, *old_map = NULL; 4691 int ret; 4692 4693 new_map = bpf_map_get(attr->link_update.new_map_fd); 4694 if (IS_ERR(new_map)) 4695 return PTR_ERR(new_map); 4696 4697 if (attr->link_update.flags & BPF_F_REPLACE) { 4698 old_map = bpf_map_get(attr->link_update.old_map_fd); 4699 if (IS_ERR(old_map)) { 4700 ret = PTR_ERR(old_map); 4701 goto out_put; 4702 } 4703 } else if (attr->link_update.old_map_fd) { 4704 ret = -EINVAL; 4705 goto out_put; 4706 } 4707 4708 ret = link->ops->update_map(link, new_map, old_map); 4709 4710 if (old_map) 4711 bpf_map_put(old_map); 4712 out_put: 4713 bpf_map_put(new_map); 4714 return ret; 4715 } 4716 4717 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd 4718 4719 static int link_update(union bpf_attr *attr) 4720 { 4721 struct bpf_prog *old_prog = NULL, *new_prog; 4722 struct bpf_link *link; 4723 u32 flags; 4724 int ret; 4725 4726 if (CHECK_ATTR(BPF_LINK_UPDATE)) 4727 return -EINVAL; 4728 4729 flags = attr->link_update.flags; 4730 if (flags & ~BPF_F_REPLACE) 4731 return -EINVAL; 4732 4733 link = bpf_link_get_from_fd(attr->link_update.link_fd); 4734 if (IS_ERR(link)) 4735 return PTR_ERR(link); 4736 4737 if (link->ops->update_map) { 4738 ret = link_update_map(link, attr); 4739 goto out_put_link; 4740 } 4741 4742 new_prog = bpf_prog_get(attr->link_update.new_prog_fd); 4743 if (IS_ERR(new_prog)) { 4744 ret = PTR_ERR(new_prog); 4745 goto out_put_link; 4746 } 4747 4748 if (flags & BPF_F_REPLACE) { 4749 old_prog = bpf_prog_get(attr->link_update.old_prog_fd); 4750 if (IS_ERR(old_prog)) { 4751 ret = PTR_ERR(old_prog); 4752 old_prog = NULL; 4753 goto out_put_progs; 4754 } 4755 } else if (attr->link_update.old_prog_fd) { 4756 ret = -EINVAL; 4757 goto out_put_progs; 4758 } 4759 4760 if (link->ops->update_prog) 4761 ret = link->ops->update_prog(link, new_prog, old_prog); 4762 else 4763 ret = -EINVAL; 4764 4765 out_put_progs: 4766 if (old_prog) 4767 bpf_prog_put(old_prog); 4768 if (ret) 4769 bpf_prog_put(new_prog); 4770 out_put_link: 4771 bpf_link_put(link); 4772 return ret; 4773 } 4774 4775 #define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd 4776 4777 static int link_detach(union bpf_attr *attr) 4778 { 4779 struct bpf_link *link; 4780 int ret; 4781 4782 if (CHECK_ATTR(BPF_LINK_DETACH)) 4783 return -EINVAL; 4784 4785 link = bpf_link_get_from_fd(attr->link_detach.link_fd); 4786 if (IS_ERR(link)) 4787 return PTR_ERR(link); 4788 4789 if (link->ops->detach) 4790 ret = link->ops->detach(link); 4791 else 4792 ret = -EOPNOTSUPP; 4793 4794 bpf_link_put(link); 4795 return ret; 4796 } 4797 4798 static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) 4799 { 4800 return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT); 4801 } 4802 4803 struct bpf_link *bpf_link_by_id(u32 id) 4804 { 4805 struct bpf_link *link; 4806 4807 if (!id) 4808 return ERR_PTR(-ENOENT); 4809 4810 spin_lock_bh(&link_idr_lock); 4811 /* before link is "settled", ID is 0, pretend it doesn't exist yet */ 4812 link = idr_find(&link_idr, id); 4813 if (link) { 4814 if (link->id) 4815 link = bpf_link_inc_not_zero(link); 4816 else 4817 link = ERR_PTR(-EAGAIN); 4818 } else { 4819 link = ERR_PTR(-ENOENT); 4820 } 4821 spin_unlock_bh(&link_idr_lock); 4822 return link; 4823 } 4824 4825 struct bpf_link *bpf_link_get_curr_or_next(u32 *id) 4826 { 4827 struct bpf_link *link; 4828 4829 spin_lock_bh(&link_idr_lock); 4830 again: 4831 link = idr_get_next(&link_idr, id); 4832 if (link) { 4833 link = bpf_link_inc_not_zero(link); 4834 if (IS_ERR(link)) { 4835 (*id)++; 4836 goto again; 4837 } 4838 } 4839 spin_unlock_bh(&link_idr_lock); 4840 4841 return link; 4842 } 4843 4844 #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id 4845 4846 static int bpf_link_get_fd_by_id(const union bpf_attr *attr) 4847 { 4848 struct bpf_link *link; 4849 u32 id = attr->link_id; 4850 int fd; 4851 4852 if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) 4853 return -EINVAL; 4854 4855 if (!capable(CAP_SYS_ADMIN)) 4856 return -EPERM; 4857 4858 link = bpf_link_by_id(id); 4859 if (IS_ERR(link)) 4860 return PTR_ERR(link); 4861 4862 fd = bpf_link_new_fd(link); 4863 if (fd < 0) 4864 bpf_link_put(link); 4865 4866 return fd; 4867 } 4868 4869 DEFINE_MUTEX(bpf_stats_enabled_mutex); 4870 4871 static int bpf_stats_release(struct inode *inode, struct file *file) 4872 { 4873 mutex_lock(&bpf_stats_enabled_mutex); 4874 static_key_slow_dec(&bpf_stats_enabled_key.key); 4875 mutex_unlock(&bpf_stats_enabled_mutex); 4876 return 0; 4877 } 4878 4879 static const struct file_operations bpf_stats_fops = { 4880 .release = bpf_stats_release, 4881 }; 4882 4883 static int bpf_enable_runtime_stats(void) 4884 { 4885 int fd; 4886 4887 mutex_lock(&bpf_stats_enabled_mutex); 4888 4889 /* Set a very high limit to avoid overflow */ 4890 if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) { 4891 mutex_unlock(&bpf_stats_enabled_mutex); 4892 return -EBUSY; 4893 } 4894 4895 fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); 4896 if (fd >= 0) 4897 static_key_slow_inc(&bpf_stats_enabled_key.key); 4898 4899 mutex_unlock(&bpf_stats_enabled_mutex); 4900 return fd; 4901 } 4902 4903 #define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type 4904 4905 static int bpf_enable_stats(union bpf_attr *attr) 4906 { 4907 4908 if (CHECK_ATTR(BPF_ENABLE_STATS)) 4909 return -EINVAL; 4910 4911 if (!capable(CAP_SYS_ADMIN)) 4912 return -EPERM; 4913 4914 switch (attr->enable_stats.type) { 4915 case BPF_STATS_RUN_TIME: 4916 return bpf_enable_runtime_stats(); 4917 default: 4918 break; 4919 } 4920 return -EINVAL; 4921 } 4922 4923 #define BPF_ITER_CREATE_LAST_FIELD iter_create.flags 4924 4925 static int bpf_iter_create(union bpf_attr *attr) 4926 { 4927 struct bpf_link *link; 4928 int err; 4929 4930 if (CHECK_ATTR(BPF_ITER_CREATE)) 4931 return -EINVAL; 4932 4933 if (attr->iter_create.flags) 4934 return -EINVAL; 4935 4936 link = bpf_link_get_from_fd(attr->iter_create.link_fd); 4937 if (IS_ERR(link)) 4938 return PTR_ERR(link); 4939 4940 err = bpf_iter_new_fd(link); 4941 bpf_link_put(link); 4942 4943 return err; 4944 } 4945 4946 #define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags 4947 4948 static int bpf_prog_bind_map(union bpf_attr *attr) 4949 { 4950 struct bpf_prog *prog; 4951 struct bpf_map *map; 4952 struct bpf_map **used_maps_old, **used_maps_new; 4953 int i, ret = 0; 4954 4955 if (CHECK_ATTR(BPF_PROG_BIND_MAP)) 4956 return -EINVAL; 4957 4958 if (attr->prog_bind_map.flags) 4959 return -EINVAL; 4960 4961 prog = bpf_prog_get(attr->prog_bind_map.prog_fd); 4962 if (IS_ERR(prog)) 4963 return PTR_ERR(prog); 4964 4965 map = bpf_map_get(attr->prog_bind_map.map_fd); 4966 if (IS_ERR(map)) { 4967 ret = PTR_ERR(map); 4968 goto out_prog_put; 4969 } 4970 4971 mutex_lock(&prog->aux->used_maps_mutex); 4972 4973 used_maps_old = prog->aux->used_maps; 4974 4975 for (i = 0; i < prog->aux->used_map_cnt; i++) 4976 if (used_maps_old[i] == map) { 4977 bpf_map_put(map); 4978 goto out_unlock; 4979 } 4980 4981 used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, 4982 sizeof(used_maps_new[0]), 4983 GFP_KERNEL); 4984 if (!used_maps_new) { 4985 ret = -ENOMEM; 4986 goto out_unlock; 4987 } 4988 4989 memcpy(used_maps_new, used_maps_old, 4990 sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); 4991 used_maps_new[prog->aux->used_map_cnt] = map; 4992 4993 prog->aux->used_map_cnt++; 4994 prog->aux->used_maps = used_maps_new; 4995 4996 kfree(used_maps_old); 4997 4998 out_unlock: 4999 mutex_unlock(&prog->aux->used_maps_mutex); 5000 5001 if (ret) 5002 bpf_map_put(map); 5003 out_prog_put: 5004 bpf_prog_put(prog); 5005 return ret; 5006 } 5007 5008 static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) 5009 { 5010 union bpf_attr attr; 5011 bool capable; 5012 int err; 5013 5014 capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled; 5015 5016 /* Intent here is for unprivileged_bpf_disabled to block key object 5017 * creation commands for unprivileged users; other actions depend 5018 * of fd availability and access to bpffs, so are dependent on 5019 * object creation success. Capabilities are later verified for 5020 * operations such as load and map create, so even with unprivileged 5021 * BPF disabled, capability checks are still carried out for these 5022 * and other operations. 5023 */ 5024 if (!capable && 5025 (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD)) 5026 return -EPERM; 5027 5028 err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); 5029 if (err) 5030 return err; 5031 size = min_t(u32, size, sizeof(attr)); 5032 5033 /* copy attributes from user space, may be less than sizeof(bpf_attr) */ 5034 memset(&attr, 0, sizeof(attr)); 5035 if (copy_from_bpfptr(&attr, uattr, size) != 0) 5036 return -EFAULT; 5037 5038 err = security_bpf(cmd, &attr, size); 5039 if (err < 0) 5040 return err; 5041 5042 switch (cmd) { 5043 case BPF_MAP_CREATE: 5044 err = map_create(&attr); 5045 break; 5046 case BPF_MAP_LOOKUP_ELEM: 5047 err = map_lookup_elem(&attr); 5048 break; 5049 case BPF_MAP_UPDATE_ELEM: 5050 err = map_update_elem(&attr, uattr); 5051 break; 5052 case BPF_MAP_DELETE_ELEM: 5053 err = map_delete_elem(&attr, uattr); 5054 break; 5055 case BPF_MAP_GET_NEXT_KEY: 5056 err = map_get_next_key(&attr); 5057 break; 5058 case BPF_MAP_FREEZE: 5059 err = map_freeze(&attr); 5060 break; 5061 case BPF_PROG_LOAD: 5062 err = bpf_prog_load(&attr, uattr); 5063 break; 5064 case BPF_OBJ_PIN: 5065 err = bpf_obj_pin(&attr); 5066 break; 5067 case BPF_OBJ_GET: 5068 err = bpf_obj_get(&attr); 5069 break; 5070 case BPF_PROG_ATTACH: 5071 err = bpf_prog_attach(&attr); 5072 break; 5073 case BPF_PROG_DETACH: 5074 err = bpf_prog_detach(&attr); 5075 break; 5076 case BPF_PROG_QUERY: 5077 err = bpf_prog_query(&attr, uattr.user); 5078 break; 5079 case BPF_PROG_TEST_RUN: 5080 err = bpf_prog_test_run(&attr, uattr.user); 5081 break; 5082 case BPF_PROG_GET_NEXT_ID: 5083 err = bpf_obj_get_next_id(&attr, uattr.user, 5084 &prog_idr, &prog_idr_lock); 5085 break; 5086 case BPF_MAP_GET_NEXT_ID: 5087 err = bpf_obj_get_next_id(&attr, uattr.user, 5088 &map_idr, &map_idr_lock); 5089 break; 5090 case BPF_BTF_GET_NEXT_ID: 5091 err = bpf_obj_get_next_id(&attr, uattr.user, 5092 &btf_idr, &btf_idr_lock); 5093 break; 5094 case BPF_PROG_GET_FD_BY_ID: 5095 err = bpf_prog_get_fd_by_id(&attr); 5096 break; 5097 case BPF_MAP_GET_FD_BY_ID: 5098 err = bpf_map_get_fd_by_id(&attr); 5099 break; 5100 case BPF_OBJ_GET_INFO_BY_FD: 5101 err = bpf_obj_get_info_by_fd(&attr, uattr.user); 5102 break; 5103 case BPF_RAW_TRACEPOINT_OPEN: 5104 err = bpf_raw_tracepoint_open(&attr); 5105 break; 5106 case BPF_BTF_LOAD: 5107 err = bpf_btf_load(&attr, uattr); 5108 break; 5109 case BPF_BTF_GET_FD_BY_ID: 5110 err = bpf_btf_get_fd_by_id(&attr); 5111 break; 5112 case BPF_TASK_FD_QUERY: 5113 err = bpf_task_fd_query(&attr, uattr.user); 5114 break; 5115 case BPF_MAP_LOOKUP_AND_DELETE_ELEM: 5116 err = map_lookup_and_delete_elem(&attr); 5117 break; 5118 case BPF_MAP_LOOKUP_BATCH: 5119 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH); 5120 break; 5121 case BPF_MAP_LOOKUP_AND_DELETE_BATCH: 5122 err = bpf_map_do_batch(&attr, uattr.user, 5123 BPF_MAP_LOOKUP_AND_DELETE_BATCH); 5124 break; 5125 case BPF_MAP_UPDATE_BATCH: 5126 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH); 5127 break; 5128 case BPF_MAP_DELETE_BATCH: 5129 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH); 5130 break; 5131 case BPF_LINK_CREATE: 5132 err = link_create(&attr, uattr); 5133 break; 5134 case BPF_LINK_UPDATE: 5135 err = link_update(&attr); 5136 break; 5137 case BPF_LINK_GET_FD_BY_ID: 5138 err = bpf_link_get_fd_by_id(&attr); 5139 break; 5140 case BPF_LINK_GET_NEXT_ID: 5141 err = bpf_obj_get_next_id(&attr, uattr.user, 5142 &link_idr, &link_idr_lock); 5143 break; 5144 case BPF_ENABLE_STATS: 5145 err = bpf_enable_stats(&attr); 5146 break; 5147 case BPF_ITER_CREATE: 5148 err = bpf_iter_create(&attr); 5149 break; 5150 case BPF_LINK_DETACH: 5151 err = link_detach(&attr); 5152 break; 5153 case BPF_PROG_BIND_MAP: 5154 err = bpf_prog_bind_map(&attr); 5155 break; 5156 default: 5157 err = -EINVAL; 5158 break; 5159 } 5160 5161 return err; 5162 } 5163 5164 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 5165 { 5166 return __sys_bpf(cmd, USER_BPFPTR(uattr), size); 5167 } 5168 5169 static bool syscall_prog_is_valid_access(int off, int size, 5170 enum bpf_access_type type, 5171 const struct bpf_prog *prog, 5172 struct bpf_insn_access_aux *info) 5173 { 5174 if (off < 0 || off >= U16_MAX) 5175 return false; 5176 if (off % size != 0) 5177 return false; 5178 return true; 5179 } 5180 5181 BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size) 5182 { 5183 switch (cmd) { 5184 case BPF_MAP_CREATE: 5185 case BPF_MAP_DELETE_ELEM: 5186 case BPF_MAP_UPDATE_ELEM: 5187 case BPF_MAP_FREEZE: 5188 case BPF_MAP_GET_FD_BY_ID: 5189 case BPF_PROG_LOAD: 5190 case BPF_BTF_LOAD: 5191 case BPF_LINK_CREATE: 5192 case BPF_RAW_TRACEPOINT_OPEN: 5193 break; 5194 default: 5195 return -EINVAL; 5196 } 5197 return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size); 5198 } 5199 5200 5201 /* To shut up -Wmissing-prototypes. 5202 * This function is used by the kernel light skeleton 5203 * to load bpf programs when modules are loaded or during kernel boot. 5204 * See tools/lib/bpf/skel_internal.h 5205 */ 5206 int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); 5207 5208 int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size) 5209 { 5210 struct bpf_prog * __maybe_unused prog; 5211 struct bpf_tramp_run_ctx __maybe_unused run_ctx; 5212 5213 switch (cmd) { 5214 #ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */ 5215 case BPF_PROG_TEST_RUN: 5216 if (attr->test.data_in || attr->test.data_out || 5217 attr->test.ctx_out || attr->test.duration || 5218 attr->test.repeat || attr->test.flags) 5219 return -EINVAL; 5220 5221 prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL); 5222 if (IS_ERR(prog)) 5223 return PTR_ERR(prog); 5224 5225 if (attr->test.ctx_size_in < prog->aux->max_ctx_offset || 5226 attr->test.ctx_size_in > U16_MAX) { 5227 bpf_prog_put(prog); 5228 return -EINVAL; 5229 } 5230 5231 run_ctx.bpf_cookie = 0; 5232 run_ctx.saved_run_ctx = NULL; 5233 if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) { 5234 /* recursion detected */ 5235 bpf_prog_put(prog); 5236 return -EBUSY; 5237 } 5238 attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in); 5239 __bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */, 5240 &run_ctx); 5241 bpf_prog_put(prog); 5242 return 0; 5243 #endif 5244 default: 5245 return ____bpf_sys_bpf(cmd, attr, size); 5246 } 5247 } 5248 EXPORT_SYMBOL(kern_sys_bpf); 5249 5250 static const struct bpf_func_proto bpf_sys_bpf_proto = { 5251 .func = bpf_sys_bpf, 5252 .gpl_only = false, 5253 .ret_type = RET_INTEGER, 5254 .arg1_type = ARG_ANYTHING, 5255 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 5256 .arg3_type = ARG_CONST_SIZE, 5257 }; 5258 5259 const struct bpf_func_proto * __weak 5260 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5261 { 5262 return bpf_base_func_proto(func_id); 5263 } 5264 5265 BPF_CALL_1(bpf_sys_close, u32, fd) 5266 { 5267 /* When bpf program calls this helper there should not be 5268 * an fdget() without matching completed fdput(). 5269 * This helper is allowed in the following callchain only: 5270 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close 5271 */ 5272 return close_fd(fd); 5273 } 5274 5275 static const struct bpf_func_proto bpf_sys_close_proto = { 5276 .func = bpf_sys_close, 5277 .gpl_only = false, 5278 .ret_type = RET_INTEGER, 5279 .arg1_type = ARG_ANYTHING, 5280 }; 5281 5282 BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res) 5283 { 5284 if (flags) 5285 return -EINVAL; 5286 5287 if (name_sz <= 1 || name[name_sz - 1]) 5288 return -EINVAL; 5289 5290 if (!bpf_dump_raw_ok(current_cred())) 5291 return -EPERM; 5292 5293 *res = kallsyms_lookup_name(name); 5294 return *res ? 0 : -ENOENT; 5295 } 5296 5297 static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { 5298 .func = bpf_kallsyms_lookup_name, 5299 .gpl_only = false, 5300 .ret_type = RET_INTEGER, 5301 .arg1_type = ARG_PTR_TO_MEM, 5302 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 5303 .arg3_type = ARG_ANYTHING, 5304 .arg4_type = ARG_PTR_TO_LONG, 5305 }; 5306 5307 static const struct bpf_func_proto * 5308 syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5309 { 5310 switch (func_id) { 5311 case BPF_FUNC_sys_bpf: 5312 return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto; 5313 case BPF_FUNC_btf_find_by_name_kind: 5314 return &bpf_btf_find_by_name_kind_proto; 5315 case BPF_FUNC_sys_close: 5316 return &bpf_sys_close_proto; 5317 case BPF_FUNC_kallsyms_lookup_name: 5318 return &bpf_kallsyms_lookup_name_proto; 5319 default: 5320 return tracing_prog_func_proto(func_id, prog); 5321 } 5322 } 5323 5324 const struct bpf_verifier_ops bpf_syscall_verifier_ops = { 5325 .get_func_proto = syscall_prog_func_proto, 5326 .is_valid_access = syscall_prog_is_valid_access, 5327 }; 5328 5329 const struct bpf_prog_ops bpf_syscall_prog_ops = { 5330 .test_run = bpf_prog_test_run_syscall, 5331 }; 5332 5333 #ifdef CONFIG_SYSCTL 5334 static int bpf_stats_handler(struct ctl_table *table, int write, 5335 void *buffer, size_t *lenp, loff_t *ppos) 5336 { 5337 struct static_key *key = (struct static_key *)table->data; 5338 static int saved_val; 5339 int val, ret; 5340 struct ctl_table tmp = { 5341 .data = &val, 5342 .maxlen = sizeof(val), 5343 .mode = table->mode, 5344 .extra1 = SYSCTL_ZERO, 5345 .extra2 = SYSCTL_ONE, 5346 }; 5347 5348 if (write && !capable(CAP_SYS_ADMIN)) 5349 return -EPERM; 5350 5351 mutex_lock(&bpf_stats_enabled_mutex); 5352 val = saved_val; 5353 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 5354 if (write && !ret && val != saved_val) { 5355 if (val) 5356 static_key_slow_inc(key); 5357 else 5358 static_key_slow_dec(key); 5359 saved_val = val; 5360 } 5361 mutex_unlock(&bpf_stats_enabled_mutex); 5362 return ret; 5363 } 5364 5365 void __weak unpriv_ebpf_notify(int new_state) 5366 { 5367 } 5368 5369 static int bpf_unpriv_handler(struct ctl_table *table, int write, 5370 void *buffer, size_t *lenp, loff_t *ppos) 5371 { 5372 int ret, unpriv_enable = *(int *)table->data; 5373 bool locked_state = unpriv_enable == 1; 5374 struct ctl_table tmp = *table; 5375 5376 if (write && !capable(CAP_SYS_ADMIN)) 5377 return -EPERM; 5378 5379 tmp.data = &unpriv_enable; 5380 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 5381 if (write && !ret) { 5382 if (locked_state && unpriv_enable != 1) 5383 return -EPERM; 5384 *(int *)table->data = unpriv_enable; 5385 } 5386 5387 unpriv_ebpf_notify(unpriv_enable); 5388 5389 return ret; 5390 } 5391 5392 static struct ctl_table bpf_syscall_table[] = { 5393 { 5394 .procname = "unprivileged_bpf_disabled", 5395 .data = &sysctl_unprivileged_bpf_disabled, 5396 .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), 5397 .mode = 0644, 5398 .proc_handler = bpf_unpriv_handler, 5399 .extra1 = SYSCTL_ZERO, 5400 .extra2 = SYSCTL_TWO, 5401 }, 5402 { 5403 .procname = "bpf_stats_enabled", 5404 .data = &bpf_stats_enabled_key.key, 5405 .mode = 0644, 5406 .proc_handler = bpf_stats_handler, 5407 }, 5408 { } 5409 }; 5410 5411 static int __init bpf_syscall_sysctl_init(void) 5412 { 5413 register_sysctl_init("kernel", bpf_syscall_table); 5414 return 0; 5415 } 5416 late_initcall(bpf_syscall_sysctl_init); 5417 #endif /* CONFIG_SYSCTL */ 5418