// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
#include <linux/bsearch.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
#include <linux/pgtable.h>
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/sort.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
			IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK   (BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);
static DEFINE_IDR(link_idr);
static DEFINE_SPINLOCK(link_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly =
	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
			     size_t expected_size,
			     size_t actual_size)
{
	int res;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (actual_size <= expected_size)
		return 0;

	if (uaddr.is_kernel)
		res = memchr_inv(uaddr.kernel + expected_size, 0,
				 actual_size - expected_size) == NULL;
	else
		res = check_zeroed_user(uaddr.user + expected_size,
					actual_size - expected_size);
	if (res < 0)
		return res;
	return res ? 0 : -E2BIG;
}
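/* Illustrative caller pattern (not copied from one particular call site):
 * a syscall handler typically validates the user-supplied attribute size
 * with bpf_check_uarg_tail_zero() and then copies only the part it knows
 * about, e.g.:
 *
 *	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
 *	if (err)
 *		return err;
 *	size = min_t(u32, size, sizeof(attr));
 *	if (copy_from_bpfptr(&attr, uattr, size))
 *		return -EFAULT;
 *
 * so a newer user space passing a larger struct is accepted as long as the
 * extra tail bytes are zero.
 */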
const struct bpf_map_ops bpf_map_offload_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = bpf_map_offload_map_alloc,
	.map_free = bpf_map_offload_map_free,
	.map_check_btf = map_check_no_btf,
};

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	const struct bpf_map_ops *ops;
	u32 type = attr->map_type;
	struct bpf_map *map;
	int err;

	if (type >= ARRAY_SIZE(bpf_map_types))
		return ERR_PTR(-EINVAL);
	type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
	ops = bpf_map_types[type];
	if (!ops)
		return ERR_PTR(-EINVAL);

	if (ops->map_alloc_check) {
		err = ops->map_alloc_check(attr);
		if (err)
			return ERR_PTR(err);
	}
	if (attr->map_ifindex)
		ops = &bpf_map_offload_ops;
	map = ops->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = ops;
	map->map_type = type;
	return map;
}

static void bpf_map_write_active_inc(struct bpf_map *map)
{
	atomic64_inc(&map->writecnt);
}

static void bpf_map_write_active_dec(struct bpf_map *map)
{
	atomic64_dec(&map->writecnt);
}

bool bpf_map_write_active(const struct bpf_map *map)
{
	return atomic64_read(&map->writecnt) != 0;
}

static u32 bpf_map_value_size(const struct bpf_map *map)
{
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		return round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		return sizeof(u32);
	else
		return map->value_size;
}
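/* For example (illustrative numbers): a BPF_MAP_TYPE_PERCPU_ARRAY with
 * value_size = 12 on a machine with 8 possible CPUs is copied to/from user
 * space as round_up(12, 8) * 8 = 16 * 8 = 128 bytes per element, i.e. one
 * 8-byte aligned slot per possible CPU. fd-based maps exchange a 32-bit fd
 * or id rather than the kernel-internal pointer, hence sizeof(u32).
 */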
static void maybe_wait_bpf_programs(struct bpf_map *map)
{
	/* Wait for any running BPF programs to complete so that
	 * userspace, when we return to it, knows that all programs
	 * that could be running use the new map value.
	 */
	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
		synchronize_rcu();
}

static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
				void *key, void *value, __u64 flags)
{
	int err;

	/* Need to create a kthread, thus must support schedule */
	if (bpf_map_is_offloaded(map)) {
		return bpf_map_offload_update_elem(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		return map->ops->map_update_elem(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
		   map->map_type == BPF_MAP_TYPE_SOCKMAP) {
		return sock_map_update_elem_sys(map, key, value, flags);
	} else if (IS_FD_PROG_ARRAY(map)) {
		return bpf_fd_array_map_update_elem(map, map_file, key, value,
						    flags);
	}

	bpf_disable_instrumentation();
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
		err = bpf_percpu_cgroup_storage_update(map, key, value,
						       flags);
	} else if (IS_FD_ARRAY(map)) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, map_file, key, value,
						   flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, map_file, key, value,
						  flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		/* rcu_read_lock() is not needed */
		err = bpf_fd_reuseport_array_update_elem(map, key, value,
							 flags);
	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
		   map->map_type == BPF_MAP_TYPE_STACK ||
		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
		err = map->ops->map_push_elem(map, value, flags);
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, flags);
		rcu_read_unlock();
	}
	bpf_enable_instrumentation();
	maybe_wait_bpf_programs(map);

	return err;
}
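/* Both bpf_map_update_value() above and bpf_map_copy_value() below wrap the
 * generic cases in bpf_disable_instrumentation()/bpf_enable_instrumentation(),
 * which bumps the per-CPU bpf_prog_active counter. Tracing attachments (e.g.
 * via trace_call_bpf()) check that counter before running, so a tracing
 * program attached somewhere inside the map implementation cannot recursively
 * touch the same map while its internal locks are held.
 */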
static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
			      __u64 flags)
{
	void *ptr;
	int err;

	if (bpf_map_is_offloaded(map))
		return bpf_map_offload_lookup_elem(map, key, value);

	bpf_disable_instrumentation();
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
		err = bpf_percpu_cgroup_storage_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
		   map->map_type == BPF_MAP_TYPE_STACK ||
		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
		err = map->ops->map_peek_elem(map, value);
	} else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		/* struct_ops map requires directly updating "value" */
		err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		if (map->ops->map_lookup_elem_sys_only)
			ptr = map->ops->map_lookup_elem_sys_only(map, key);
		else
			ptr = map->ops->map_lookup_elem(map, key);
		if (IS_ERR(ptr)) {
			err = PTR_ERR(ptr);
		} else if (!ptr) {
			err = -ENOENT;
		} else {
			err = 0;
			if (flags & BPF_F_LOCK)
				/* lock 'ptr' and copy everything but lock */
				copy_map_value_locked(map, value, ptr, true);
			else
				copy_map_value(map, value, ptr);
			/* mask lock and timer, since value wasn't zero inited */
			check_and_init_map_value(map, value);
		}
		rcu_read_unlock();
	}

	bpf_enable_instrumentation();
	maybe_wait_bpf_programs(map);

	return err;
}

/* Please do not use this function outside of the map creation path
 * (e.g. in the map update path) without taking care of setting the active
 * memory cgroup (see bpf_map_kmalloc_node() for example).
 */
static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
{
	/* We really just want to fail instead of triggering OOM killer
	 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
	 * which is used for lower order allocation requests.
	 *
	 * It has been observed that higher order allocation requests done by
	 * vmalloc with __GFP_NORETRY being set might fail due to not trying
	 * to reclaim memory from the page cache, thus we set
	 * __GFP_RETRY_MAYFAIL to avoid such situations.
	 */

	gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO);
	unsigned int flags = 0;
	unsigned long align = 1;
	void *area;

	if (size >= SIZE_MAX)
		return NULL;

	/* kmalloc()'ed memory can't be mmap()'ed */
	if (mmapable) {
		BUG_ON(!PAGE_ALIGNED(size));
		align = SHMLBA;
		flags = VM_USERMAP;
	} else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
				    numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
			gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
			flags, numa_node, __builtin_return_address(0));
}

void *bpf_map_area_alloc(u64 size, int numa_node)
{
	return __bpf_map_area_alloc(size, numa_node, false);
}

void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
{
	return __bpf_map_area_alloc(size, numa_node, true);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}
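/* bpf_map_area_alloc() may hand back either kmalloc()'ed or vmalloc()'ed
 * memory depending on size (requests up to PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER
 * try kmalloc first); kvfree() in bpf_map_area_free() handles both cases, so
 * callers never need to know which path was taken. mmapable areas always go
 * through vmalloc so they can be mapped into user space with VM_USERMAP.
 */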
static u32 bpf_map_flags_retain_permanent(u32 flags)
{
	/* Some map creation flags are not tied to the map object but
	 * rather to the map fd instead, so they have no meaning upon
	 * map object inspection since multiple file descriptors with
	 * different (access) properties can exist here. Thus, given
	 * this has zero meaning for the map itself, let's clear these
	 * flags here.
	 */
	return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
}

void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
{
	map->map_type = attr->map_type;
	map->key_size = attr->key_size;
	map->value_size = attr->value_size;
	map->max_entries = attr->max_entries;
	map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
	map->numa_node = bpf_map_attr_numa_node(attr);
	map->map_extra = attr->map_extra;
}

static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);
	idr_preload_end();

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

void bpf_map_free_id(struct bpf_map *map)
{
	unsigned long flags;

	/* Offloaded maps are removed from the IDR store when their device
	 * disappears - even if someone holds an fd to them they are unusable,
	 * the memory is gone, all ops will fail; they are simply waiting for
	 * refcnt to drop to be freed.
	 */
	if (!map->id)
		return;

	spin_lock_irqsave(&map_idr_lock, flags);

	idr_remove(&map_idr, map->id);
	map->id = 0;

	spin_unlock_irqrestore(&map_idr_lock, flags);
}

#ifdef CONFIG_MEMCG_KMEM
static void bpf_map_save_memcg(struct bpf_map *map)
{
	/* Currently if a map is created by a process belonging to the root
	 * memory cgroup, get_obj_cgroup_from_current() will return NULL.
	 * So we have to check map->objcg for being NULL each time it's
	 * being used.
	 */
	if (memcg_bpf_enabled())
		map->objcg = get_obj_cgroup_from_current();
}

static void bpf_map_release_memcg(struct bpf_map *map)
{
	if (map->objcg)
		obj_cgroup_put(map->objcg);
}

static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
{
	if (map->objcg)
		return get_mem_cgroup_from_objcg(map->objcg);

	return root_mem_cgroup;
}

void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
			   int node)
{
	struct mem_cgroup *memcg, *old_memcg;
	void *ptr;

	memcg = bpf_map_get_memcg(map);
	old_memcg = set_active_memcg(memcg);
	ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
	set_active_memcg(old_memcg);
	mem_cgroup_put(memcg);

	return ptr;
}

void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
{
	struct mem_cgroup *memcg, *old_memcg;
	void *ptr;

	memcg = bpf_map_get_memcg(map);
	old_memcg = set_active_memcg(memcg);
	ptr = kzalloc(size, flags | __GFP_ACCOUNT);
	set_active_memcg(old_memcg);
	mem_cgroup_put(memcg);

	return ptr;
}

void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
		       gfp_t flags)
{
	struct mem_cgroup *memcg, *old_memcg;
	void *ptr;

	memcg = bpf_map_get_memcg(map);
	old_memcg = set_active_memcg(memcg);
	ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
	set_active_memcg(old_memcg);
	mem_cgroup_put(memcg);

	return ptr;
}
void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
				    size_t align, gfp_t flags)
{
	struct mem_cgroup *memcg, *old_memcg;
	void __percpu *ptr;

	memcg = bpf_map_get_memcg(map);
	old_memcg = set_active_memcg(memcg);
	ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
	set_active_memcg(old_memcg);
	mem_cgroup_put(memcg);

	return ptr;
}

#else
static void bpf_map_save_memcg(struct bpf_map *map)
{
}

static void bpf_map_release_memcg(struct bpf_map *map)
{
}
#endif

static int btf_field_cmp(const void *a, const void *b)
{
	const struct btf_field *f1 = a, *f2 = b;

	if (f1->offset < f2->offset)
		return -1;
	else if (f1->offset > f2->offset)
		return 1;
	return 0;
}

struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset,
				  enum btf_field_type type)
{
	struct btf_field *field;

	if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & type))
		return NULL;
	field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp);
	if (!field || !(field->type & type))
		return NULL;
	return field;
}
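/* The bsearch() in btf_record_find() assumes rec->fields[] is kept sorted by
 * offset (btf_field_cmp() is the comparison used for that ordering); records
 * built when the value type's BTF is parsed satisfy this, so a field lookup
 * by offset is O(log n) rather than a linear scan.
 */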
void btf_record_free(struct btf_record *rec)
{
	int i;

	if (IS_ERR_OR_NULL(rec))
		return;
	for (i = 0; i < rec->cnt; i++) {
		switch (rec->fields[i].type) {
		case BPF_KPTR_UNREF:
		case BPF_KPTR_REF:
			if (rec->fields[i].kptr.module)
				module_put(rec->fields[i].kptr.module);
			btf_put(rec->fields[i].kptr.btf);
			break;
		case BPF_LIST_HEAD:
		case BPF_LIST_NODE:
		case BPF_RB_ROOT:
		case BPF_RB_NODE:
		case BPF_SPIN_LOCK:
		case BPF_TIMER:
			/* Nothing to release */
			break;
		default:
			WARN_ON_ONCE(1);
			continue;
		}
	}
	kfree(rec);
}

void bpf_map_free_record(struct bpf_map *map)
{
	btf_record_free(map->record);
	map->record = NULL;
}

struct btf_record *btf_record_dup(const struct btf_record *rec)
{
	const struct btf_field *fields;
	struct btf_record *new_rec;
	int ret, size, i;

	if (IS_ERR_OR_NULL(rec))
		return NULL;
	size = offsetof(struct btf_record, fields[rec->cnt]);
	new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN);
	if (!new_rec)
		return ERR_PTR(-ENOMEM);
	/* Do a deep copy of the btf_record */
	fields = rec->fields;
	new_rec->cnt = 0;
	for (i = 0; i < rec->cnt; i++) {
		switch (fields[i].type) {
		case BPF_KPTR_UNREF:
		case BPF_KPTR_REF:
			btf_get(fields[i].kptr.btf);
			if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
				ret = -ENXIO;
				goto free;
			}
			break;
		case BPF_LIST_HEAD:
		case BPF_LIST_NODE:
		case BPF_RB_ROOT:
		case BPF_RB_NODE:
		case BPF_SPIN_LOCK:
		case BPF_TIMER:
			/* Nothing to acquire */
			break;
		default:
			ret = -EFAULT;
			WARN_ON_ONCE(1);
			goto free;
		}
		new_rec->cnt++;
	}
	return new_rec;
free:
	btf_record_free(new_rec);
	return ERR_PTR(ret);
}

bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b)
{
	bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b);
	int size;

	if (!a_has_fields && !b_has_fields)
		return true;
	if (a_has_fields != b_has_fields)
		return false;
	if (rec_a->cnt != rec_b->cnt)
		return false;
	size = offsetof(struct btf_record, fields[rec_a->cnt]);
	/* btf_parse_fields uses kzalloc to allocate a btf_record, so unused
	 * members are zeroed out. So memcmp is safe to do without worrying
	 * about padding/unused fields.
	 *
	 * While spin_lock, timer, and kptr have no relation to map BTF,
	 * list_head metadata is specific to map BTF, the btf and value_rec
	 * members in particular. btf is the map BTF, while value_rec points to
	 * btf_record in that map BTF.
	 *
	 * So while by default, we don't rely on the map BTF (which the records
	 * were parsed from) matching for both records, which is not backwards
	 * compatible, in case list_head is part of it, we implicitly rely on
	 * that by way of depending on memcmp succeeding for it.
	 */
	return !memcmp(rec_a, rec_b, size);
}

void bpf_obj_free_timer(const struct btf_record *rec, void *obj)
{
	if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER)))
		return;
	bpf_timer_cancel_and_free(obj + rec->timer_off);
}

void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
{
	const struct btf_field *fields;
	int i;

	if (IS_ERR_OR_NULL(rec))
		return;
	fields = rec->fields;
	for (i = 0; i < rec->cnt; i++) {
		const struct btf_field *field = &fields[i];
		void *field_ptr = obj + field->offset;

		switch (fields[i].type) {
		case BPF_SPIN_LOCK:
			break;
		case BPF_TIMER:
			bpf_timer_cancel_and_free(field_ptr);
			break;
		case BPF_KPTR_UNREF:
			WRITE_ONCE(*(u64 *)field_ptr, 0);
			break;
		case BPF_KPTR_REF:
			field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0));
			break;
		case BPF_LIST_HEAD:
			if (WARN_ON_ONCE(rec->spin_lock_off < 0))
				continue;
			bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off);
			break;
		case BPF_RB_ROOT:
			if (WARN_ON_ONCE(rec->spin_lock_off < 0))
				continue;
			bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
			break;
		case BPF_LIST_NODE:
		case BPF_RB_NODE:
			break;
		default:
			WARN_ON_ONCE(1);
			continue;
		}
	}
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);
	struct btf_field_offs *foffs = map->field_offs;
	struct btf_record *rec = map->record;

	security_bpf_map_free(map);
	bpf_map_release_memcg(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
	/* Delay freeing of field_offs and btf_record for maps, as map_free
	 * callback usually needs access to them. It is better to do it here
	 * than require each callback to do the free itself manually.
	 *
	 * Note that the btf_record stashed in map->inner_map_meta->record was
	 * already freed using the map_free callback for map in map case which
	 * eventually calls bpf_map_free_meta, since inner_map_meta is only a
	 * template bpf_map struct used during verification.
	 */
	kfree(foffs);
	btf_record_free(rec);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic64_dec_and_test(&map->usercnt)) {
		if (map->ops->map_release_uref)
			map->ops->map_release_uref(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic64_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map);
		btf_put(map->btf);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		/* Avoid spawning kworkers, since they all might contend
		 * for the same mutex like slab_mutex.
		 */
		queue_work(system_unbound_wq, &map->work);
	}
}
EXPORT_SYMBOL_GPL(bpf_map_put);
void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
{
	fmode_t mode = f.file->f_mode;

	/* Our file permissions may have been overridden by global
	 * map permissions facing syscall side.
	 */
	if (READ_ONCE(map->frozen))
		mode &= ~FMODE_CAN_WRITE;
	return mode;
}

#ifdef CONFIG_PROC_FS
/* Provides an approximation of the map's memory footprint.
 * Used only to provide backward compatibility and display
 * reasonable "memlock" info.
 */
static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
{
	unsigned long size;

	size = round_up(map->key_size + bpf_map_value_size(map), 8);

	return round_up(map->max_entries * size, PAGE_SIZE);
}
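/* Example (illustrative numbers): a hash map with key_size = 4,
 * value_size = 8 and max_entries = 1024 reports
 * round_up(1024 * round_up(4 + 8, 8), PAGE_SIZE) = round_up(16384, 4096)
 * = 16384 bytes of "memlock". This intentionally ignores the per-element and
 * housekeeping overhead of the specific map implementation.
 */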
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	struct bpf_map *map = filp->private_data;
	u32 type = 0, jited = 0;

	if (map_type_contains_progs(map)) {
		spin_lock(&map->owner.lock);
		type  = map->owner.type;
		jited = map->owner.jited;
		spin_unlock(&map->owner.lock);
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "map_extra:\t%#llx\n"
		   "memlock:\t%lu\n"
		   "map_id:\t%u\n"
		   "frozen:\t%u\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   (unsigned long long)map->map_extra,
		   bpf_map_memory_footprint(map),
		   map->id,
		   READ_ONCE(map->frozen));
	if (type) {
		seq_printf(m, "owner_prog_type:\t%u\n", type);
		seq_printf(m, "owner_jited:\t%u\n", jited);
	}
}
#endif

static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}

/* called for any extra memory-mapped regions (except the initial one) */
static void bpf_map_mmap_open(struct vm_area_struct *vma)
{
	struct bpf_map *map = vma->vm_file->private_data;

	if (vma->vm_flags & VM_MAYWRITE)
		bpf_map_write_active_inc(map);
}

/* called for all unmapped memory regions (including the initial one) */
static void bpf_map_mmap_close(struct vm_area_struct *vma)
{
	struct bpf_map *map = vma->vm_file->private_data;

	if (vma->vm_flags & VM_MAYWRITE)
		bpf_map_write_active_dec(map);
}

static const struct vm_operations_struct bpf_map_default_vmops = {
	.open		= bpf_map_mmap_open,
	.close		= bpf_map_mmap_close,
};

static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct bpf_map *map = filp->private_data;
	int err;

	if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record))
		return -ENOTSUPP;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	mutex_lock(&map->freeze_mutex);

	if (vma->vm_flags & VM_WRITE) {
		if (map->frozen) {
			err = -EPERM;
			goto out;
		}
		/* map is meant to be read-only, so do not allow mapping as
		 * writable, because it's possible to leak a writable page
		 * reference and allow user-space to still modify it after
		 * freezing, while the verifier will assume contents do not change
		 */
		if (map->map_flags & BPF_F_RDONLY_PROG) {
			err = -EACCES;
			goto out;
		}
	}

	/* set default open/close callbacks */
	vma->vm_ops = &bpf_map_default_vmops;
	vma->vm_private_data = map;
	vm_flags_clear(vma, VM_MAYEXEC);
	if (!(vma->vm_flags & VM_WRITE))
		/* disallow re-mapping with PROT_WRITE */
		vm_flags_clear(vma, VM_MAYWRITE);

	err = map->ops->map_mmap(map, vma);
	if (err)
		goto out;

	if (vma->vm_flags & VM_MAYWRITE)
		bpf_map_write_active_inc(map);
out:
	mutex_unlock(&map->freeze_mutex);
	return err;
}

static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_poll)
		return map->ops->map_poll(map, filp, pts);

	return EPOLLERR;
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
	.mmap		= bpf_map_mmap,
	.poll		= bpf_map_poll,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
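/* For example, BPF_MAP_CREATE declares its last field as map_extra (see
 * BPF_MAP_CREATE_LAST_FIELD below), so CHECK_ATTR(BPF_MAP_CREATE) rejects an
 * attr in which any byte between the end of attr->map_extra and the end of
 * union bpf_attr is non-zero, i.e. fields a command does not consume must be
 * zeroed by user space.
 */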
/* dst and src must have at least "size" number of bytes.
 * Return strlen on success and < 0 on error.
 */
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
{
	const char *end = src + size;
	const char *orig_src = src;

	memset(dst, 0, size);
	/* Copy all isalnum(), '_' and '.' chars. */
	while (src < end && *src) {
		if (!isalnum(*src) &&
		    *src != '_' && *src != '.')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in "size" number of bytes */
	if (src == end)
		return -EINVAL;

	return src - orig_src;
}

int map_check_no_btf(const struct bpf_map *map,
		     const struct btf *btf,
		     const struct btf_type *key_type,
		     const struct btf_type *value_type)
{
	return -ENOTSUPP;
}

static int map_check_btf(struct bpf_map *map, const struct btf *btf,
			 u32 btf_key_id, u32 btf_value_id)
{
	const struct btf_type *key_type, *value_type;
	u32 key_size, value_size;
	int ret = 0;

	/* Some maps allow key to be unspecified. */
	if (btf_key_id) {
		key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
		if (!key_type || key_size != map->key_size)
			return -EINVAL;
	} else {
		key_type = btf_type_by_id(btf, 0);
		if (!map->ops->map_check_btf)
			return -EINVAL;
	}

	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
	if (!value_type || value_size != map->value_size)
		return -EINVAL;

	map->record = btf_parse_fields(btf, value_type,
				       BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
				       BPF_RB_ROOT,
				       map->value_size);
	if (!IS_ERR_OR_NULL(map->record)) {
		int i;

		if (!bpf_capable()) {
			ret = -EPERM;
			goto free_map_tab;
		}
		if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) {
			ret = -EACCES;
			goto free_map_tab;
		}
		for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) {
			switch (map->record->field_mask & (1 << i)) {
			case 0:
				continue;
			case BPF_SPIN_LOCK:
				if (map->map_type != BPF_MAP_TYPE_HASH &&
				    map->map_type != BPF_MAP_TYPE_ARRAY &&
				    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
				    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
				    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
				    map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
				    map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
					ret = -EOPNOTSUPP;
					goto free_map_tab;
				}
				break;
			case BPF_TIMER:
				if (map->map_type != BPF_MAP_TYPE_HASH &&
				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
				    map->map_type != BPF_MAP_TYPE_ARRAY) {
					ret = -EOPNOTSUPP;
					goto free_map_tab;
				}
				break;
			case BPF_KPTR_UNREF:
			case BPF_KPTR_REF:
				if (map->map_type != BPF_MAP_TYPE_HASH &&
				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
				    map->map_type != BPF_MAP_TYPE_ARRAY &&
				    map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
					ret = -EOPNOTSUPP;
					goto free_map_tab;
				}
				break;
			case BPF_LIST_HEAD:
			case BPF_RB_ROOT:
				if (map->map_type != BPF_MAP_TYPE_HASH &&
				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
				    map->map_type != BPF_MAP_TYPE_ARRAY) {
					ret = -EOPNOTSUPP;
					goto free_map_tab;
				}
				break;
			default:
				/* Fail if map_type checks are missing for a field type */
				ret = -EOPNOTSUPP;
				goto free_map_tab;
			}
		}
	}

	ret = btf_check_and_fixup_fields(btf, map->record);
	if (ret < 0)
		goto free_map_tab;

	if (map->ops->map_check_btf) {
		ret = map->ops->map_check_btf(map, btf, key_type, value_type);
		if (ret < 0)
			goto free_map_tab;
	}

	return ret;
free_map_tab:
	bpf_map_free_record(map);
	return ret;
}
#define BPF_MAP_CREATE_LAST_FIELD map_extra
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct btf_field_offs *foffs;
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	if (attr->btf_vmlinux_value_type_id) {
		if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
		    attr->btf_key_type_id || attr->btf_value_type_id)
			return -EINVAL;
	} else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
		return -EINVAL;
	}

	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
	    attr->map_extra != 0)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name,
			       sizeof(attr->map_name));
	if (err < 0)
		goto free_map;

	atomic64_set(&map->refcnt, 1);
	atomic64_set(&map->usercnt, 1);
	mutex_init(&map->freeze_mutex);
	spin_lock_init(&map->owner.lock);

	if (attr->btf_key_type_id || attr->btf_value_type_id ||
	    /* Even if the map's value is a kernel struct,
	     * the bpf_prog.o must have BTF to begin with
	     * to figure out the corresponding kernel's
	     * counter part. Thus, attr->btf_fd has
	     * to be valid also.
	     */
	    attr->btf_vmlinux_value_type_id) {
		struct btf *btf;

		btf = btf_get_by_fd(attr->btf_fd);
		if (IS_ERR(btf)) {
			err = PTR_ERR(btf);
			goto free_map;
		}
		if (btf_is_kernel(btf)) {
			btf_put(btf);
			err = -EACCES;
			goto free_map;
		}
		map->btf = btf;

		if (attr->btf_value_type_id) {
			err = map_check_btf(map, btf, attr->btf_key_type_id,
					    attr->btf_value_type_id);
			if (err)
				goto free_map;
		}

		map->btf_key_type_id = attr->btf_key_type_id;
		map->btf_value_type_id = attr->btf_value_type_id;
		map->btf_vmlinux_value_type_id =
			attr->btf_vmlinux_value_type_id;
	}

	foffs = btf_parse_field_offs(map->record);
	if (IS_ERR(foffs)) {
		err = PTR_ERR(foffs);
		goto free_map;
	}
	map->field_offs = foffs;

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map_field_offs;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map_sec;

	bpf_map_save_memcg(map);

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put_with_uref() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put_with_uref(map);
		return err;
	}

	return err;

free_map_sec:
	security_bpf_map_free(map);
free_map_field_offs:
	kfree(map->field_offs);
free_map:
	btf_put(map->btf);
	map->ops->map_free(map);
	return err;
}
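/* From user space this path is reached via the bpf(2) syscall, e.g.
 * (illustrative sketch, error handling omitted):
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_ARRAY,
 *		.key_size    = 4,
 *		.value_size  = 8,
 *		.max_entries = 256,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * The returned fd holds one refcnt and one usercnt on the map; closing it
 * drops both via bpf_map_release().
 */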
/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

void bpf_map_inc(struct bpf_map *map)
{
	atomic64_inc(&map->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_map_inc);

void bpf_map_inc_with_uref(struct bpf_map *map)
{
	atomic64_inc(&map->refcnt);
	atomic64_inc(&map->usercnt);
}
EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);

struct bpf_map *bpf_map_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	bpf_map_inc(map);
	fdput(f);

	return map;
}
EXPORT_SYMBOL(bpf_map_get);

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	bpf_map_inc_with_uref(map);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
{
	int refold;

	refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
	if (!refold)
		return ERR_PTR(-ENOENT);
	if (uref)
		atomic64_inc(&map->usercnt);

	return map;
}

struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
{
	spin_lock_bh(&map_idr_lock);
	map = __bpf_map_inc_not_zero(map, false);
	spin_unlock_bh(&map_idr_lock);

	return map;
}
EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

static void *__bpf_copy_key(void __user *ukey, u64 key_size)
{
	if (key_size)
		return vmemdup_user(ukey, key_size);

	if (ukey)
		return ERR_PTR(-EINVAL);

	return NULL;
}

static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
{
	if (key_size)
		return kvmemdup_bpfptr(ukey, key_size);

	if (!bpfptr_is_null(ukey))
		return ERR_PTR(-EINVAL);

	return NULL;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	if (attr->flags & ~BPF_F_LOCK)
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	if ((attr->flags & BPF_F_LOCK) &&
	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
		err = -EINVAL;
		goto err_put;
	}

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map);

	err = -ENOMEM;
	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
		if (copy_from_user(value, uvalue, value_size))
			err = -EFAULT;
		else
			err = bpf_map_copy_value(map, key, value, attr->flags);
		goto free_value;
	}

	err = bpf_map_copy_value(map, key, value, attr->flags);
	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kvfree(value);
free_key:
	kvfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
{
	bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
	bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	if ((attr->flags & BPF_F_LOCK) &&
	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
		err = -EINVAL;
		goto err_put;
	}

	key = ___bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map);
	value = kvmemdup_bpfptr(uvalue, value_size);
	if (IS_ERR(value)) {
		err = PTR_ERR(value);
		goto free_key;
	}

	err = bpf_map_update_value(map, f.file, key, value, attr->flags);

	kvfree(value);
free_key:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
{
	bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = ___bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (bpf_map_is_offloaded(map)) {
		err = bpf_map_offload_delete_elem(map, key);
		goto out;
	} else if (IS_FD_PROG_ARRAY(map) ||
		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		/* These maps require sleepable context */
		err = map->ops->map_delete_elem(map, key);
		goto out;
	}

	bpf_disable_instrumentation();
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	bpf_enable_instrumentation();
	maybe_wait_bpf_programs(map);
out:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	fdput(f);
	return err;
}
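/* BPF_MAP_GET_NEXT_KEY below is what user space uses to walk a map: a NULL
 * (or no longer existing) key yields the first key, so a full scan looks
 * roughly like this with the libbpf-style wrappers (illustrative sketch,
 * error handling omitted):
 *
 *	void *key = NULL;
 *	__u32 next_key;
 *
 *	while (bpf_map_get_next_key(map_fd, key, &next_key) == 0) {
 *		// ... look up / process next_key ...
 *		key = &next_key;
 *	}
 */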
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	if (ukey) {
		key = __bpf_copy_key(ukey, map->key_size);
		if (IS_ERR(key)) {
			err = PTR_ERR(key);
			goto err_put;
		}
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kvmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	if (bpf_map_is_offloaded(map)) {
		err = bpf_map_offload_get_next_key(map, key, next_key);
		goto out;
	}

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
out:
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kvfree(next_key);
free_key:
	kvfree(key);
err_put:
	fdput(f);
	return err;
}

int generic_map_delete_batch(struct bpf_map *map,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	u32 cp, max_count;
	int err = 0;
	void *key;

	if (attr->batch.elem_flags & ~BPF_F_LOCK)
		return -EINVAL;

	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
		return -EINVAL;
	}

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!key)
		return -ENOMEM;

	for (cp = 0; cp < max_count; cp++) {
		err = -EFAULT;
		if (copy_from_user(key, keys + cp * map->key_size,
				   map->key_size))
			break;

		if (bpf_map_is_offloaded(map)) {
			err = bpf_map_offload_delete_elem(map, key);
			break;
		}

		bpf_disable_instrumentation();
		rcu_read_lock();
		err = map->ops->map_delete_elem(map, key);
		rcu_read_unlock();
		bpf_enable_instrumentation();
		if (err)
			break;
		cond_resched();
	}
	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
		err = -EFAULT;

	kvfree(key);

	maybe_wait_bpf_programs(map);
	return err;
}
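/* Note that the batch helpers report partial progress: the number of elements
 * actually processed is written back to uattr->batch.count even on error, so
 * user space can retry or resume from where the kernel stopped.
 */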
int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *values = u64_to_user_ptr(attr->batch.values);
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	u32 value_size, cp, max_count;
	void *key, *value;
	int err = 0;

	if (attr->batch.elem_flags & ~BPF_F_LOCK)
		return -EINVAL;

	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
		return -EINVAL;
	}

	value_size = bpf_map_value_size(map);

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!key)
		return -ENOMEM;

	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value) {
		kvfree(key);
		return -ENOMEM;
	}

	for (cp = 0; cp < max_count; cp++) {
		err = -EFAULT;
		if (copy_from_user(key, keys + cp * map->key_size,
				   map->key_size) ||
		    copy_from_user(value, values + cp * value_size, value_size))
			break;

		err = bpf_map_update_value(map, map_file, key, value,
					   attr->batch.elem_flags);

		if (err)
			break;
		cond_resched();
	}

	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
		err = -EFAULT;

	kvfree(value);
	kvfree(key);
	return err;
}

#define MAP_LOOKUP_RETRIES 3

int generic_map_lookup_batch(struct bpf_map *map,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
	void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
	void __user *values = u64_to_user_ptr(attr->batch.values);
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	void *buf, *buf_prevkey, *prev_key, *key, *value;
	int err, retry = MAP_LOOKUP_RETRIES;
	u32 value_size, cp, max_count;

	if (attr->batch.elem_flags & ~BPF_F_LOCK)
		return -EINVAL;

	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
	    !btf_record_has_field(map->record, BPF_SPIN_LOCK))
		return -EINVAL;

	value_size = bpf_map_value_size(map);

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	if (put_user(0, &uattr->batch.count))
		return -EFAULT;

	buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!buf_prevkey)
		return -ENOMEM;

	buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
	if (!buf) {
		kvfree(buf_prevkey);
		return -ENOMEM;
	}

	err = -EFAULT;
	prev_key = NULL;
	if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
		goto free_buf;
	key = buf;
	value = key + map->key_size;
	if (ubatch)
		prev_key = buf_prevkey;

	for (cp = 0; cp < max_count;) {
		rcu_read_lock();
		err = map->ops->map_get_next_key(map, prev_key, key);
		rcu_read_unlock();
		if (err)
			break;
		err = bpf_map_copy_value(map, key, value,
					 attr->batch.elem_flags);

		if (err == -ENOENT) {
			if (retry) {
				retry--;
				continue;
			}
			err = -EINTR;
			break;
		}

		if (err)
			goto free_buf;

		if (copy_to_user(keys + cp * map->key_size, key,
				 map->key_size)) {
			err = -EFAULT;
			goto free_buf;
		}
		if (copy_to_user(values + cp * value_size, value, value_size)) {
			err = -EFAULT;
			goto free_buf;
		}

		if (!prev_key)
			prev_key = buf_prevkey;

		swap(prev_key, key);
		retry = MAP_LOOKUP_RETRIES;
		cp++;
		cond_resched();
	}

	if (err == -EFAULT)
		goto free_buf;

	if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
		    (cp && copy_to_user(uobatch, prev_key, map->key_size))))
		err = -EFAULT;

free_buf:
	kvfree(buf_prevkey);
	kvfree(buf);
	return err;
}
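/* The MAP_LOOKUP_RETRIES dance in generic_map_lookup_batch() papers over
 * elements disappearing between map_get_next_key() and the value copy: if the
 * just-returned key was deleted concurrently, bpf_map_copy_value() fails with
 * -ENOENT and the iteration is retried from the same previous key a few times
 * before giving up with -EINTR.
 */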
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags

static int map_lookup_and_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
		return -EINVAL;

	if (attr->flags & ~BPF_F_LOCK)
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	if (attr->flags &&
	    (map->map_type == BPF_MAP_TYPE_QUEUE ||
	     map->map_type == BPF_MAP_TYPE_STACK)) {
		err = -EINVAL;
		goto err_put;
	}

	if ((attr->flags & BPF_F_LOCK) &&
	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
		err = -EINVAL;
		goto err_put;
	}

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map);

	err = -ENOMEM;
	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -ENOTSUPP;
	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
	    map->map_type == BPF_MAP_TYPE_STACK) {
		err = map->ops->map_pop_elem(map, value);
	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		if (!bpf_map_is_offloaded(map)) {
			bpf_disable_instrumentation();
			rcu_read_lock();
			err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
			rcu_read_unlock();
			bpf_enable_instrumentation();
		}
	}

	if (err)
		goto free_value;

	if (copy_to_user(uvalue, value, value_size) != 0) {
		err = -EFAULT;
		goto free_value;
	}

	err = 0;

free_value:
	kvfree(value);
free_key:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	fdput(f);
	return err;
}

#define BPF_MAP_FREEZE_LAST_FIELD map_fd

static int map_freeze(const union bpf_attr *attr)
{
	int err = 0, ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;

	if (CHECK_ATTR(BPF_MAP_FREEZE))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) {
		fdput(f);
		return -ENOTSUPP;
	}

	mutex_lock(&map->freeze_mutex);
	if (bpf_map_write_active(map)) {
		err = -EBUSY;
		goto err_put;
	}
	if (READ_ONCE(map->frozen)) {
		err = -EBUSY;
		goto err_put;
	}
	if (!bpf_capable()) {
		err = -EPERM;
		goto err_put;
	}

	WRITE_ONCE(map->frozen, true);
err_put:
	mutex_unlock(&map->freeze_mutex);
	fdput(f);
	return err;
}
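/* Freezing is a one-way operation: once map->frozen is set, syscall-side
 * writers lose FMODE_CAN_WRITE via map_get_sys_perms(), while programs can
 * still write unless the map was also created with BPF_F_RDONLY_PROG. The
 * bpf_map_write_active() check above refuses to freeze while a writable
 * mmap() or an in-flight syscall-side update still exists.
 */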
static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	const struct bpf_prog_ops *ops;

	if (type >= ARRAY_SIZE(bpf_prog_types))
		return -EINVAL;
	type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
	ops = bpf_prog_types[type];
	if (!ops)
		return -EINVAL;

	if (!bpf_prog_is_offloaded(prog->aux))
		prog->aux->ops = ops;
	else
		prog->aux->ops = &bpf_offload_prog_ops;
	prog->type = type;
	return 0;
}

enum bpf_audit {
	BPF_AUDIT_LOAD,
	BPF_AUDIT_UNLOAD,
	BPF_AUDIT_MAX,
};

static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
	[BPF_AUDIT_LOAD]   = "LOAD",
	[BPF_AUDIT_UNLOAD] = "UNLOAD",
};

static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
{
	struct audit_context *ctx = NULL;
	struct audit_buffer *ab;

	if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX))
		return;
	if (audit_enabled == AUDIT_OFF)
		return;
	if (!in_irq() && !irqs_disabled())
		ctx = audit_context();
	ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
	if (unlikely(!ab))
		return;
	audit_log_format(ab, "prog-id=%u op=%s",
			 prog->aux->id, bpf_audit_str[op]);
	audit_log_end(ab);
}

static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_bh(&prog_idr_lock);
	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		prog->aux->id = id;
	spin_unlock_bh(&prog_idr_lock);
	idr_preload_end();

	/* id is in [1, INT_MAX) */
	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}
void bpf_prog_free_id(struct bpf_prog *prog)
{
	unsigned long flags;

	/* cBPF to eBPF migrations are currently not in the idr store.
	 * Offloaded programs are removed from the store when their device
	 * disappears - even if someone grabs an fd to them they are unusable,
	 * simply waiting for refcnt to drop to be freed.
	 */
	if (!prog->aux->id)
		return;

	spin_lock_irqsave(&prog_idr_lock, flags);
	idr_remove(&prog_idr, prog->aux->id);
	prog->aux->id = 0;
	spin_unlock_irqrestore(&prog_idr_lock, flags);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	kvfree(aux->func_info);
	kfree(aux->func_info_aux);
	free_uid(aux->user);
	security_bpf_prog_free(aux);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
{
	bpf_prog_kallsyms_del_all(prog);
	btf_put(prog->aux->btf);
	kvfree(prog->aux->jited_linfo);
	kvfree(prog->aux->linfo);
	kfree(prog->aux->kfunc_tab);
	if (prog->aux->attach_btf)
		btf_put(prog->aux->attach_btf);

	if (deferred) {
		if (prog->aux->sleepable)
			call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
		else
			call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	} else {
		__bpf_prog_put_rcu(&prog->aux->rcu);
	}
}

static void bpf_prog_put_deferred(struct work_struct *work)
{
	struct bpf_prog_aux *aux;
	struct bpf_prog *prog;

	aux = container_of(work, struct bpf_prog_aux, work);
	prog = aux->prog;
	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
	bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
	bpf_prog_free_id(prog);
	__bpf_prog_put_noref(prog, true);
}

static void __bpf_prog_put(struct bpf_prog *prog)
{
	struct bpf_prog_aux *aux = prog->aux;

	if (atomic64_dec_and_test(&aux->refcnt)) {
		if (in_irq() || irqs_disabled()) {
			INIT_WORK(&aux->work, bpf_prog_put_deferred);
			schedule_work(&aux->work);
		} else {
			bpf_prog_put_deferred(&aux->work);
		}
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

struct bpf_prog_kstats {
	u64 nsecs;
	u64 cnt;
	u64 misses;
};

void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog)
{
	struct bpf_prog_stats *stats;
	unsigned int flags;

	stats = this_cpu_ptr(prog->stats);
	flags = u64_stats_update_begin_irqsave(&stats->syncp);
	u64_stats_inc(&stats->misses);
	u64_stats_update_end_irqrestore(&stats->syncp, flags);
}

static void bpf_prog_get_stats(const struct bpf_prog *prog,
			       struct bpf_prog_kstats *stats)
{
	u64 nsecs = 0, cnt = 0, misses = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct bpf_prog_stats *st;
		unsigned int start;
		u64 tnsecs, tcnt, tmisses;

		st = per_cpu_ptr(prog->stats, cpu);
		do {
			start = u64_stats_fetch_begin(&st->syncp);
			tnsecs = u64_stats_read(&st->nsecs);
			tcnt = u64_stats_read(&st->cnt);
			tmisses = u64_stats_read(&st->misses);
		} while (u64_stats_fetch_retry(&st->syncp, start));
		nsecs += tnsecs;
		cnt += tcnt;
		misses += tmisses;
	}
	stats->nsecs = nsecs;
	stats->cnt = cnt;
	stats->misses = misses;
}
2157 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2158 struct bpf_prog_kstats stats; 2159 2160 bpf_prog_get_stats(prog, &stats); 2161 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2162 seq_printf(m, 2163 "prog_type:\t%u\n" 2164 "prog_jited:\t%u\n" 2165 "prog_tag:\t%s\n" 2166 "memlock:\t%llu\n" 2167 "prog_id:\t%u\n" 2168 "run_time_ns:\t%llu\n" 2169 "run_cnt:\t%llu\n" 2170 "recursion_misses:\t%llu\n" 2171 "verified_insns:\t%u\n", 2172 prog->type, 2173 prog->jited, 2174 prog_tag, 2175 prog->pages * 1ULL << PAGE_SHIFT, 2176 prog->aux->id, 2177 stats.nsecs, 2178 stats.cnt, 2179 stats.misses, 2180 prog->aux->verified_insns); 2181 } 2182 #endif 2183 2184 const struct file_operations bpf_prog_fops = { 2185 #ifdef CONFIG_PROC_FS 2186 .show_fdinfo = bpf_prog_show_fdinfo, 2187 #endif 2188 .release = bpf_prog_release, 2189 .read = bpf_dummy_read, 2190 .write = bpf_dummy_write, 2191 }; 2192 2193 int bpf_prog_new_fd(struct bpf_prog *prog) 2194 { 2195 int ret; 2196 2197 ret = security_bpf_prog(prog); 2198 if (ret < 0) 2199 return ret; 2200 2201 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 2202 O_RDWR | O_CLOEXEC); 2203 } 2204 2205 static struct bpf_prog *____bpf_prog_get(struct fd f) 2206 { 2207 if (!f.file) 2208 return ERR_PTR(-EBADF); 2209 if (f.file->f_op != &bpf_prog_fops) { 2210 fdput(f); 2211 return ERR_PTR(-EINVAL); 2212 } 2213 2214 return f.file->private_data; 2215 } 2216 2217 void bpf_prog_add(struct bpf_prog *prog, int i) 2218 { 2219 atomic64_add(i, &prog->aux->refcnt); 2220 } 2221 EXPORT_SYMBOL_GPL(bpf_prog_add); 2222 2223 void bpf_prog_sub(struct bpf_prog *prog, int i) 2224 { 2225 /* Only to be used for undoing previous bpf_prog_add() in some 2226 * error path. We still know that another entity in our call 2227 * path holds a reference to the program, thus atomic_sub() can 2228 * be safely used in such cases! 
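 *
 * A minimal sketch of a typical driver-style use, assuming a fan-out of
 * one reference per hardware queue (setup_queue() is a made-up helper,
 * not a real API):
 *
 *	bpf_prog_add(prog, nr_queues);
 *	for (i = 0; i < nr_queues; i++)
 *		if (setup_queue(dev, i, prog))
 *			break;
 *	if (i < nr_queues)
 *		bpf_prog_sub(prog, nr_queues - i);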
2229 */ 2230 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 2231 } 2232 EXPORT_SYMBOL_GPL(bpf_prog_sub); 2233 2234 void bpf_prog_inc(struct bpf_prog *prog) 2235 { 2236 atomic64_inc(&prog->aux->refcnt); 2237 } 2238 EXPORT_SYMBOL_GPL(bpf_prog_inc); 2239 2240 /* prog_idr_lock should have been held */ 2241 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 2242 { 2243 int refold; 2244 2245 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 2246 2247 if (!refold) 2248 return ERR_PTR(-ENOENT); 2249 2250 return prog; 2251 } 2252 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 2253 2254 bool bpf_prog_get_ok(struct bpf_prog *prog, 2255 enum bpf_prog_type *attach_type, bool attach_drv) 2256 { 2257 /* not an attachment, just a refcount inc, always allow */ 2258 if (!attach_type) 2259 return true; 2260 2261 if (prog->type != *attach_type) 2262 return false; 2263 if (bpf_prog_is_offloaded(prog->aux) && !attach_drv) 2264 return false; 2265 2266 return true; 2267 } 2268 2269 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, 2270 bool attach_drv) 2271 { 2272 struct fd f = fdget(ufd); 2273 struct bpf_prog *prog; 2274 2275 prog = ____bpf_prog_get(f); 2276 if (IS_ERR(prog)) 2277 return prog; 2278 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { 2279 prog = ERR_PTR(-EINVAL); 2280 goto out; 2281 } 2282 2283 bpf_prog_inc(prog); 2284 out: 2285 fdput(f); 2286 return prog; 2287 } 2288 2289 struct bpf_prog *bpf_prog_get(u32 ufd) 2290 { 2291 return __bpf_prog_get(ufd, NULL, false); 2292 } 2293 2294 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, 2295 bool attach_drv) 2296 { 2297 return __bpf_prog_get(ufd, &type, attach_drv); 2298 } 2299 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 2300 2301 /* Initially all BPF programs could be loaded w/o specifying 2302 * expected_attach_type. Later for some of them specifying expected_attach_type 2303 * at load time became required so that program could be validated properly. 2304 * Programs of types that are allowed to be loaded both w/ and w/o (for 2305 * backward compatibility) expected_attach_type, should have the default attach 2306 * type assigned to expected_attach_type for the latter case, so that it can be 2307 * validated later at attach time. 2308 * 2309 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if 2310 * prog type requires it but has some attach types that have to be backward 2311 * compatible. 2312 */ 2313 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) 2314 { 2315 switch (attr->prog_type) { 2316 case BPF_PROG_TYPE_CGROUP_SOCK: 2317 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't 2318 * exist so checking for non-zero is the way to go here. 
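 *
 * BPF_CGROUP_INET_INGRESS is 0, so a zeroed expected_attach_type is
 * indistinguishable from "not set"; defaulting to
 * BPF_CGROUP_INET_SOCK_CREATE keeps old loaders that never set the
 * field working.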
2319 */ 2320 if (!attr->expected_attach_type) 2321 attr->expected_attach_type = 2322 BPF_CGROUP_INET_SOCK_CREATE; 2323 break; 2324 case BPF_PROG_TYPE_SK_REUSEPORT: 2325 if (!attr->expected_attach_type) 2326 attr->expected_attach_type = 2327 BPF_SK_REUSEPORT_SELECT; 2328 break; 2329 } 2330 } 2331 2332 static int 2333 bpf_prog_load_check_attach(enum bpf_prog_type prog_type, 2334 enum bpf_attach_type expected_attach_type, 2335 struct btf *attach_btf, u32 btf_id, 2336 struct bpf_prog *dst_prog) 2337 { 2338 if (btf_id) { 2339 if (btf_id > BTF_MAX_TYPE) 2340 return -EINVAL; 2341 2342 if (!attach_btf && !dst_prog) 2343 return -EINVAL; 2344 2345 switch (prog_type) { 2346 case BPF_PROG_TYPE_TRACING: 2347 case BPF_PROG_TYPE_LSM: 2348 case BPF_PROG_TYPE_STRUCT_OPS: 2349 case BPF_PROG_TYPE_EXT: 2350 break; 2351 default: 2352 return -EINVAL; 2353 } 2354 } 2355 2356 if (attach_btf && (!btf_id || dst_prog)) 2357 return -EINVAL; 2358 2359 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && 2360 prog_type != BPF_PROG_TYPE_EXT) 2361 return -EINVAL; 2362 2363 switch (prog_type) { 2364 case BPF_PROG_TYPE_CGROUP_SOCK: 2365 switch (expected_attach_type) { 2366 case BPF_CGROUP_INET_SOCK_CREATE: 2367 case BPF_CGROUP_INET_SOCK_RELEASE: 2368 case BPF_CGROUP_INET4_POST_BIND: 2369 case BPF_CGROUP_INET6_POST_BIND: 2370 return 0; 2371 default: 2372 return -EINVAL; 2373 } 2374 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2375 switch (expected_attach_type) { 2376 case BPF_CGROUP_INET4_BIND: 2377 case BPF_CGROUP_INET6_BIND: 2378 case BPF_CGROUP_INET4_CONNECT: 2379 case BPF_CGROUP_INET6_CONNECT: 2380 case BPF_CGROUP_INET4_GETPEERNAME: 2381 case BPF_CGROUP_INET6_GETPEERNAME: 2382 case BPF_CGROUP_INET4_GETSOCKNAME: 2383 case BPF_CGROUP_INET6_GETSOCKNAME: 2384 case BPF_CGROUP_UDP4_SENDMSG: 2385 case BPF_CGROUP_UDP6_SENDMSG: 2386 case BPF_CGROUP_UDP4_RECVMSG: 2387 case BPF_CGROUP_UDP6_RECVMSG: 2388 return 0; 2389 default: 2390 return -EINVAL; 2391 } 2392 case BPF_PROG_TYPE_CGROUP_SKB: 2393 switch (expected_attach_type) { 2394 case BPF_CGROUP_INET_INGRESS: 2395 case BPF_CGROUP_INET_EGRESS: 2396 return 0; 2397 default: 2398 return -EINVAL; 2399 } 2400 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2401 switch (expected_attach_type) { 2402 case BPF_CGROUP_SETSOCKOPT: 2403 case BPF_CGROUP_GETSOCKOPT: 2404 return 0; 2405 default: 2406 return -EINVAL; 2407 } 2408 case BPF_PROG_TYPE_SK_LOOKUP: 2409 if (expected_attach_type == BPF_SK_LOOKUP) 2410 return 0; 2411 return -EINVAL; 2412 case BPF_PROG_TYPE_SK_REUSEPORT: 2413 switch (expected_attach_type) { 2414 case BPF_SK_REUSEPORT_SELECT: 2415 case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: 2416 return 0; 2417 default: 2418 return -EINVAL; 2419 } 2420 case BPF_PROG_TYPE_SYSCALL: 2421 case BPF_PROG_TYPE_EXT: 2422 if (expected_attach_type) 2423 return -EINVAL; 2424 fallthrough; 2425 default: 2426 return 0; 2427 } 2428 } 2429 2430 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) 2431 { 2432 switch (prog_type) { 2433 case BPF_PROG_TYPE_SCHED_CLS: 2434 case BPF_PROG_TYPE_SCHED_ACT: 2435 case BPF_PROG_TYPE_XDP: 2436 case BPF_PROG_TYPE_LWT_IN: 2437 case BPF_PROG_TYPE_LWT_OUT: 2438 case BPF_PROG_TYPE_LWT_XMIT: 2439 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2440 case BPF_PROG_TYPE_SK_SKB: 2441 case BPF_PROG_TYPE_SK_MSG: 2442 case BPF_PROG_TYPE_LIRC_MODE2: 2443 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2444 case BPF_PROG_TYPE_CGROUP_DEVICE: 2445 case BPF_PROG_TYPE_CGROUP_SOCK: 2446 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2447 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2448 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2449 case 
BPF_PROG_TYPE_SOCK_OPS: 2450 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2451 return true; 2452 case BPF_PROG_TYPE_CGROUP_SKB: 2453 /* always unpriv */ 2454 case BPF_PROG_TYPE_SK_REUSEPORT: 2455 /* equivalent to SOCKET_FILTER. need CAP_BPF only */ 2456 default: 2457 return false; 2458 } 2459 } 2460 2461 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) 2462 { 2463 switch (prog_type) { 2464 case BPF_PROG_TYPE_KPROBE: 2465 case BPF_PROG_TYPE_TRACEPOINT: 2466 case BPF_PROG_TYPE_PERF_EVENT: 2467 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2468 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2469 case BPF_PROG_TYPE_TRACING: 2470 case BPF_PROG_TYPE_LSM: 2471 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ 2472 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2473 return true; 2474 default: 2475 return false; 2476 } 2477 } 2478 2479 /* last field in 'union bpf_attr' used by this command */ 2480 #define BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size 2481 2482 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) 2483 { 2484 enum bpf_prog_type type = attr->prog_type; 2485 struct bpf_prog *prog, *dst_prog = NULL; 2486 struct btf *attach_btf = NULL; 2487 int err; 2488 char license[128]; 2489 bool is_gpl; 2490 2491 if (CHECK_ATTR(BPF_PROG_LOAD)) 2492 return -EINVAL; 2493 2494 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | 2495 BPF_F_ANY_ALIGNMENT | 2496 BPF_F_TEST_STATE_FREQ | 2497 BPF_F_SLEEPABLE | 2498 BPF_F_TEST_RND_HI32 | 2499 BPF_F_XDP_HAS_FRAGS | 2500 BPF_F_XDP_DEV_BOUND_ONLY)) 2501 return -EINVAL; 2502 2503 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 2504 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && 2505 !bpf_capable()) 2506 return -EPERM; 2507 2508 /* copy eBPF program license from user space */ 2509 if (strncpy_from_bpfptr(license, 2510 make_bpfptr(attr->license, uattr.is_kernel), 2511 sizeof(license) - 1) < 0) 2512 return -EFAULT; 2513 license[sizeof(license) - 1] = 0; 2514 2515 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 2516 is_gpl = license_is_gpl_compatible(license); 2517 2518 if (attr->insn_cnt == 0 || 2519 attr->insn_cnt > (bpf_capable() ? 
BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) 2520 return -E2BIG; 2521 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 2522 type != BPF_PROG_TYPE_CGROUP_SKB && 2523 !bpf_capable()) 2524 return -EPERM; 2525 2526 if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) 2527 return -EPERM; 2528 if (is_perfmon_prog_type(type) && !perfmon_capable()) 2529 return -EPERM; 2530 2531 /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog 2532 * or btf, we need to check which one it is 2533 */ 2534 if (attr->attach_prog_fd) { 2535 dst_prog = bpf_prog_get(attr->attach_prog_fd); 2536 if (IS_ERR(dst_prog)) { 2537 dst_prog = NULL; 2538 attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); 2539 if (IS_ERR(attach_btf)) 2540 return -EINVAL; 2541 if (!btf_is_kernel(attach_btf)) { 2542 /* attaching through specifying bpf_prog's BTF 2543 * objects directly might be supported eventually 2544 */ 2545 btf_put(attach_btf); 2546 return -ENOTSUPP; 2547 } 2548 } 2549 } else if (attr->attach_btf_id) { 2550 /* fall back to vmlinux BTF, if BTF type ID is specified */ 2551 attach_btf = bpf_get_btf_vmlinux(); 2552 if (IS_ERR(attach_btf)) 2553 return PTR_ERR(attach_btf); 2554 if (!attach_btf) 2555 return -EINVAL; 2556 btf_get(attach_btf); 2557 } 2558 2559 bpf_prog_load_fixup_attach_type(attr); 2560 if (bpf_prog_load_check_attach(type, attr->expected_attach_type, 2561 attach_btf, attr->attach_btf_id, 2562 dst_prog)) { 2563 if (dst_prog) 2564 bpf_prog_put(dst_prog); 2565 if (attach_btf) 2566 btf_put(attach_btf); 2567 return -EINVAL; 2568 } 2569 2570 /* plain bpf_prog allocation */ 2571 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 2572 if (!prog) { 2573 if (dst_prog) 2574 bpf_prog_put(dst_prog); 2575 if (attach_btf) 2576 btf_put(attach_btf); 2577 return -ENOMEM; 2578 } 2579 2580 prog->expected_attach_type = attr->expected_attach_type; 2581 prog->aux->attach_btf = attach_btf; 2582 prog->aux->attach_btf_id = attr->attach_btf_id; 2583 prog->aux->dst_prog = dst_prog; 2584 prog->aux->dev_bound = !!attr->prog_ifindex; 2585 prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; 2586 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; 2587 2588 err = security_bpf_prog_alloc(prog->aux); 2589 if (err) 2590 goto free_prog; 2591 2592 prog->aux->user = get_current_user(); 2593 prog->len = attr->insn_cnt; 2594 2595 err = -EFAULT; 2596 if (copy_from_bpfptr(prog->insns, 2597 make_bpfptr(attr->insns, uattr.is_kernel), 2598 bpf_prog_insn_size(prog)) != 0) 2599 goto free_prog_sec; 2600 2601 prog->orig_prog = NULL; 2602 prog->jited = 0; 2603 2604 atomic64_set(&prog->aux->refcnt, 1); 2605 prog->gpl_compatible = is_gpl ? 
1 : 0; 2606 2607 if (bpf_prog_is_dev_bound(prog->aux)) { 2608 err = bpf_prog_dev_bound_init(prog, attr); 2609 if (err) 2610 goto free_prog_sec; 2611 } 2612 2613 if (type == BPF_PROG_TYPE_EXT && dst_prog && 2614 bpf_prog_is_dev_bound(dst_prog->aux)) { 2615 err = bpf_prog_dev_bound_inherit(prog, dst_prog); 2616 if (err) 2617 goto free_prog_sec; 2618 } 2619 2620 /* find program type: socket_filter vs tracing_filter */ 2621 err = find_prog_type(type, prog); 2622 if (err < 0) 2623 goto free_prog_sec; 2624 2625 prog->aux->load_time = ktime_get_boottime_ns(); 2626 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, 2627 sizeof(attr->prog_name)); 2628 if (err < 0) 2629 goto free_prog_sec; 2630 2631 /* run eBPF verifier */ 2632 err = bpf_check(&prog, attr, uattr); 2633 if (err < 0) 2634 goto free_used_maps; 2635 2636 prog = bpf_prog_select_runtime(prog, &err); 2637 if (err < 0) 2638 goto free_used_maps; 2639 2640 err = bpf_prog_alloc_id(prog); 2641 if (err) 2642 goto free_used_maps; 2643 2644 /* Upon success of bpf_prog_alloc_id(), the BPF prog is 2645 * effectively publicly exposed. However, retrieving via 2646 * bpf_prog_get_fd_by_id() will take another reference, 2647 * therefore it cannot be gone underneath us. 2648 * 2649 * Only for the time /after/ successful bpf_prog_new_fd() 2650 * and before returning to userspace, we might just hold 2651 * one reference and any parallel close on that fd could 2652 * rip everything out. Hence, below notifications must 2653 * happen before bpf_prog_new_fd(). 2654 * 2655 * Also, any failure handling from this point onwards must 2656 * be using bpf_prog_put() given the program is exposed. 2657 */ 2658 bpf_prog_kallsyms_add(prog); 2659 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); 2660 bpf_audit_prog(prog, BPF_AUDIT_LOAD); 2661 2662 err = bpf_prog_new_fd(prog); 2663 if (err < 0) 2664 bpf_prog_put(prog); 2665 return err; 2666 2667 free_used_maps: 2668 /* In case we have subprogs, we need to wait for a grace 2669 * period before we can tear down JIT memory since symbols 2670 * are already exposed under kallsyms. 2671 */ 2672 __bpf_prog_put_noref(prog, prog->aux->func_cnt); 2673 return err; 2674 free_prog_sec: 2675 free_uid(prog->aux->user); 2676 security_bpf_prog_free(prog->aux); 2677 free_prog: 2678 if (prog->aux->attach_btf) 2679 btf_put(prog->aux->attach_btf); 2680 bpf_prog_free(prog); 2681 return err; 2682 } 2683 2684 #define BPF_OBJ_LAST_FIELD file_flags 2685 2686 static int bpf_obj_pin(const union bpf_attr *attr) 2687 { 2688 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) 2689 return -EINVAL; 2690 2691 return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); 2692 } 2693 2694 static int bpf_obj_get(const union bpf_attr *attr) 2695 { 2696 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 2697 attr->file_flags & ~BPF_OBJ_FLAG_MASK) 2698 return -EINVAL; 2699 2700 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), 2701 attr->file_flags); 2702 } 2703 2704 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, 2705 const struct bpf_link_ops *ops, struct bpf_prog *prog) 2706 { 2707 atomic64_set(&link->refcnt, 1); 2708 link->type = type; 2709 link->id = 0; 2710 link->ops = ops; 2711 link->prog = prog; 2712 } 2713 2714 static void bpf_link_free_id(int id) 2715 { 2716 if (!id) 2717 return; 2718 2719 spin_lock_bh(&link_idr_lock); 2720 idr_remove(&link_idr, id); 2721 spin_unlock_bh(&link_idr_lock); 2722 } 2723 2724 /* Clean up bpf_link and corresponding anon_inode file and FD. 
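 * It is the failure-path counterpart of bpf_link_prime() and
 * bpf_link_settle(). A minimal sketch of the usual attach flow, where
 * my_link is a hypothetical type-specific struct embedding a
 * struct bpf_link and do_attach() stands in for the hook-specific step:
 *
 *	err = bpf_link_prime(&my_link->link, &primer);
 *	if (err) {
 *		kfree(my_link);
 *		return err;
 *	}
 *	err = do_attach(my_link);
 *	if (err) {
 *		bpf_link_cleanup(&primer);
 *		return err;
 *	}
 *	return bpf_link_settle(&primer);
 *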
After 2725 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 2726 * anon_inode's release() call. This helper marks bpf_link as 2727 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 2728 * is not decremented, it's the responsibility of the calling code that failed 2729 * to complete bpf_link initialization. 2730 */ 2731 void bpf_link_cleanup(struct bpf_link_primer *primer) 2732 { 2733 primer->link->prog = NULL; 2734 bpf_link_free_id(primer->id); 2735 fput(primer->file); 2736 put_unused_fd(primer->fd); 2737 } 2738 2739 void bpf_link_inc(struct bpf_link *link) 2740 { 2741 atomic64_inc(&link->refcnt); 2742 } 2743 2744 /* bpf_link_free is guaranteed to be called from process context */ 2745 static void bpf_link_free(struct bpf_link *link) 2746 { 2747 bpf_link_free_id(link->id); 2748 if (link->prog) { 2749 /* detach BPF program, clean up used resources */ 2750 link->ops->release(link); 2751 bpf_prog_put(link->prog); 2752 } 2753 /* free bpf_link and its containing memory */ 2754 link->ops->dealloc(link); 2755 } 2756 2757 static void bpf_link_put_deferred(struct work_struct *work) 2758 { 2759 struct bpf_link *link = container_of(work, struct bpf_link, work); 2760 2761 bpf_link_free(link); 2762 } 2763 2764 /* bpf_link_put can be called from atomic context, but ensures that resources 2765 * are freed from process context 2766 */ 2767 void bpf_link_put(struct bpf_link *link) 2768 { 2769 if (!atomic64_dec_and_test(&link->refcnt)) 2770 return; 2771 2772 if (in_atomic()) { 2773 INIT_WORK(&link->work, bpf_link_put_deferred); 2774 schedule_work(&link->work); 2775 } else { 2776 bpf_link_free(link); 2777 } 2778 } 2779 EXPORT_SYMBOL(bpf_link_put); 2780 2781 static int bpf_link_release(struct inode *inode, struct file *filp) 2782 { 2783 struct bpf_link *link = filp->private_data; 2784 2785 bpf_link_put(link); 2786 return 0; 2787 } 2788 2789 #ifdef CONFIG_PROC_FS 2790 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 2791 #define BPF_MAP_TYPE(_id, _ops) 2792 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name, 2793 static const char *bpf_link_type_strs[] = { 2794 [BPF_LINK_TYPE_UNSPEC] = "<invalid>", 2795 #include <linux/bpf_types.h> 2796 }; 2797 #undef BPF_PROG_TYPE 2798 #undef BPF_MAP_TYPE 2799 #undef BPF_LINK_TYPE 2800 2801 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) 2802 { 2803 const struct bpf_link *link = filp->private_data; 2804 const struct bpf_prog *prog = link->prog; 2805 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2806 2807 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2808 seq_printf(m, 2809 "link_type:\t%s\n" 2810 "link_id:\t%u\n" 2811 "prog_tag:\t%s\n" 2812 "prog_id:\t%u\n", 2813 bpf_link_type_strs[link->type], 2814 link->id, 2815 prog_tag, 2816 prog->aux->id); 2817 if (link->ops->show_fdinfo) 2818 link->ops->show_fdinfo(link, m); 2819 } 2820 #endif 2821 2822 static const struct file_operations bpf_link_fops = { 2823 #ifdef CONFIG_PROC_FS 2824 .show_fdinfo = bpf_link_show_fdinfo, 2825 #endif 2826 .release = bpf_link_release, 2827 .read = bpf_dummy_read, 2828 .write = bpf_dummy_write, 2829 }; 2830 2831 static int bpf_link_alloc_id(struct bpf_link *link) 2832 { 2833 int id; 2834 2835 idr_preload(GFP_KERNEL); 2836 spin_lock_bh(&link_idr_lock); 2837 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); 2838 spin_unlock_bh(&link_idr_lock); 2839 idr_preload_end(); 2840 2841 return id; 2842 } 2843 2844 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, 2845 * reserving
unused FD and allocating ID from link_idr. This is to be paired 2846 * with bpf_link_settle() to install FD and ID and expose bpf_link to 2847 * user-space, if bpf_link is successfully attached. If not, bpf_link and 2848 * pre-allocated resources are to be freed with bpf_link_cleanup() call. All the 2849 * transient state is passed around in struct bpf_link_primer. 2850 * This is the preferred way to create and initialize bpf_link, especially when 2851 * there are complicated and expensive operations in between creating bpf_link 2852 * itself and attaching it to BPF hook. By using bpf_link_prime() and 2853 * bpf_link_settle() kernel code using bpf_link doesn't have to perform 2854 * expensive (and potentially failing) roll back operations in the rare case 2855 * that file, FD, or ID can't be allocated. 2856 */ 2857 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) 2858 { 2859 struct file *file; 2860 int fd, id; 2861 2862 fd = get_unused_fd_flags(O_CLOEXEC); 2863 if (fd < 0) 2864 return fd; 2865 2866 2867 id = bpf_link_alloc_id(link); 2868 if (id < 0) { 2869 put_unused_fd(fd); 2870 return id; 2871 } 2872 2873 file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC); 2874 if (IS_ERR(file)) { 2875 bpf_link_free_id(id); 2876 put_unused_fd(fd); 2877 return PTR_ERR(file); 2878 } 2879 2880 primer->link = link; 2881 primer->file = file; 2882 primer->fd = fd; 2883 primer->id = id; 2884 return 0; 2885 } 2886 2887 int bpf_link_settle(struct bpf_link_primer *primer) 2888 { 2889 /* make bpf_link fetchable by ID */ 2890 spin_lock_bh(&link_idr_lock); 2891 primer->link->id = primer->id; 2892 spin_unlock_bh(&link_idr_lock); 2893 /* make bpf_link fetchable by FD */ 2894 fd_install(primer->fd, primer->file); 2895 /* pass through installed FD */ 2896 return primer->fd; 2897 } 2898 2899 int bpf_link_new_fd(struct bpf_link *link) 2900 { 2901 return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); 2902 } 2903 2904 struct bpf_link *bpf_link_get_from_fd(u32 ufd) 2905 { 2906 struct fd f = fdget(ufd); 2907 struct bpf_link *link; 2908 2909 if (!f.file) 2910 return ERR_PTR(-EBADF); 2911 if (f.file->f_op != &bpf_link_fops) { 2912 fdput(f); 2913 return ERR_PTR(-EINVAL); 2914 } 2915 2916 link = f.file->private_data; 2917 bpf_link_inc(link); 2918 fdput(f); 2919 2920 return link; 2921 } 2922 EXPORT_SYMBOL(bpf_link_get_from_fd); 2923 2924 static void bpf_tracing_link_release(struct bpf_link *link) 2925 { 2926 struct bpf_tracing_link *tr_link = 2927 container_of(link, struct bpf_tracing_link, link.link); 2928 2929 WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, 2930 tr_link->trampoline)); 2931 2932 bpf_trampoline_put(tr_link->trampoline); 2933 2934 /* tgt_prog is NULL if target is a kernel function */ 2935 if (tr_link->tgt_prog) 2936 bpf_prog_put(tr_link->tgt_prog); 2937 } 2938 2939 static void bpf_tracing_link_dealloc(struct bpf_link *link) 2940 { 2941 struct bpf_tracing_link *tr_link = 2942 container_of(link, struct bpf_tracing_link, link.link); 2943 2944 kfree(tr_link); 2945 } 2946 2947 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, 2948 struct seq_file *seq) 2949 { 2950 struct bpf_tracing_link *tr_link = 2951 container_of(link, struct bpf_tracing_link, link.link); 2952 2953 seq_printf(seq, 2954 "attach_type:\t%d\n", 2955 tr_link->attach_type); 2956 } 2957 2958 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, 2959 struct bpf_link_info *info) 2960 { 2961 struct bpf_tracing_link *tr_link = 2962 container_of(link, struct
bpf_tracing_link, link.link); 2963 2964 info->tracing.attach_type = tr_link->attach_type; 2965 bpf_trampoline_unpack_key(tr_link->trampoline->key, 2966 &info->tracing.target_obj_id, 2967 &info->tracing.target_btf_id); 2968 2969 return 0; 2970 } 2971 2972 static const struct bpf_link_ops bpf_tracing_link_lops = { 2973 .release = bpf_tracing_link_release, 2974 .dealloc = bpf_tracing_link_dealloc, 2975 .show_fdinfo = bpf_tracing_link_show_fdinfo, 2976 .fill_link_info = bpf_tracing_link_fill_link_info, 2977 }; 2978 2979 static int bpf_tracing_prog_attach(struct bpf_prog *prog, 2980 int tgt_prog_fd, 2981 u32 btf_id, 2982 u64 bpf_cookie) 2983 { 2984 struct bpf_link_primer link_primer; 2985 struct bpf_prog *tgt_prog = NULL; 2986 struct bpf_trampoline *tr = NULL; 2987 struct bpf_tracing_link *link; 2988 u64 key = 0; 2989 int err; 2990 2991 switch (prog->type) { 2992 case BPF_PROG_TYPE_TRACING: 2993 if (prog->expected_attach_type != BPF_TRACE_FENTRY && 2994 prog->expected_attach_type != BPF_TRACE_FEXIT && 2995 prog->expected_attach_type != BPF_MODIFY_RETURN) { 2996 err = -EINVAL; 2997 goto out_put_prog; 2998 } 2999 break; 3000 case BPF_PROG_TYPE_EXT: 3001 if (prog->expected_attach_type != 0) { 3002 err = -EINVAL; 3003 goto out_put_prog; 3004 } 3005 break; 3006 case BPF_PROG_TYPE_LSM: 3007 if (prog->expected_attach_type != BPF_LSM_MAC) { 3008 err = -EINVAL; 3009 goto out_put_prog; 3010 } 3011 break; 3012 default: 3013 err = -EINVAL; 3014 goto out_put_prog; 3015 } 3016 3017 if (!!tgt_prog_fd != !!btf_id) { 3018 err = -EINVAL; 3019 goto out_put_prog; 3020 } 3021 3022 if (tgt_prog_fd) { 3023 /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ 3024 if (prog->type != BPF_PROG_TYPE_EXT) { 3025 err = -EINVAL; 3026 goto out_put_prog; 3027 } 3028 3029 tgt_prog = bpf_prog_get(tgt_prog_fd); 3030 if (IS_ERR(tgt_prog)) { 3031 err = PTR_ERR(tgt_prog); 3032 tgt_prog = NULL; 3033 goto out_put_prog; 3034 } 3035 3036 key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id); 3037 } 3038 3039 link = kzalloc(sizeof(*link), GFP_USER); 3040 if (!link) { 3041 err = -ENOMEM; 3042 goto out_put_prog; 3043 } 3044 bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, 3045 &bpf_tracing_link_lops, prog); 3046 link->attach_type = prog->expected_attach_type; 3047 link->link.cookie = bpf_cookie; 3048 3049 mutex_lock(&prog->aux->dst_mutex); 3050 3051 /* There are a few possible cases here: 3052 * 3053 * - if prog->aux->dst_trampoline is set, the program was just loaded 3054 * and not yet attached to anything, so we can use the values stored 3055 * in prog->aux 3056 * 3057 * - if prog->aux->dst_trampoline is NULL, the program has already been 3058 * attached to a target and its initial target was cleared (below) 3059 * 3060 * - if tgt_prog != NULL, the caller specified tgt_prog_fd + 3061 * target_btf_id using the link_create API. 3062 * 3063 * - if tgt_prog == NULL when this function was called using the old 3064 * raw_tracepoint_open API, and we need a target from prog->aux 3065 * 3066 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program 3067 * was detached and is going for re-attachment. 3068 */ 3069 if (!prog->aux->dst_trampoline && !tgt_prog) { 3070 /* 3071 * Allow re-attach for TRACING and LSM programs. If it's 3072 * currently linked, bpf_trampoline_link_prog will fail. 3073 * EXT programs need to specify tgt_prog_fd, so they 3074 * re-attach in separate code path. 
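 *
 * In the re-attach case the missing target is reconstructed from the
 * attach_btf/attach_btf_id the program was originally loaded against;
 * see the key computation right below.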
3075 */ 3076 if (prog->type != BPF_PROG_TYPE_TRACING && 3077 prog->type != BPF_PROG_TYPE_LSM) { 3078 err = -EINVAL; 3079 goto out_unlock; 3080 } 3081 btf_id = prog->aux->attach_btf_id; 3082 key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id); 3083 } 3084 3085 if (!prog->aux->dst_trampoline || 3086 (key && key != prog->aux->dst_trampoline->key)) { 3087 /* If there is no saved target, or the specified target is 3088 * different from the destination specified at load time, we 3089 * need a new trampoline and a check for compatibility 3090 */ 3091 struct bpf_attach_target_info tgt_info = {}; 3092 3093 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, 3094 &tgt_info); 3095 if (err) 3096 goto out_unlock; 3097 3098 tr = bpf_trampoline_get(key, &tgt_info); 3099 if (!tr) { 3100 err = -ENOMEM; 3101 goto out_unlock; 3102 } 3103 } else { 3104 /* The caller didn't specify a target, or the target was the 3105 * same as the destination supplied during program load. This 3106 * means we can reuse the trampoline and reference from program 3107 * load time, and there is no need to allocate a new one. This 3108 * can only happen once for any program, as the saved values in 3109 * prog->aux are cleared below. 3110 */ 3111 tr = prog->aux->dst_trampoline; 3112 tgt_prog = prog->aux->dst_prog; 3113 } 3114 3115 err = bpf_link_prime(&link->link.link, &link_primer); 3116 if (err) 3117 goto out_unlock; 3118 3119 err = bpf_trampoline_link_prog(&link->link, tr); 3120 if (err) { 3121 bpf_link_cleanup(&link_primer); 3122 link = NULL; 3123 goto out_unlock; 3124 } 3125 3126 link->tgt_prog = tgt_prog; 3127 link->trampoline = tr; 3128 3129 /* Always clear the trampoline and target prog from prog->aux to make 3130 * sure the original attach destination is not kept alive after a 3131 * program is (re-)attached to another target. 
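 *
 * If the load-time references were not transferred to the link (a
 * different target prog or trampoline ended up being used), they are
 * dropped here; either way prog->aux no longer owns them afterwards.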
3132 */ 3133 if (prog->aux->dst_prog && 3134 (tgt_prog_fd || tr != prog->aux->dst_trampoline)) 3135 /* got extra prog ref from syscall, or attaching to different prog */ 3136 bpf_prog_put(prog->aux->dst_prog); 3137 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) 3138 /* we allocated a new trampoline, so free the old one */ 3139 bpf_trampoline_put(prog->aux->dst_trampoline); 3140 3141 prog->aux->dst_prog = NULL; 3142 prog->aux->dst_trampoline = NULL; 3143 mutex_unlock(&prog->aux->dst_mutex); 3144 3145 return bpf_link_settle(&link_primer); 3146 out_unlock: 3147 if (tr && tr != prog->aux->dst_trampoline) 3148 bpf_trampoline_put(tr); 3149 mutex_unlock(&prog->aux->dst_mutex); 3150 kfree(link); 3151 out_put_prog: 3152 if (tgt_prog_fd && tgt_prog) 3153 bpf_prog_put(tgt_prog); 3154 return err; 3155 } 3156 3157 struct bpf_raw_tp_link { 3158 struct bpf_link link; 3159 struct bpf_raw_event_map *btp; 3160 }; 3161 3162 static void bpf_raw_tp_link_release(struct bpf_link *link) 3163 { 3164 struct bpf_raw_tp_link *raw_tp = 3165 container_of(link, struct bpf_raw_tp_link, link); 3166 3167 bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog); 3168 bpf_put_raw_tracepoint(raw_tp->btp); 3169 } 3170 3171 static void bpf_raw_tp_link_dealloc(struct bpf_link *link) 3172 { 3173 struct bpf_raw_tp_link *raw_tp = 3174 container_of(link, struct bpf_raw_tp_link, link); 3175 3176 kfree(raw_tp); 3177 } 3178 3179 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, 3180 struct seq_file *seq) 3181 { 3182 struct bpf_raw_tp_link *raw_tp_link = 3183 container_of(link, struct bpf_raw_tp_link, link); 3184 3185 seq_printf(seq, 3186 "tp_name:\t%s\n", 3187 raw_tp_link->btp->tp->name); 3188 } 3189 3190 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, 3191 struct bpf_link_info *info) 3192 { 3193 struct bpf_raw_tp_link *raw_tp_link = 3194 container_of(link, struct bpf_raw_tp_link, link); 3195 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); 3196 const char *tp_name = raw_tp_link->btp->tp->name; 3197 u32 ulen = info->raw_tracepoint.tp_name_len; 3198 size_t tp_len = strlen(tp_name); 3199 3200 if (!ulen ^ !ubuf) 3201 return -EINVAL; 3202 3203 info->raw_tracepoint.tp_name_len = tp_len + 1; 3204 3205 if (!ubuf) 3206 return 0; 3207 3208 if (ulen >= tp_len + 1) { 3209 if (copy_to_user(ubuf, tp_name, tp_len + 1)) 3210 return -EFAULT; 3211 } else { 3212 char zero = '\0'; 3213 3214 if (copy_to_user(ubuf, tp_name, ulen - 1)) 3215 return -EFAULT; 3216 if (put_user(zero, ubuf + ulen - 1)) 3217 return -EFAULT; 3218 return -ENOSPC; 3219 } 3220 3221 return 0; 3222 } 3223 3224 static const struct bpf_link_ops bpf_raw_tp_link_lops = { 3225 .release = bpf_raw_tp_link_release, 3226 .dealloc = bpf_raw_tp_link_dealloc, 3227 .show_fdinfo = bpf_raw_tp_link_show_fdinfo, 3228 .fill_link_info = bpf_raw_tp_link_fill_link_info, 3229 }; 3230 3231 #ifdef CONFIG_PERF_EVENTS 3232 struct bpf_perf_link { 3233 struct bpf_link link; 3234 struct file *perf_file; 3235 }; 3236 3237 static void bpf_perf_link_release(struct bpf_link *link) 3238 { 3239 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3240 struct perf_event *event = perf_link->perf_file->private_data; 3241 3242 perf_event_free_bpf_prog(event); 3243 fput(perf_link->perf_file); 3244 } 3245 3246 static void bpf_perf_link_dealloc(struct bpf_link *link) 3247 { 3248 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 3249 3250 kfree(perf_link); 3251 } 3252 3253 static const struct 
bpf_link_ops bpf_perf_link_lops = { 3254 .release = bpf_perf_link_release, 3255 .dealloc = bpf_perf_link_dealloc, 3256 }; 3257 3258 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3259 { 3260 struct bpf_link_primer link_primer; 3261 struct bpf_perf_link *link; 3262 struct perf_event *event; 3263 struct file *perf_file; 3264 int err; 3265 3266 if (attr->link_create.flags) 3267 return -EINVAL; 3268 3269 perf_file = perf_event_get(attr->link_create.target_fd); 3270 if (IS_ERR(perf_file)) 3271 return PTR_ERR(perf_file); 3272 3273 link = kzalloc(sizeof(*link), GFP_USER); 3274 if (!link) { 3275 err = -ENOMEM; 3276 goto out_put_file; 3277 } 3278 bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog); 3279 link->perf_file = perf_file; 3280 3281 err = bpf_link_prime(&link->link, &link_primer); 3282 if (err) { 3283 kfree(link); 3284 goto out_put_file; 3285 } 3286 3287 event = perf_file->private_data; 3288 err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie); 3289 if (err) { 3290 bpf_link_cleanup(&link_primer); 3291 goto out_put_file; 3292 } 3293 /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */ 3294 bpf_prog_inc(prog); 3295 3296 return bpf_link_settle(&link_primer); 3297 3298 out_put_file: 3299 fput(perf_file); 3300 return err; 3301 } 3302 #else 3303 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3304 { 3305 return -EOPNOTSUPP; 3306 } 3307 #endif /* CONFIG_PERF_EVENTS */ 3308 3309 static int bpf_raw_tp_link_attach(struct bpf_prog *prog, 3310 const char __user *user_tp_name) 3311 { 3312 struct bpf_link_primer link_primer; 3313 struct bpf_raw_tp_link *link; 3314 struct bpf_raw_event_map *btp; 3315 const char *tp_name; 3316 char buf[128]; 3317 int err; 3318 3319 switch (prog->type) { 3320 case BPF_PROG_TYPE_TRACING: 3321 case BPF_PROG_TYPE_EXT: 3322 case BPF_PROG_TYPE_LSM: 3323 if (user_tp_name) 3324 /* The attach point for this category of programs 3325 * should be specified via btf_id during program load. 
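 * Passing a tracepoint name for these program types therefore
 * indicates a user-space error and is rejected with -EINVAL below.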
3326 */ 3327 return -EINVAL; 3328 if (prog->type == BPF_PROG_TYPE_TRACING && 3329 prog->expected_attach_type == BPF_TRACE_RAW_TP) { 3330 tp_name = prog->aux->attach_func_name; 3331 break; 3332 } 3333 return bpf_tracing_prog_attach(prog, 0, 0, 0); 3334 case BPF_PROG_TYPE_RAW_TRACEPOINT: 3335 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 3336 if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) 3337 return -EFAULT; 3338 buf[sizeof(buf) - 1] = 0; 3339 tp_name = buf; 3340 break; 3341 default: 3342 return -EINVAL; 3343 } 3344 3345 btp = bpf_get_raw_tracepoint(tp_name); 3346 if (!btp) 3347 return -ENOENT; 3348 3349 link = kzalloc(sizeof(*link), GFP_USER); 3350 if (!link) { 3351 err = -ENOMEM; 3352 goto out_put_btp; 3353 } 3354 bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, 3355 &bpf_raw_tp_link_lops, prog); 3356 link->btp = btp; 3357 3358 err = bpf_link_prime(&link->link, &link_primer); 3359 if (err) { 3360 kfree(link); 3361 goto out_put_btp; 3362 } 3363 3364 err = bpf_probe_register(link->btp, prog); 3365 if (err) { 3366 bpf_link_cleanup(&link_primer); 3367 goto out_put_btp; 3368 } 3369 3370 return bpf_link_settle(&link_primer); 3371 3372 out_put_btp: 3373 bpf_put_raw_tracepoint(btp); 3374 return err; 3375 } 3376 3377 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd 3378 3379 static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 3380 { 3381 struct bpf_prog *prog; 3382 int fd; 3383 3384 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) 3385 return -EINVAL; 3386 3387 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 3388 if (IS_ERR(prog)) 3389 return PTR_ERR(prog); 3390 3391 fd = bpf_raw_tp_link_attach(prog, u64_to_user_ptr(attr->raw_tracepoint.name)); 3392 if (fd < 0) 3393 bpf_prog_put(prog); 3394 return fd; 3395 } 3396 3397 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 3398 enum bpf_attach_type attach_type) 3399 { 3400 switch (prog->type) { 3401 case BPF_PROG_TYPE_CGROUP_SOCK: 3402 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3403 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3404 case BPF_PROG_TYPE_SK_LOOKUP: 3405 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 3406 case BPF_PROG_TYPE_CGROUP_SKB: 3407 if (!capable(CAP_NET_ADMIN)) 3408 /* cg-skb progs can be loaded by unpriv user. 3409 * check permissions at attach time. 3410 */ 3411 return -EPERM; 3412 return prog->enforce_expected_attach_type && 3413 prog->expected_attach_type != attach_type ? 
3414 -EINVAL : 0; 3415 default: 3416 return 0; 3417 } 3418 } 3419 3420 static enum bpf_prog_type 3421 attach_type_to_prog_type(enum bpf_attach_type attach_type) 3422 { 3423 switch (attach_type) { 3424 case BPF_CGROUP_INET_INGRESS: 3425 case BPF_CGROUP_INET_EGRESS: 3426 return BPF_PROG_TYPE_CGROUP_SKB; 3427 case BPF_CGROUP_INET_SOCK_CREATE: 3428 case BPF_CGROUP_INET_SOCK_RELEASE: 3429 case BPF_CGROUP_INET4_POST_BIND: 3430 case BPF_CGROUP_INET6_POST_BIND: 3431 return BPF_PROG_TYPE_CGROUP_SOCK; 3432 case BPF_CGROUP_INET4_BIND: 3433 case BPF_CGROUP_INET6_BIND: 3434 case BPF_CGROUP_INET4_CONNECT: 3435 case BPF_CGROUP_INET6_CONNECT: 3436 case BPF_CGROUP_INET4_GETPEERNAME: 3437 case BPF_CGROUP_INET6_GETPEERNAME: 3438 case BPF_CGROUP_INET4_GETSOCKNAME: 3439 case BPF_CGROUP_INET6_GETSOCKNAME: 3440 case BPF_CGROUP_UDP4_SENDMSG: 3441 case BPF_CGROUP_UDP6_SENDMSG: 3442 case BPF_CGROUP_UDP4_RECVMSG: 3443 case BPF_CGROUP_UDP6_RECVMSG: 3444 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 3445 case BPF_CGROUP_SOCK_OPS: 3446 return BPF_PROG_TYPE_SOCK_OPS; 3447 case BPF_CGROUP_DEVICE: 3448 return BPF_PROG_TYPE_CGROUP_DEVICE; 3449 case BPF_SK_MSG_VERDICT: 3450 return BPF_PROG_TYPE_SK_MSG; 3451 case BPF_SK_SKB_STREAM_PARSER: 3452 case BPF_SK_SKB_STREAM_VERDICT: 3453 case BPF_SK_SKB_VERDICT: 3454 return BPF_PROG_TYPE_SK_SKB; 3455 case BPF_LIRC_MODE2: 3456 return BPF_PROG_TYPE_LIRC_MODE2; 3457 case BPF_FLOW_DISSECTOR: 3458 return BPF_PROG_TYPE_FLOW_DISSECTOR; 3459 case BPF_CGROUP_SYSCTL: 3460 return BPF_PROG_TYPE_CGROUP_SYSCTL; 3461 case BPF_CGROUP_GETSOCKOPT: 3462 case BPF_CGROUP_SETSOCKOPT: 3463 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 3464 case BPF_TRACE_ITER: 3465 case BPF_TRACE_RAW_TP: 3466 case BPF_TRACE_FENTRY: 3467 case BPF_TRACE_FEXIT: 3468 case BPF_MODIFY_RETURN: 3469 return BPF_PROG_TYPE_TRACING; 3470 case BPF_LSM_MAC: 3471 return BPF_PROG_TYPE_LSM; 3472 case BPF_SK_LOOKUP: 3473 return BPF_PROG_TYPE_SK_LOOKUP; 3474 case BPF_XDP: 3475 return BPF_PROG_TYPE_XDP; 3476 case BPF_LSM_CGROUP: 3477 return BPF_PROG_TYPE_LSM; 3478 default: 3479 return BPF_PROG_TYPE_UNSPEC; 3480 } 3481 } 3482 3483 #define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd 3484 3485 #define BPF_F_ATTACH_MASK \ 3486 (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE) 3487 3488 static int bpf_prog_attach(const union bpf_attr *attr) 3489 { 3490 enum bpf_prog_type ptype; 3491 struct bpf_prog *prog; 3492 int ret; 3493 3494 if (CHECK_ATTR(BPF_PROG_ATTACH)) 3495 return -EINVAL; 3496 3497 if (attr->attach_flags & ~BPF_F_ATTACH_MASK) 3498 return -EINVAL; 3499 3500 ptype = attach_type_to_prog_type(attr->attach_type); 3501 if (ptype == BPF_PROG_TYPE_UNSPEC) 3502 return -EINVAL; 3503 3504 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 3505 if (IS_ERR(prog)) 3506 return PTR_ERR(prog); 3507 3508 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) { 3509 bpf_prog_put(prog); 3510 return -EINVAL; 3511 } 3512 3513 switch (ptype) { 3514 case BPF_PROG_TYPE_SK_SKB: 3515 case BPF_PROG_TYPE_SK_MSG: 3516 ret = sock_map_get_from_fd(attr, prog); 3517 break; 3518 case BPF_PROG_TYPE_LIRC_MODE2: 3519 ret = lirc_prog_attach(attr, prog); 3520 break; 3521 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3522 ret = netns_bpf_prog_attach(attr, prog); 3523 break; 3524 case BPF_PROG_TYPE_CGROUP_DEVICE: 3525 case BPF_PROG_TYPE_CGROUP_SKB: 3526 case BPF_PROG_TYPE_CGROUP_SOCK: 3527 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3528 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3529 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3530 case BPF_PROG_TYPE_SOCK_OPS: 3531 case BPF_PROG_TYPE_LSM: 3532 if (ptype == 
BPF_PROG_TYPE_LSM && 3533 prog->expected_attach_type != BPF_LSM_CGROUP) 3534 ret = -EINVAL; 3535 else 3536 ret = cgroup_bpf_prog_attach(attr, ptype, prog); 3537 break; 3538 default: 3539 ret = -EINVAL; 3540 } 3541 3542 if (ret) 3543 bpf_prog_put(prog); 3544 return ret; 3545 } 3546 3547 #define BPF_PROG_DETACH_LAST_FIELD attach_type 3548 3549 static int bpf_prog_detach(const union bpf_attr *attr) 3550 { 3551 enum bpf_prog_type ptype; 3552 3553 if (CHECK_ATTR(BPF_PROG_DETACH)) 3554 return -EINVAL; 3555 3556 ptype = attach_type_to_prog_type(attr->attach_type); 3557 3558 switch (ptype) { 3559 case BPF_PROG_TYPE_SK_MSG: 3560 case BPF_PROG_TYPE_SK_SKB: 3561 return sock_map_prog_detach(attr, ptype); 3562 case BPF_PROG_TYPE_LIRC_MODE2: 3563 return lirc_prog_detach(attr); 3564 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3565 return netns_bpf_prog_detach(attr, ptype); 3566 case BPF_PROG_TYPE_CGROUP_DEVICE: 3567 case BPF_PROG_TYPE_CGROUP_SKB: 3568 case BPF_PROG_TYPE_CGROUP_SOCK: 3569 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3570 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3571 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3572 case BPF_PROG_TYPE_SOCK_OPS: 3573 case BPF_PROG_TYPE_LSM: 3574 return cgroup_bpf_prog_detach(attr, ptype); 3575 default: 3576 return -EINVAL; 3577 } 3578 } 3579 3580 #define BPF_PROG_QUERY_LAST_FIELD query.prog_attach_flags 3581 3582 static int bpf_prog_query(const union bpf_attr *attr, 3583 union bpf_attr __user *uattr) 3584 { 3585 if (!capable(CAP_NET_ADMIN)) 3586 return -EPERM; 3587 if (CHECK_ATTR(BPF_PROG_QUERY)) 3588 return -EINVAL; 3589 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) 3590 return -EINVAL; 3591 3592 switch (attr->query.attach_type) { 3593 case BPF_CGROUP_INET_INGRESS: 3594 case BPF_CGROUP_INET_EGRESS: 3595 case BPF_CGROUP_INET_SOCK_CREATE: 3596 case BPF_CGROUP_INET_SOCK_RELEASE: 3597 case BPF_CGROUP_INET4_BIND: 3598 case BPF_CGROUP_INET6_BIND: 3599 case BPF_CGROUP_INET4_POST_BIND: 3600 case BPF_CGROUP_INET6_POST_BIND: 3601 case BPF_CGROUP_INET4_CONNECT: 3602 case BPF_CGROUP_INET6_CONNECT: 3603 case BPF_CGROUP_INET4_GETPEERNAME: 3604 case BPF_CGROUP_INET6_GETPEERNAME: 3605 case BPF_CGROUP_INET4_GETSOCKNAME: 3606 case BPF_CGROUP_INET6_GETSOCKNAME: 3607 case BPF_CGROUP_UDP4_SENDMSG: 3608 case BPF_CGROUP_UDP6_SENDMSG: 3609 case BPF_CGROUP_UDP4_RECVMSG: 3610 case BPF_CGROUP_UDP6_RECVMSG: 3611 case BPF_CGROUP_SOCK_OPS: 3612 case BPF_CGROUP_DEVICE: 3613 case BPF_CGROUP_SYSCTL: 3614 case BPF_CGROUP_GETSOCKOPT: 3615 case BPF_CGROUP_SETSOCKOPT: 3616 case BPF_LSM_CGROUP: 3617 return cgroup_bpf_prog_query(attr, uattr); 3618 case BPF_LIRC_MODE2: 3619 return lirc_prog_query(attr, uattr); 3620 case BPF_FLOW_DISSECTOR: 3621 case BPF_SK_LOOKUP: 3622 return netns_bpf_prog_query(attr, uattr); 3623 case BPF_SK_SKB_STREAM_PARSER: 3624 case BPF_SK_SKB_STREAM_VERDICT: 3625 case BPF_SK_MSG_VERDICT: 3626 case BPF_SK_SKB_VERDICT: 3627 return sock_map_bpf_prog_query(attr, uattr); 3628 default: 3629 return -EINVAL; 3630 } 3631 } 3632 3633 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size 3634 3635 static int bpf_prog_test_run(const union bpf_attr *attr, 3636 union bpf_attr __user *uattr) 3637 { 3638 struct bpf_prog *prog; 3639 int ret = -ENOTSUPP; 3640 3641 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 3642 return -EINVAL; 3643 3644 if ((attr->test.ctx_size_in && !attr->test.ctx_in) || 3645 (!attr->test.ctx_size_in && attr->test.ctx_in)) 3646 return -EINVAL; 3647 3648 if ((attr->test.ctx_size_out && !attr->test.ctx_out) || 3649 (!attr->test.ctx_size_out && attr->test.ctx_out)) 3650 return -EINVAL; 3651 3652 prog = 
bpf_prog_get(attr->test.prog_fd); 3653 if (IS_ERR(prog)) 3654 return PTR_ERR(prog); 3655 3656 if (prog->aux->ops->test_run) 3657 ret = prog->aux->ops->test_run(prog, attr, uattr); 3658 3659 bpf_prog_put(prog); 3660 return ret; 3661 } 3662 3663 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 3664 3665 static int bpf_obj_get_next_id(const union bpf_attr *attr, 3666 union bpf_attr __user *uattr, 3667 struct idr *idr, 3668 spinlock_t *lock) 3669 { 3670 u32 next_id = attr->start_id; 3671 int err = 0; 3672 3673 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 3674 return -EINVAL; 3675 3676 if (!capable(CAP_SYS_ADMIN)) 3677 return -EPERM; 3678 3679 next_id++; 3680 spin_lock_bh(lock); 3681 if (!idr_get_next(idr, &next_id)) 3682 err = -ENOENT; 3683 spin_unlock_bh(lock); 3684 3685 if (!err) 3686 err = put_user(next_id, &uattr->next_id); 3687 3688 return err; 3689 } 3690 3691 struct bpf_map *bpf_map_get_curr_or_next(u32 *id) 3692 { 3693 struct bpf_map *map; 3694 3695 spin_lock_bh(&map_idr_lock); 3696 again: 3697 map = idr_get_next(&map_idr, id); 3698 if (map) { 3699 map = __bpf_map_inc_not_zero(map, false); 3700 if (IS_ERR(map)) { 3701 (*id)++; 3702 goto again; 3703 } 3704 } 3705 spin_unlock_bh(&map_idr_lock); 3706 3707 return map; 3708 } 3709 3710 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) 3711 { 3712 struct bpf_prog *prog; 3713 3714 spin_lock_bh(&prog_idr_lock); 3715 again: 3716 prog = idr_get_next(&prog_idr, id); 3717 if (prog) { 3718 prog = bpf_prog_inc_not_zero(prog); 3719 if (IS_ERR(prog)) { 3720 (*id)++; 3721 goto again; 3722 } 3723 } 3724 spin_unlock_bh(&prog_idr_lock); 3725 3726 return prog; 3727 } 3728 3729 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 3730 3731 struct bpf_prog *bpf_prog_by_id(u32 id) 3732 { 3733 struct bpf_prog *prog; 3734 3735 if (!id) 3736 return ERR_PTR(-ENOENT); 3737 3738 spin_lock_bh(&prog_idr_lock); 3739 prog = idr_find(&prog_idr, id); 3740 if (prog) 3741 prog = bpf_prog_inc_not_zero(prog); 3742 else 3743 prog = ERR_PTR(-ENOENT); 3744 spin_unlock_bh(&prog_idr_lock); 3745 return prog; 3746 } 3747 3748 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 3749 { 3750 struct bpf_prog *prog; 3751 u32 id = attr->prog_id; 3752 int fd; 3753 3754 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 3755 return -EINVAL; 3756 3757 if (!capable(CAP_SYS_ADMIN)) 3758 return -EPERM; 3759 3760 prog = bpf_prog_by_id(id); 3761 if (IS_ERR(prog)) 3762 return PTR_ERR(prog); 3763 3764 fd = bpf_prog_new_fd(prog); 3765 if (fd < 0) 3766 bpf_prog_put(prog); 3767 3768 return fd; 3769 } 3770 3771 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 3772 3773 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 3774 { 3775 struct bpf_map *map; 3776 u32 id = attr->map_id; 3777 int f_flags; 3778 int fd; 3779 3780 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || 3781 attr->open_flags & ~BPF_OBJ_FLAG_MASK) 3782 return -EINVAL; 3783 3784 if (!capable(CAP_SYS_ADMIN)) 3785 return -EPERM; 3786 3787 f_flags = bpf_get_file_flag(attr->open_flags); 3788 if (f_flags < 0) 3789 return f_flags; 3790 3791 spin_lock_bh(&map_idr_lock); 3792 map = idr_find(&map_idr, id); 3793 if (map) 3794 map = __bpf_map_inc_not_zero(map, true); 3795 else 3796 map = ERR_PTR(-ENOENT); 3797 spin_unlock_bh(&map_idr_lock); 3798 3799 if (IS_ERR(map)) 3800 return PTR_ERR(map); 3801 3802 fd = bpf_map_new_fd(map, f_flags); 3803 if (fd < 0) 3804 bpf_map_put_with_uref(map); 3805 3806 return fd; 3807 } 3808 3809 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 3810 unsigned long addr, u32 *off, 3811 u32 
*type) 3812 { 3813 const struct bpf_map *map; 3814 int i; 3815 3816 mutex_lock(&prog->aux->used_maps_mutex); 3817 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { 3818 map = prog->aux->used_maps[i]; 3819 if (map == (void *)addr) { 3820 *type = BPF_PSEUDO_MAP_FD; 3821 goto out; 3822 } 3823 if (!map->ops->map_direct_value_meta) 3824 continue; 3825 if (!map->ops->map_direct_value_meta(map, addr, off)) { 3826 *type = BPF_PSEUDO_MAP_VALUE; 3827 goto out; 3828 } 3829 } 3830 map = NULL; 3831 3832 out: 3833 mutex_unlock(&prog->aux->used_maps_mutex); 3834 return map; 3835 } 3836 3837 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, 3838 const struct cred *f_cred) 3839 { 3840 const struct bpf_map *map; 3841 struct bpf_insn *insns; 3842 u32 off, type; 3843 u64 imm; 3844 u8 code; 3845 int i; 3846 3847 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), 3848 GFP_USER); 3849 if (!insns) 3850 return insns; 3851 3852 for (i = 0; i < prog->len; i++) { 3853 code = insns[i].code; 3854 3855 if (code == (BPF_JMP | BPF_TAIL_CALL)) { 3856 insns[i].code = BPF_JMP | BPF_CALL; 3857 insns[i].imm = BPF_FUNC_tail_call; 3858 /* fall-through */ 3859 } 3860 if (code == (BPF_JMP | BPF_CALL) || 3861 code == (BPF_JMP | BPF_CALL_ARGS)) { 3862 if (code == (BPF_JMP | BPF_CALL_ARGS)) 3863 insns[i].code = BPF_JMP | BPF_CALL; 3864 if (!bpf_dump_raw_ok(f_cred)) 3865 insns[i].imm = 0; 3866 continue; 3867 } 3868 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { 3869 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; 3870 continue; 3871 } 3872 3873 if (code != (BPF_LD | BPF_IMM | BPF_DW)) 3874 continue; 3875 3876 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 3877 map = bpf_map_from_imm(prog, imm, &off, &type); 3878 if (map) { 3879 insns[i].src_reg = type; 3880 insns[i].imm = map->id; 3881 insns[i + 1].imm = off; 3882 continue; 3883 } 3884 } 3885 3886 return insns; 3887 } 3888 3889 static int set_info_rec_size(struct bpf_prog_info *info) 3890 { 3891 /* 3892 * Ensure info.*_rec_size is the same as kernel expected size 3893 * 3894 * or 3895 * 3896 * Only allow zero *_rec_size if both _rec_size and _cnt are 3897 * zero. In this case, the kernel will set the expected 3898 * _rec_size back to the info. 
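 *
 * Either way, the *_rec_size values written back below let older
 * user space discover the record sizes this kernel expects.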
3899 */ 3900 3901 if ((info->nr_func_info || info->func_info_rec_size) && 3902 info->func_info_rec_size != sizeof(struct bpf_func_info)) 3903 return -EINVAL; 3904 3905 if ((info->nr_line_info || info->line_info_rec_size) && 3906 info->line_info_rec_size != sizeof(struct bpf_line_info)) 3907 return -EINVAL; 3908 3909 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && 3910 info->jited_line_info_rec_size != sizeof(__u64)) 3911 return -EINVAL; 3912 3913 info->func_info_rec_size = sizeof(struct bpf_func_info); 3914 info->line_info_rec_size = sizeof(struct bpf_line_info); 3915 info->jited_line_info_rec_size = sizeof(__u64); 3916 3917 return 0; 3918 } 3919 3920 static int bpf_prog_get_info_by_fd(struct file *file, 3921 struct bpf_prog *prog, 3922 const union bpf_attr *attr, 3923 union bpf_attr __user *uattr) 3924 { 3925 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 3926 struct btf *attach_btf = bpf_prog_get_target_btf(prog); 3927 struct bpf_prog_info info; 3928 u32 info_len = attr->info.info_len; 3929 struct bpf_prog_kstats stats; 3930 char __user *uinsns; 3931 u32 ulen; 3932 int err; 3933 3934 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 3935 if (err) 3936 return err; 3937 info_len = min_t(u32, sizeof(info), info_len); 3938 3939 memset(&info, 0, sizeof(info)); 3940 if (copy_from_user(&info, uinfo, info_len)) 3941 return -EFAULT; 3942 3943 info.type = prog->type; 3944 info.id = prog->aux->id; 3945 info.load_time = prog->aux->load_time; 3946 info.created_by_uid = from_kuid_munged(current_user_ns(), 3947 prog->aux->user->uid); 3948 info.gpl_compatible = prog->gpl_compatible; 3949 3950 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 3951 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 3952 3953 mutex_lock(&prog->aux->used_maps_mutex); 3954 ulen = info.nr_map_ids; 3955 info.nr_map_ids = prog->aux->used_map_cnt; 3956 ulen = min_t(u32, info.nr_map_ids, ulen); 3957 if (ulen) { 3958 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 3959 u32 i; 3960 3961 for (i = 0; i < ulen; i++) 3962 if (put_user(prog->aux->used_maps[i]->id, 3963 &user_map_ids[i])) { 3964 mutex_unlock(&prog->aux->used_maps_mutex); 3965 return -EFAULT; 3966 } 3967 } 3968 mutex_unlock(&prog->aux->used_maps_mutex); 3969 3970 err = set_info_rec_size(&info); 3971 if (err) 3972 return err; 3973 3974 bpf_prog_get_stats(prog, &stats); 3975 info.run_time_ns = stats.nsecs; 3976 info.run_cnt = stats.cnt; 3977 info.recursion_misses = stats.misses; 3978 3979 info.verified_insns = prog->aux->verified_insns; 3980 3981 if (!bpf_capable()) { 3982 info.jited_prog_len = 0; 3983 info.xlated_prog_len = 0; 3984 info.nr_jited_ksyms = 0; 3985 info.nr_jited_func_lens = 0; 3986 info.nr_func_info = 0; 3987 info.nr_line_info = 0; 3988 info.nr_jited_line_info = 0; 3989 goto done; 3990 } 3991 3992 ulen = info.xlated_prog_len; 3993 info.xlated_prog_len = bpf_prog_insn_size(prog); 3994 if (info.xlated_prog_len && ulen) { 3995 struct bpf_insn *insns_sanitized; 3996 bool fault; 3997 3998 if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) { 3999 info.xlated_prog_insns = 0; 4000 goto done; 4001 } 4002 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); 4003 if (!insns_sanitized) 4004 return -ENOMEM; 4005 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 4006 ulen = min_t(u32, info.xlated_prog_len, ulen); 4007 fault = copy_to_user(uinsns, insns_sanitized, ulen); 4008 kfree(insns_sanitized); 4009 if (fault) 4010 return -EFAULT; 4011 } 4012 4013 if 
(bpf_prog_is_offloaded(prog->aux)) { 4014 err = bpf_prog_offload_info_fill(&info, prog); 4015 if (err) 4016 return err; 4017 goto done; 4018 } 4019 4020 /* NOTE: the following code is supposed to be skipped for offload. 4021 * bpf_prog_offload_info_fill() is the place to fill similar fields 4022 * for offload. 4023 */ 4024 ulen = info.jited_prog_len; 4025 if (prog->aux->func_cnt) { 4026 u32 i; 4027 4028 info.jited_prog_len = 0; 4029 for (i = 0; i < prog->aux->func_cnt; i++) 4030 info.jited_prog_len += prog->aux->func[i]->jited_len; 4031 } else { 4032 info.jited_prog_len = prog->jited_len; 4033 } 4034 4035 if (info.jited_prog_len && ulen) { 4036 if (bpf_dump_raw_ok(file->f_cred)) { 4037 uinsns = u64_to_user_ptr(info.jited_prog_insns); 4038 ulen = min_t(u32, info.jited_prog_len, ulen); 4039 4040 /* for multi-function programs, copy the JITed 4041 * instructions for all the functions 4042 */ 4043 if (prog->aux->func_cnt) { 4044 u32 len, free, i; 4045 u8 *img; 4046 4047 free = ulen; 4048 for (i = 0; i < prog->aux->func_cnt; i++) { 4049 len = prog->aux->func[i]->jited_len; 4050 len = min_t(u32, len, free); 4051 img = (u8 *) prog->aux->func[i]->bpf_func; 4052 if (copy_to_user(uinsns, img, len)) 4053 return -EFAULT; 4054 uinsns += len; 4055 free -= len; 4056 if (!free) 4057 break; 4058 } 4059 } else { 4060 if (copy_to_user(uinsns, prog->bpf_func, ulen)) 4061 return -EFAULT; 4062 } 4063 } else { 4064 info.jited_prog_insns = 0; 4065 } 4066 } 4067 4068 ulen = info.nr_jited_ksyms; 4069 info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; 4070 if (ulen) { 4071 if (bpf_dump_raw_ok(file->f_cred)) { 4072 unsigned long ksym_addr; 4073 u64 __user *user_ksyms; 4074 u32 i; 4075 4076 /* copy the address of the kernel symbol 4077 * corresponding to each function 4078 */ 4079 ulen = min_t(u32, info.nr_jited_ksyms, ulen); 4080 user_ksyms = u64_to_user_ptr(info.jited_ksyms); 4081 if (prog->aux->func_cnt) { 4082 for (i = 0; i < ulen; i++) { 4083 ksym_addr = (unsigned long) 4084 prog->aux->func[i]->bpf_func; 4085 if (put_user((u64) ksym_addr, 4086 &user_ksyms[i])) 4087 return -EFAULT; 4088 } 4089 } else { 4090 ksym_addr = (unsigned long) prog->bpf_func; 4091 if (put_user((u64) ksym_addr, &user_ksyms[0])) 4092 return -EFAULT; 4093 } 4094 } else { 4095 info.jited_ksyms = 0; 4096 } 4097 } 4098 4099 ulen = info.nr_jited_func_lens; 4100 info.nr_jited_func_lens = prog->aux->func_cnt ? 
: 1; 4101 if (ulen) { 4102 if (bpf_dump_raw_ok(file->f_cred)) { 4103 u32 __user *user_lens; 4104 u32 func_len, i; 4105 4106 /* copy the JITed image lengths for each function */ 4107 ulen = min_t(u32, info.nr_jited_func_lens, ulen); 4108 user_lens = u64_to_user_ptr(info.jited_func_lens); 4109 if (prog->aux->func_cnt) { 4110 for (i = 0; i < ulen; i++) { 4111 func_len = 4112 prog->aux->func[i]->jited_len; 4113 if (put_user(func_len, &user_lens[i])) 4114 return -EFAULT; 4115 } 4116 } else { 4117 func_len = prog->jited_len; 4118 if (put_user(func_len, &user_lens[0])) 4119 return -EFAULT; 4120 } 4121 } else { 4122 info.jited_func_lens = 0; 4123 } 4124 } 4125 4126 if (prog->aux->btf) 4127 info.btf_id = btf_obj_id(prog->aux->btf); 4128 info.attach_btf_id = prog->aux->attach_btf_id; 4129 if (attach_btf) 4130 info.attach_btf_obj_id = btf_obj_id(attach_btf); 4131 4132 ulen = info.nr_func_info; 4133 info.nr_func_info = prog->aux->func_info_cnt; 4134 if (info.nr_func_info && ulen) { 4135 char __user *user_finfo; 4136 4137 user_finfo = u64_to_user_ptr(info.func_info); 4138 ulen = min_t(u32, info.nr_func_info, ulen); 4139 if (copy_to_user(user_finfo, prog->aux->func_info, 4140 info.func_info_rec_size * ulen)) 4141 return -EFAULT; 4142 } 4143 4144 ulen = info.nr_line_info; 4145 info.nr_line_info = prog->aux->nr_linfo; 4146 if (info.nr_line_info && ulen) { 4147 __u8 __user *user_linfo; 4148 4149 user_linfo = u64_to_user_ptr(info.line_info); 4150 ulen = min_t(u32, info.nr_line_info, ulen); 4151 if (copy_to_user(user_linfo, prog->aux->linfo, 4152 info.line_info_rec_size * ulen)) 4153 return -EFAULT; 4154 } 4155 4156 ulen = info.nr_jited_line_info; 4157 if (prog->aux->jited_linfo) 4158 info.nr_jited_line_info = prog->aux->nr_linfo; 4159 else 4160 info.nr_jited_line_info = 0; 4161 if (info.nr_jited_line_info && ulen) { 4162 if (bpf_dump_raw_ok(file->f_cred)) { 4163 unsigned long line_addr; 4164 __u64 __user *user_linfo; 4165 u32 i; 4166 4167 user_linfo = u64_to_user_ptr(info.jited_line_info); 4168 ulen = min_t(u32, info.nr_jited_line_info, ulen); 4169 for (i = 0; i < ulen; i++) { 4170 line_addr = (unsigned long)prog->aux->jited_linfo[i]; 4171 if (put_user((__u64)line_addr, &user_linfo[i])) 4172 return -EFAULT; 4173 } 4174 } else { 4175 info.jited_line_info = 0; 4176 } 4177 } 4178 4179 ulen = info.nr_prog_tags; 4180 info.nr_prog_tags = prog->aux->func_cnt ? 
	if (ulen) {
		__u8 __user (*user_prog_tags)[BPF_TAG_SIZE];
		u32 i;

		user_prog_tags = u64_to_user_ptr(info.prog_tags);
		ulen = min_t(u32, info.nr_prog_tags, ulen);
		if (prog->aux->func_cnt) {
			for (i = 0; i < ulen; i++) {
				if (copy_to_user(user_prog_tags[i],
						 prog->aux->func[i]->tag,
						 BPF_TAG_SIZE))
					return -EFAULT;
			}
		} else {
			if (copy_to_user(user_prog_tags[0],
					 prog->tag, BPF_TAG_SIZE))
				return -EFAULT;
		}
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

static int bpf_map_get_info_by_fd(struct file *file,
				  struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_map_info info;
	u32 info_len = attr->info.info_len;
	int err;

	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	memset(&info, 0, sizeof(info));
	info.type = map->map_type;
	info.id = map->id;
	info.key_size = map->key_size;
	info.value_size = map->value_size;
	info.max_entries = map->max_entries;
	info.map_flags = map->map_flags;
	info.map_extra = map->map_extra;
	memcpy(info.name, map->name, sizeof(map->name));

	if (map->btf) {
		info.btf_id = btf_obj_id(map->btf);
		info.btf_key_type_id = map->btf_key_type_id;
		info.btf_value_type_id = map->btf_value_type_id;
	}
	info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;

	if (bpf_map_is_offloaded(map)) {
		err = bpf_map_offload_info_fill(&info, map);
		if (err)
			return err;
	}

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

static int bpf_btf_get_info_by_fd(struct file *file,
				  struct btf *btf,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	u32 info_len = attr->info.info_len;
	int err;

	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len);
	if (err)
		return err;

	return btf_get_info_by_fd(btf, attr, uattr);
}

static int bpf_link_get_info_by_fd(struct file *file,
				   struct bpf_link *link,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_link_info info;
	u32 info_len = attr->info.info_len;
	int err;

	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	memset(&info, 0, sizeof(info));
	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = link->type;
	info.id = link->id;
	info.prog_id = link->prog->aux->id;

	if (link->ops->fill_link_info) {
		err = link->ops->fill_link_info(link, &info);
		if (err)
			return err;
	}

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

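/* Usage sketch (user space, not part of this file): the dispatcher below
 * routes BPF_OBJ_GET_INFO_BY_FD to one of the fill helpers above depending
 * on the fd type. A minimal caller for the program case, assuming "prog_fd"
 * is a valid BPF program fd:
 *
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/bpf.h>
 *
 *	static int prog_info_example(int prog_fd, struct bpf_prog_info *info)
 *	{
 *		union bpf_attr attr;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		memset(info, 0, sizeof(*info));
 *		attr.info.bpf_fd = prog_fd;
 *		attr.info.info_len = sizeof(*info);
 *		attr.info.info = (__u64)(unsigned long)info;
 *		return syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
 *	}
 *
 * On success the kernel writes back the info_len it actually filled, so an
 * older struct bpf_prog_info keeps working against newer kernels.
 */
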
#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	int ufd = attr->info.bpf_fd;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
		return -EINVAL;

	f = fdget(ufd);
	if (!f.file)
		return -EBADFD;

	if (f.file->f_op == &bpf_prog_fops)
		err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr,
					      uattr);
	else if (f.file->f_op == &bpf_map_fops)
		err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr,
					     uattr);
	else if (f.file->f_op == &btf_fops)
		err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
	else if (f.file->f_op == &bpf_link_fops)
		err = bpf_link_get_info_by_fd(f.file, f.file->private_data,
					      attr, uattr);
	else
		err = -EINVAL;

	fdput(f);
	return err;
}

#define BPF_BTF_LOAD_LAST_FIELD btf_log_level

static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr)
{
	if (CHECK_ATTR(BPF_BTF_LOAD))
		return -EINVAL;

	if (!bpf_capable())
		return -EPERM;

	return btf_new_fd(attr, uattr);
}

#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id

static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return btf_get_fd_by_id(attr->btf_id);
}

static int bpf_task_fd_query_copy(const union bpf_attr *attr,
				  union bpf_attr __user *uattr,
				  u32 prog_id, u32 fd_type,
				  const char *buf, u64 probe_offset,
				  u64 probe_addr)
{
	char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
	u32 len = buf ? strlen(buf) : 0, input_len;
	int err = 0;

	if (put_user(len, &uattr->task_fd_query.buf_len))
		return -EFAULT;
	input_len = attr->task_fd_query.buf_len;
	if (input_len && ubuf) {
		if (!len) {
			/* nothing to copy, just make ubuf NULL terminated */
			char zero = '\0';

			if (put_user(zero, ubuf))
				return -EFAULT;
		} else if (input_len >= len + 1) {
			/* ubuf can hold the string with NULL terminator */
			if (copy_to_user(ubuf, buf, len + 1))
				return -EFAULT;
		} else {
			/* ubuf cannot hold the string with NULL terminator,
			 * do a partial copy with NULL terminator.
			 */
			char zero = '\0';

			err = -ENOSPC;
			if (copy_to_user(ubuf, buf, input_len - 1))
				return -EFAULT;
			if (put_user(zero, ubuf + input_len - 1))
				return -EFAULT;
		}
	}

	if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
	    put_user(fd_type, &uattr->task_fd_query.fd_type) ||
	    put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
	    put_user(probe_addr, &uattr->task_fd_query.probe_addr))
		return -EFAULT;

	return err;
}

#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr

static int bpf_task_fd_query(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	pid_t pid = attr->task_fd_query.pid;
	u32 fd = attr->task_fd_query.fd;
	const struct perf_event *event;
	struct task_struct *task;
	struct file *file;
	int err;

	if (CHECK_ATTR(BPF_TASK_FD_QUERY))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (attr->task_fd_query.flags != 0)
		return -EINVAL;

	rcu_read_lock();
	task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
	rcu_read_unlock();
	if (!task)
		return -ENOENT;

	err = 0;
	file = fget_task(task, fd);
	put_task_struct(task);
	if (!file)
		return -EBADF;

	if (file->f_op == &bpf_link_fops) {
		struct bpf_link *link = file->private_data;

		if (link->ops == &bpf_raw_tp_link_lops) {
			struct bpf_raw_tp_link *raw_tp =
				container_of(link, struct bpf_raw_tp_link, link);
			struct bpf_raw_event_map *btp = raw_tp->btp;

			err = bpf_task_fd_query_copy(attr, uattr,
						     raw_tp->link.prog->aux->id,
						     BPF_FD_TYPE_RAW_TRACEPOINT,
						     btp->tp->name, 0, 0);
			goto put_file;
		}
		goto out_not_supp;
	}

	event = perf_get_event(file);
	if (!IS_ERR(event)) {
		u64 probe_offset, probe_addr;
		u32 prog_id, fd_type;
		const char *buf;

		err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
					      &buf, &probe_offset,
					      &probe_addr);
		if (!err)
			err = bpf_task_fd_query_copy(attr, uattr, prog_id,
						     fd_type, buf,
						     probe_offset,
						     probe_addr);
		goto put_file;
	}

out_not_supp:
	err = -ENOTSUPP;
put_file:
	fput(file);
	return err;
}

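/* Usage sketch (user space, not part of this file): BPF_TASK_FD_QUERY
 * resolves what a perf_event or raw_tracepoint fd in another task is attached
 * to; CAP_SYS_ADMIN is required, as enforced above. "pid" and "target_fd" are
 * assumed to identify such an fd in the target task; headers as in the
 * BPF_OBJ_GET_INFO_BY_FD sketch earlier.
 *
 *	static int task_fd_query_example(int pid, int target_fd,
 *					 char *buf, __u32 buf_len)
 *	{
 *		union bpf_attr attr;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.task_fd_query.pid = pid;
 *		attr.task_fd_query.fd = target_fd;
 *		attr.task_fd_query.buf = (__u64)(unsigned long)buf;
 *		attr.task_fd_query.buf_len = buf_len;
 *		if (syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr)))
 *			return -1;
 *		return 0;
 *	}
 *
 * On success the kernel fills prog_id, fd_type, probe_offset and probe_addr
 * in the attr, and copies the tracepoint/kprobe/uprobe name into buf
 * (truncated with -ENOSPC if buf_len is too small).
 */
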
#define BPF_MAP_BATCH_LAST_FIELD batch.flags

#define BPF_DO_BATCH(fn, ...)			\
	do {					\
		if (!fn) {			\
			err = -ENOTSUPP;	\
			goto err_put;		\
		}				\
		err = fn(__VA_ARGS__);		\
	} while (0)

static int bpf_map_do_batch(const union bpf_attr *attr,
			    union bpf_attr __user *uattr,
			    int cmd)
{
	bool has_read  = cmd == BPF_MAP_LOOKUP_BATCH ||
			 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
	bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
	struct bpf_map *map;
	int err, ufd;
	struct fd f;

	if (CHECK_ATTR(BPF_MAP_BATCH))
		return -EINVAL;

	ufd = attr->batch.map_fd;
	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (has_write)
		bpf_map_write_active_inc(map);
	if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}
	if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	if (cmd == BPF_MAP_LOOKUP_BATCH)
		BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr);
	else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
		BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr);
	else if (cmd == BPF_MAP_UPDATE_BATCH)
		BPF_DO_BATCH(map->ops->map_update_batch, map, f.file, attr, uattr);
	else
		BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr);
err_put:
	if (has_write)
		bpf_map_write_active_dec(map);
	fdput(f);
	return err;
}

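/* Usage sketch (user space, not part of this file): batched lookup over a
 * map fd with BPF_MAP_LOOKUP_BATCH. "keys" and "values" are assumed to be
 * caller-provided arrays sized for *count entries of the map's key/value
 * size; the in_batch/out_batch cursor is an opaque, map-specific token (a
 * hash map's fits in 8 bytes, which is what is assumed here). Headers as in
 * the earlier sketches.
 *
 *	static int map_lookup_batch_example(int map_fd, void *keys,
 *					    void *values, __u32 *count)
 *	{
 *		union bpf_attr attr;
 *		__u64 out_batch = 0;
 *		int err;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.batch.map_fd = map_fd;
 *		attr.batch.keys = (__u64)(unsigned long)keys;
 *		attr.batch.values = (__u64)(unsigned long)values;
 *		attr.batch.count = *count;
 *		attr.batch.out_batch = (__u64)(unsigned long)&out_batch;
 *		err = syscall(__NR_bpf, BPF_MAP_LOOKUP_BATCH, &attr, sizeof(attr));
 *		*count = attr.batch.count;	// elements actually copied out
 *		return err;
 *	}
 *
 * Subsequent calls pass the returned cursor back via attr.batch.in_batch to
 * resume the walk; -ENOENT indicates the walk reached the end of the map.
 * Lookup requires read access on the map fd, as checked above.
 */
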
#define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies
static int link_create(union bpf_attr *attr, bpfptr_t uattr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	int ret;

	if (CHECK_ATTR(BPF_LINK_CREATE))
		return -EINVAL;

	prog = bpf_prog_get(attr->link_create.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	ret = bpf_prog_attach_check_attach_type(prog,
						attr->link_create.attach_type);
	if (ret)
		goto out;

	switch (prog->type) {
	case BPF_PROG_TYPE_EXT:
		break;
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_TRACEPOINT:
		if (attr->link_create.attach_type != BPF_PERF_EVENT) {
			ret = -EINVAL;
			goto out;
		}
		break;
	case BPF_PROG_TYPE_KPROBE:
		if (attr->link_create.attach_type != BPF_PERF_EVENT &&
		    attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) {
			ret = -EINVAL;
			goto out;
		}
		break;
	default:
		ptype = attach_type_to_prog_type(attr->link_create.attach_type);
		if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
			ret = -EINVAL;
			goto out;
		}
		break;
	}

	switch (prog->type) {
	case BPF_PROG_TYPE_CGROUP_SKB:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_SOCK_OPS:
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		ret = cgroup_bpf_link_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_EXT:
		ret = bpf_tracing_prog_attach(prog,
					      attr->link_create.target_fd,
					      attr->link_create.target_btf_id,
					      attr->link_create.tracing.cookie);
		break;
	case BPF_PROG_TYPE_LSM:
	case BPF_PROG_TYPE_TRACING:
		if (attr->link_create.attach_type != prog->expected_attach_type) {
			ret = -EINVAL;
			goto out;
		}
		if (prog->expected_attach_type == BPF_TRACE_RAW_TP)
			ret = bpf_raw_tp_link_attach(prog, NULL);
		else if (prog->expected_attach_type == BPF_TRACE_ITER)
			ret = bpf_iter_link_attach(attr, uattr, prog);
		else if (prog->expected_attach_type == BPF_LSM_CGROUP)
			ret = cgroup_bpf_link_attach(attr, prog);
		else
			ret = bpf_tracing_prog_attach(prog,
						      attr->link_create.target_fd,
						      attr->link_create.target_btf_id,
						      attr->link_create.tracing.cookie);
		break;
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_SK_LOOKUP:
		ret = netns_bpf_link_create(attr, prog);
		break;
#ifdef CONFIG_NET
	case BPF_PROG_TYPE_XDP:
		ret = bpf_xdp_link_attach(attr, prog);
		break;
#endif
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_TRACEPOINT:
		ret = bpf_perf_link_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_KPROBE:
		if (attr->link_create.attach_type == BPF_PERF_EVENT)
			ret = bpf_perf_link_attach(attr, prog);
		else
			ret = bpf_kprobe_multi_link_attach(attr, prog);
		break;
	default:
		ret = -EINVAL;
	}

out:
	if (ret < 0)
		bpf_prog_put(prog);
	return ret;
}

#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd

static int link_update(union bpf_attr *attr)
{
	struct bpf_prog *old_prog = NULL, *new_prog;
	struct bpf_link *link;
	u32 flags;
	int ret;

	if (CHECK_ATTR(BPF_LINK_UPDATE))
		return -EINVAL;

	flags = attr->link_update.flags;
	if (flags & ~BPF_F_REPLACE)
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->link_update.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
	if (IS_ERR(new_prog)) {
		ret = PTR_ERR(new_prog);
		goto out_put_link;
	}

	if (flags & BPF_F_REPLACE) {
		old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
		if (IS_ERR(old_prog)) {
			ret = PTR_ERR(old_prog);
			old_prog = NULL;
			goto out_put_progs;
		}
	} else if (attr->link_update.old_prog_fd) {
		ret = -EINVAL;
		goto out_put_progs;
	}

	if (link->ops->update_prog)
		ret = link->ops->update_prog(link, new_prog, old_prog);
	else
		ret = -EINVAL;

out_put_progs:
	if (old_prog)
		bpf_prog_put(old_prog);
	if (ret)
		bpf_prog_put(new_prog);
out_put_link:
	bpf_link_put(link);
	return ret;
}

#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd

static int link_detach(union bpf_attr *attr)
{
	struct bpf_link *link;
	int ret;

	if (CHECK_ATTR(BPF_LINK_DETACH))
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->link_detach.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	if (link->ops->detach)
		ret = link->ops->detach(link);
	else
		ret = -EOPNOTSUPP;

	bpf_link_put(link);
	return ret;
}

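/* Usage sketch (user space, not part of this file): attaching a program via
 * BPF_LINK_CREATE and later replacing it through BPF_LINK_UPDATE, shown for
 * a cgroup attach point. "cgroup_fd", "prog_fd" and "new_prog_fd" are assumed
 * to be valid fds and the program type must match the attach type, as
 * link_create() enforces above.
 *
 *	static int link_example(int cgroup_fd, int prog_fd, int new_prog_fd)
 *	{
 *		union bpf_attr attr;
 *		int link_fd;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.link_create.prog_fd = prog_fd;
 *		attr.link_create.target_fd = cgroup_fd;
 *		attr.link_create.attach_type = BPF_CGROUP_INET_INGRESS;
 *		link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
 *		if (link_fd < 0)
 *			return link_fd;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.link_update.link_fd = link_fd;
 *		attr.link_update.new_prog_fd = new_prog_fd;
 *		return syscall(__NR_bpf, BPF_LINK_UPDATE, &attr, sizeof(attr));
 *	}
 *
 * Passing BPF_F_REPLACE in link_update.flags additionally requires
 * old_prog_fd to name the program currently attached to the link.
 */
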
static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
{
	return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
}

struct bpf_link *bpf_link_by_id(u32 id)
{
	struct bpf_link *link;

	if (!id)
		return ERR_PTR(-ENOENT);

	spin_lock_bh(&link_idr_lock);
	/* before link is "settled", ID is 0, pretend it doesn't exist yet */
	link = idr_find(&link_idr, id);
	if (link) {
		if (link->id)
			link = bpf_link_inc_not_zero(link);
		else
			link = ERR_PTR(-EAGAIN);
	} else {
		link = ERR_PTR(-ENOENT);
	}
	spin_unlock_bh(&link_idr_lock);
	return link;
}

struct bpf_link *bpf_link_get_curr_or_next(u32 *id)
{
	struct bpf_link *link;

	spin_lock_bh(&link_idr_lock);
again:
	link = idr_get_next(&link_idr, id);
	if (link) {
		link = bpf_link_inc_not_zero(link);
		if (IS_ERR(link)) {
			(*id)++;
			goto again;
		}
	}
	spin_unlock_bh(&link_idr_lock);

	return link;
}

#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id

static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_link *link;
	u32 id = attr->link_id;
	int fd;

	if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	link = bpf_link_by_id(id);
	if (IS_ERR(link))
		return PTR_ERR(link);

	fd = bpf_link_new_fd(link);
	if (fd < 0)
		bpf_link_put(link);

	return fd;
}

DEFINE_MUTEX(bpf_stats_enabled_mutex);

static int bpf_stats_release(struct inode *inode, struct file *file)
{
	mutex_lock(&bpf_stats_enabled_mutex);
	static_key_slow_dec(&bpf_stats_enabled_key.key);
	mutex_unlock(&bpf_stats_enabled_mutex);
	return 0;
}

static const struct file_operations bpf_stats_fops = {
	.release = bpf_stats_release,
};

static int bpf_enable_runtime_stats(void)
{
	int fd;

	mutex_lock(&bpf_stats_enabled_mutex);

	/* Set a very high limit to avoid overflow */
	if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
		mutex_unlock(&bpf_stats_enabled_mutex);
		return -EBUSY;
	}

	fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
	if (fd >= 0)
		static_key_slow_inc(&bpf_stats_enabled_key.key);

	mutex_unlock(&bpf_stats_enabled_mutex);
	return fd;
}

#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type

static int bpf_enable_stats(union bpf_attr *attr)
{

	if (CHECK_ATTR(BPF_ENABLE_STATS))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (attr->enable_stats.type) {
	case BPF_STATS_RUN_TIME:
		return bpf_enable_runtime_stats();
	default:
		break;
	}
	return -EINVAL;
}

#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags

static int bpf_iter_create(union bpf_attr *attr)
{
	struct bpf_link *link;
	int err;

	if (CHECK_ATTR(BPF_ITER_CREATE))
		return -EINVAL;

	if (attr->iter_create.flags)
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	err = bpf_iter_new_fd(link);
	bpf_link_put(link);

	return err;
}

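/* Usage sketch (user space, not part of this file): run-time stats stay
 * enabled for as long as the fd returned by BPF_ENABLE_STATS is held open;
 * closing it drops the static-key reference taken in
 * bpf_enable_runtime_stats() above. CAP_SYS_ADMIN is required.
 *
 *	static int enable_bpf_stats_example(void)
 *	{
 *		union bpf_attr attr;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.enable_stats.type = BPF_STATS_RUN_TIME;
 *		return syscall(__NR_bpf, BPF_ENABLE_STATS, &attr, sizeof(attr));
 *	}
 *
 * While the fd is open, run_time_ns/run_cnt show up in struct bpf_prog_info
 * returned by BPF_OBJ_GET_INFO_BY_FD.
 */
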
#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags

static int bpf_prog_bind_map(union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct bpf_map *map;
	struct bpf_map **used_maps_old, **used_maps_new;
	int i, ret = 0;

	if (CHECK_ATTR(BPF_PROG_BIND_MAP))
		return -EINVAL;

	if (attr->prog_bind_map.flags)
		return -EINVAL;

	prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	map = bpf_map_get(attr->prog_bind_map.map_fd);
	if (IS_ERR(map)) {
		ret = PTR_ERR(map);
		goto out_prog_put;
	}

	mutex_lock(&prog->aux->used_maps_mutex);

	used_maps_old = prog->aux->used_maps;

	for (i = 0; i < prog->aux->used_map_cnt; i++)
		if (used_maps_old[i] == map) {
			bpf_map_put(map);
			goto out_unlock;
		}

	used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
				      sizeof(used_maps_new[0]),
				      GFP_KERNEL);
	if (!used_maps_new) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	memcpy(used_maps_new, used_maps_old,
	       sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
	used_maps_new[prog->aux->used_map_cnt] = map;

	prog->aux->used_map_cnt++;
	prog->aux->used_maps = used_maps_new;

	kfree(used_maps_old);

out_unlock:
	mutex_unlock(&prog->aux->used_maps_mutex);

	if (ret)
		bpf_map_put(map);
out_prog_put:
	bpf_prog_put(prog);
	return ret;
}

static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
	union bpf_attr attr;
	bool capable;
	int err;

	capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;

	/* Intent here is for unprivileged_bpf_disabled to block the key
	 * object creation commands (BPF_MAP_CREATE and BPF_PROG_LOAD) for
	 * unprivileged users; other actions depend on fd availability and
	 * access to bpffs, so are dependent on object creation success.
	 * Capabilities are later verified for operations such as load and
	 * map create, so even with unprivileged BPF disabled, capability
	 * checks are still carried out for these and other operations.
	 */
	if (!capable &&
	    (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
		return -EPERM;

	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	memset(&attr, 0, sizeof(attr));
	if (copy_from_bpfptr(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr, uattr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr, uattr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_MAP_FREEZE:
		err = map_freeze(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr, uattr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr.user);
		break;
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr.user);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_BTF_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &btf_idr, &btf_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr.user);
		break;
	case BPF_RAW_TRACEPOINT_OPEN:
		err = bpf_raw_tracepoint_open(&attr);
		break;
	case BPF_BTF_LOAD:
		err = bpf_btf_load(&attr, uattr);
		break;
	case BPF_BTF_GET_FD_BY_ID:
		err = bpf_btf_get_fd_by_id(&attr);
		break;
	case BPF_TASK_FD_QUERY:
		err = bpf_task_fd_query(&attr, uattr.user);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
		err = map_lookup_and_delete_elem(&attr);
		break;
	case BPF_MAP_LOOKUP_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user,
				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
		break;
	case BPF_MAP_UPDATE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
		break;
	case BPF_MAP_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
		break;
	case BPF_LINK_CREATE:
		err = link_create(&attr, uattr);
		break;
	case BPF_LINK_UPDATE:
		err = link_update(&attr);
		break;
	case BPF_LINK_GET_FD_BY_ID:
		err = bpf_link_get_fd_by_id(&attr);
		break;
	case BPF_LINK_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &link_idr, &link_idr_lock);
		break;
	case BPF_ENABLE_STATS:
		err = bpf_enable_stats(&attr);
		break;
	case BPF_ITER_CREATE:
		err = bpf_iter_create(&attr);
		break;
	case BPF_LINK_DETACH:
		err = link_detach(&attr);
		break;
	case BPF_PROG_BIND_MAP:
		err = bpf_prog_bind_map(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
}

static bool syscall_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= U16_MAX)
		return false;
	if (off % size != 0)
		return false;
	return true;
}

BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
{
	switch (cmd) {
	case BPF_MAP_CREATE:
	case BPF_MAP_DELETE_ELEM:
	case BPF_MAP_UPDATE_ELEM:
	case BPF_MAP_FREEZE:
	case BPF_MAP_GET_FD_BY_ID:
	case BPF_PROG_LOAD:
	case BPF_BTF_LOAD:
	case BPF_LINK_CREATE:
	case BPF_RAW_TRACEPOINT_OPEN:
		break;
	default:
		return -EINVAL;
	}
	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
}

/* To shut up -Wmissing-prototypes.
 * This function is used by the kernel light skeleton
 * to load bpf programs when modules are loaded or during kernel boot.
 * See tools/lib/bpf/skel_internal.h
 */
int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);

int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	struct bpf_prog * __maybe_unused prog;
	struct bpf_tramp_run_ctx __maybe_unused run_ctx;

	switch (cmd) {
#ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
	case BPF_PROG_TEST_RUN:
		if (attr->test.data_in || attr->test.data_out ||
		    attr->test.ctx_out || attr->test.duration ||
		    attr->test.repeat || attr->test.flags)
			return -EINVAL;

		prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
		if (IS_ERR(prog))
			return PTR_ERR(prog);

		if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
		    attr->test.ctx_size_in > U16_MAX) {
			bpf_prog_put(prog);
			return -EINVAL;
		}

		run_ctx.bpf_cookie = 0;
		run_ctx.saved_run_ctx = NULL;
		if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
			/* recursion detected */
			bpf_prog_put(prog);
			return -EBUSY;
		}
		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
		__bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
						&run_ctx);
		bpf_prog_put(prog);
		return 0;
#endif
	default:
		return ____bpf_sys_bpf(cmd, attr, size);
	}
}
EXPORT_SYMBOL(kern_sys_bpf);

static const struct bpf_func_proto bpf_sys_bpf_proto = {
	.func = bpf_sys_bpf,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
	.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type = ARG_CONST_SIZE,
};

const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return bpf_base_func_proto(func_id);
}

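/* Usage sketch (BPF side, not part of this file): a BPF_PROG_TYPE_SYSCALL
 * program can itself drive the bpf() command set through the bpf_sys_bpf()
 * helper above; this is the mechanism light skeletons use (see
 * tools/lib/bpf/skel_internal.h). The context struct and its members below
 * are hypothetical, and the program is assumed to be run via
 * BPF_PROG_TEST_RUN/kern_sys_bpf() with that context passed in ctx_in.
 *
 *	struct args {
 *		__u64 btf_data;		// kernel-readable pointer to raw BTF
 *		__u32 btf_size;
 *		int btf_fd;		// filled in by the program
 *	};
 *
 *	SEC("syscall")
 *	int load_btf(struct args *ctx)
 *	{
 *		union bpf_attr attr = {};
 *
 *		attr.btf = ctx->btf_data;
 *		attr.btf_size = ctx->btf_size;
 *		ctx->btf_fd = bpf_sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
 *		return 0;
 *	}
 *
 * Note that syscall_prog_func_proto() below only exposes bpf_sys_bpf() to
 * perfmon-capable loaders.
 */
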
BPF_CALL_1(bpf_sys_close, u32, fd)
{
	/* When bpf program calls this helper there should not be
	 * an fdget() without matching completed fdput().
	 * This helper is allowed in the following callchain only:
	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
	 */
	return close_fd(fd);
}

static const struct bpf_func_proto bpf_sys_close_proto = {
	.func = bpf_sys_close,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
};

BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
	if (flags)
		return -EINVAL;

	if (name_sz <= 1 || name[name_sz - 1])
		return -EINVAL;

	if (!bpf_dump_raw_ok(current_cred()))
		return -EPERM;

	*res = kallsyms_lookup_name(name);
	return *res ? 0 : -ENOENT;
}

static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
	.func = bpf_kallsyms_lookup_name,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_MEM,
	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
	.arg3_type = ARG_ANYTHING,
	.arg4_type = ARG_PTR_TO_LONG,
};

static const struct bpf_func_proto *
syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_sys_bpf:
		return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto;
	case BPF_FUNC_btf_find_by_name_kind:
		return &bpf_btf_find_by_name_kind_proto;
	case BPF_FUNC_sys_close:
		return &bpf_sys_close_proto;
	case BPF_FUNC_kallsyms_lookup_name:
		return &bpf_kallsyms_lookup_name_proto;
	default:
		return tracing_prog_func_proto(func_id, prog);
	}
}

const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
	.get_func_proto = syscall_prog_func_proto,
	.is_valid_access = syscall_prog_is_valid_access,
};

const struct bpf_prog_ops bpf_syscall_prog_ops = {
	.test_run = bpf_prog_test_run_syscall,
};

#ifdef CONFIG_SYSCTL
static int bpf_stats_handler(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct static_key *key = (struct static_key *)table->data;
	static int saved_val;
	int val, ret;
	struct ctl_table tmp = {
		.data = &val,
		.maxlen = sizeof(val),
		.mode = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	val = saved_val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret && val != saved_val) {
		if (val)
			static_key_slow_inc(key);
		else
			static_key_slow_dec(key);
		saved_val = val;
	}
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}

void __weak unpriv_ebpf_notify(int new_state)
{
}

static int bpf_unpriv_handler(struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, unpriv_enable = *(int *)table->data;
	bool locked_state = unpriv_enable == 1;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &unpriv_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (locked_state && unpriv_enable != 1)
			return -EPERM;
		*(int *)table->data = unpriv_enable;
	}

	unpriv_ebpf_notify(unpriv_enable);

	return ret;
}

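/* Usage sketch (user space, not part of this file): reading the knob handled
 * by bpf_unpriv_handler() above. Valid values are 0 (unprivileged bpf()
 * enabled), 1 (disabled and locked until reboot) and 2 (disabled, but an
 * admin may still write 0); once the value is 1, any other write is rejected
 * with -EPERM.
 *
 *	#include <stdio.h>
 *
 *	static int read_unpriv_bpf_disabled_example(void)
 *	{
 *		FILE *f = fopen("/proc/sys/kernel/unprivileged_bpf_disabled", "r");
 *		int val = -1;
 *
 *		if (!f)
 *			return -1;
 *		if (fscanf(f, "%d", &val) != 1)
 *			val = -1;
 *		fclose(f);
 *		return val;
 *	}
 */
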
static struct ctl_table bpf_syscall_table[] = {
	{
		.procname = "unprivileged_bpf_disabled",
		.data = &sysctl_unprivileged_bpf_disabled,
		.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
		.mode = 0644,
		.proc_handler = bpf_unpriv_handler,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
	},
	{
		.procname = "bpf_stats_enabled",
		.data = &bpf_stats_enabled_key.key,
		.mode = 0644,
		.proc_handler = bpf_stats_handler,
	},
	{ }
};

static int __init bpf_syscall_sysctl_init(void)
{
	register_sysctl_init("kernel", bpf_syscall_table);
	return 0;
}
late_initcall(bpf_syscall_sysctl_init);
#endif /* CONFIG_SYSCTL */