1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of version 2 of the GNU General Public 5 * License as published by the Free Software Foundation. 6 * 7 * This program is distributed in the hope that it will be useful, but 8 * WITHOUT ANY WARRANTY; without even the implied warranty of 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 * General Public License for more details. 11 */ 12 #include <linux/bpf.h> 13 #include <linux/bpf_trace.h> 14 #include <linux/syscalls.h> 15 #include <linux/slab.h> 16 #include <linux/sched/signal.h> 17 #include <linux/vmalloc.h> 18 #include <linux/mmzone.h> 19 #include <linux/anon_inodes.h> 20 #include <linux/file.h> 21 #include <linux/license.h> 22 #include <linux/filter.h> 23 #include <linux/version.h> 24 #include <linux/kernel.h> 25 #include <linux/idr.h> 26 #include <linux/cred.h> 27 #include <linux/timekeeping.h> 28 #include <linux/ctype.h> 29 30 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ 31 (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ 32 (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ 33 (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 34 #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) 35 #define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map)) 36 37 DEFINE_PER_CPU(int, bpf_prog_active); 38 static DEFINE_IDR(prog_idr); 39 static DEFINE_SPINLOCK(prog_idr_lock); 40 static DEFINE_IDR(map_idr); 41 static DEFINE_SPINLOCK(map_idr_lock); 42 43 int sysctl_unprivileged_bpf_disabled __read_mostly; 44 45 static const struct bpf_map_ops * const bpf_map_types[] = { 46 #define BPF_PROG_TYPE(_id, _ops) 47 #define BPF_MAP_TYPE(_id, _ops) \ 48 [_id] = &_ops, 49 #include <linux/bpf_types.h> 50 #undef BPF_PROG_TYPE 51 #undef BPF_MAP_TYPE 52 }; 53 54 /* 55 * If we're handed a bigger struct than we know of, ensure all the unknown bits 56 * are 0 - i.e. new user-space does not rely on any kernel feature extensions 57 * we don't know about yet. 58 * 59 * There is a ToCToU between this function call and the following 60 * copy_from_user() call. However, this is not a concern since this function is 61 * meant to be a future-proofing of bits. 62 */ 63 static int check_uarg_tail_zero(void __user *uaddr, 64 size_t expected_size, 65 size_t actual_size) 66 { 67 unsigned char __user *addr; 68 unsigned char __user *end; 69 unsigned char val; 70 int err; 71 72 if (unlikely(actual_size > PAGE_SIZE)) /* silly large */ 73 return -E2BIG; 74 75 if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size))) 76 return -EFAULT; 77 78 if (actual_size <= expected_size) 79 return 0; 80 81 addr = uaddr + expected_size; 82 end = uaddr + actual_size; 83 84 for (; addr < end; addr++) { 85 err = get_user(val, addr); 86 if (err) 87 return err; 88 if (val) 89 return -E2BIG; 90 } 91 92 return 0; 93 } 94 95 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 96 { 97 struct bpf_map *map; 98 99 if (attr->map_type >= ARRAY_SIZE(bpf_map_types) || 100 !bpf_map_types[attr->map_type]) 101 return ERR_PTR(-EINVAL); 102 103 map = bpf_map_types[attr->map_type]->map_alloc(attr); 104 if (IS_ERR(map)) 105 return map; 106 map->ops = bpf_map_types[attr->map_type]; 107 map->map_type = attr->map_type; 108 return map; 109 } 110 111 void *bpf_map_area_alloc(size_t size, int numa_node) 112 { 113 /* We definitely need __GFP_NORETRY, so OOM killer doesn't 114 * trigger under memory pressure as we really just want to 115 * fail instead. 116 */ 117 const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO; 118 void *area; 119 120 if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 121 area = kmalloc_node(size, GFP_USER | flags, numa_node); 122 if (area != NULL) 123 return area; 124 } 125 126 return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags, 127 __builtin_return_address(0)); 128 } 129 130 void bpf_map_area_free(void *area) 131 { 132 kvfree(area); 133 } 134 135 int bpf_map_precharge_memlock(u32 pages) 136 { 137 struct user_struct *user = get_current_user(); 138 unsigned long memlock_limit, cur; 139 140 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 141 cur = atomic_long_read(&user->locked_vm); 142 free_uid(user); 143 if (cur + pages > memlock_limit) 144 return -EPERM; 145 return 0; 146 } 147 148 static int bpf_map_charge_memlock(struct bpf_map *map) 149 { 150 struct user_struct *user = get_current_user(); 151 unsigned long memlock_limit; 152 153 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 154 155 atomic_long_add(map->pages, &user->locked_vm); 156 157 if (atomic_long_read(&user->locked_vm) > memlock_limit) { 158 atomic_long_sub(map->pages, &user->locked_vm); 159 free_uid(user); 160 return -EPERM; 161 } 162 map->user = user; 163 return 0; 164 } 165 166 static void bpf_map_uncharge_memlock(struct bpf_map *map) 167 { 168 struct user_struct *user = map->user; 169 170 atomic_long_sub(map->pages, &user->locked_vm); 171 free_uid(user); 172 } 173 174 static int bpf_map_alloc_id(struct bpf_map *map) 175 { 176 int id; 177 178 spin_lock_bh(&map_idr_lock); 179 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); 180 if (id > 0) 181 map->id = id; 182 spin_unlock_bh(&map_idr_lock); 183 184 if (WARN_ON_ONCE(!id)) 185 return -ENOSPC; 186 187 return id > 0 ? 0 : id; 188 } 189 190 static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) 191 { 192 unsigned long flags; 193 194 if (do_idr_lock) 195 spin_lock_irqsave(&map_idr_lock, flags); 196 else 197 __acquire(&map_idr_lock); 198 199 idr_remove(&map_idr, map->id); 200 201 if (do_idr_lock) 202 spin_unlock_irqrestore(&map_idr_lock, flags); 203 else 204 __release(&map_idr_lock); 205 } 206 207 /* called from workqueue */ 208 static void bpf_map_free_deferred(struct work_struct *work) 209 { 210 struct bpf_map *map = container_of(work, struct bpf_map, work); 211 212 bpf_map_uncharge_memlock(map); 213 /* implementation dependent freeing */ 214 map->ops->map_free(map); 215 } 216 217 static void bpf_map_put_uref(struct bpf_map *map) 218 { 219 if (atomic_dec_and_test(&map->usercnt)) { 220 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) 221 bpf_fd_array_map_clear(map); 222 } 223 } 224 225 /* decrement map refcnt and schedule it for freeing via workqueue 226 * (unrelying map implementation ops->map_free() might sleep) 227 */ 228 static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) 229 { 230 if (atomic_dec_and_test(&map->refcnt)) { 231 /* bpf_map_free_id() must be called first */ 232 bpf_map_free_id(map, do_idr_lock); 233 INIT_WORK(&map->work, bpf_map_free_deferred); 234 schedule_work(&map->work); 235 } 236 } 237 238 void bpf_map_put(struct bpf_map *map) 239 { 240 __bpf_map_put(map, true); 241 } 242 243 void bpf_map_put_with_uref(struct bpf_map *map) 244 { 245 bpf_map_put_uref(map); 246 bpf_map_put(map); 247 } 248 249 static int bpf_map_release(struct inode *inode, struct file *filp) 250 { 251 struct bpf_map *map = filp->private_data; 252 253 if (map->ops->map_release) 254 map->ops->map_release(map, filp); 255 256 bpf_map_put_with_uref(map); 257 return 0; 258 } 259 260 #ifdef CONFIG_PROC_FS 261 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 262 { 263 const struct bpf_map *map = filp->private_data; 264 const struct bpf_array *array; 265 u32 owner_prog_type = 0; 266 u32 owner_jited = 0; 267 268 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { 269 array = container_of(map, struct bpf_array, map); 270 owner_prog_type = array->owner_prog_type; 271 owner_jited = array->owner_jited; 272 } 273 274 seq_printf(m, 275 "map_type:\t%u\n" 276 "key_size:\t%u\n" 277 "value_size:\t%u\n" 278 "max_entries:\t%u\n" 279 "map_flags:\t%#x\n" 280 "memlock:\t%llu\n", 281 map->map_type, 282 map->key_size, 283 map->value_size, 284 map->max_entries, 285 map->map_flags, 286 map->pages * 1ULL << PAGE_SHIFT); 287 288 if (owner_prog_type) { 289 seq_printf(m, "owner_prog_type:\t%u\n", 290 owner_prog_type); 291 seq_printf(m, "owner_jited:\t%u\n", 292 owner_jited); 293 } 294 } 295 #endif 296 297 static const struct file_operations bpf_map_fops = { 298 #ifdef CONFIG_PROC_FS 299 .show_fdinfo = bpf_map_show_fdinfo, 300 #endif 301 .release = bpf_map_release, 302 }; 303 304 int bpf_map_new_fd(struct bpf_map *map) 305 { 306 return anon_inode_getfd("bpf-map", &bpf_map_fops, map, 307 O_RDWR | O_CLOEXEC); 308 } 309 310 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 311 #define CHECK_ATTR(CMD) \ 312 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 313 sizeof(attr->CMD##_LAST_FIELD), 0, \ 314 sizeof(*attr) - \ 315 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 316 sizeof(attr->CMD##_LAST_FIELD)) != NULL 317 318 /* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes. 319 * Return 0 on success and < 0 on error. 320 */ 321 static int bpf_obj_name_cpy(char *dst, const char *src) 322 { 323 const char *end = src + BPF_OBJ_NAME_LEN; 324 325 /* Copy all isalnum() and '_' char */ 326 while (src < end && *src) { 327 if (!isalnum(*src) && *src != '_') 328 return -EINVAL; 329 *dst++ = *src++; 330 } 331 332 /* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */ 333 if (src == end) 334 return -EINVAL; 335 336 /* '\0' terminates dst */ 337 *dst = 0; 338 339 return 0; 340 } 341 342 #define BPF_MAP_CREATE_LAST_FIELD map_name 343 /* called via syscall */ 344 static int map_create(union bpf_attr *attr) 345 { 346 int numa_node = bpf_map_attr_numa_node(attr); 347 struct bpf_map *map; 348 int err; 349 350 err = CHECK_ATTR(BPF_MAP_CREATE); 351 if (err) 352 return -EINVAL; 353 354 if (numa_node != NUMA_NO_NODE && 355 ((unsigned int)numa_node >= nr_node_ids || 356 !node_online(numa_node))) 357 return -EINVAL; 358 359 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 360 map = find_and_alloc_map(attr); 361 if (IS_ERR(map)) 362 return PTR_ERR(map); 363 364 err = bpf_obj_name_cpy(map->name, attr->map_name); 365 if (err) 366 goto free_map_nouncharge; 367 368 atomic_set(&map->refcnt, 1); 369 atomic_set(&map->usercnt, 1); 370 371 err = bpf_map_charge_memlock(map); 372 if (err) 373 goto free_map_nouncharge; 374 375 err = bpf_map_alloc_id(map); 376 if (err) 377 goto free_map; 378 379 err = bpf_map_new_fd(map); 380 if (err < 0) { 381 /* failed to allocate fd. 382 * bpf_map_put() is needed because the above 383 * bpf_map_alloc_id() has published the map 384 * to the userspace and the userspace may 385 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 386 */ 387 bpf_map_put(map); 388 return err; 389 } 390 391 trace_bpf_map_create(map, err); 392 return err; 393 394 free_map: 395 bpf_map_uncharge_memlock(map); 396 free_map_nouncharge: 397 map->ops->map_free(map); 398 return err; 399 } 400 401 /* if error is returned, fd is released. 402 * On success caller should complete fd access with matching fdput() 403 */ 404 struct bpf_map *__bpf_map_get(struct fd f) 405 { 406 if (!f.file) 407 return ERR_PTR(-EBADF); 408 if (f.file->f_op != &bpf_map_fops) { 409 fdput(f); 410 return ERR_PTR(-EINVAL); 411 } 412 413 return f.file->private_data; 414 } 415 416 /* prog's and map's refcnt limit */ 417 #define BPF_MAX_REFCNT 32768 418 419 struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) 420 { 421 if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) { 422 atomic_dec(&map->refcnt); 423 return ERR_PTR(-EBUSY); 424 } 425 if (uref) 426 atomic_inc(&map->usercnt); 427 return map; 428 } 429 430 struct bpf_map *bpf_map_get_with_uref(u32 ufd) 431 { 432 struct fd f = fdget(ufd); 433 struct bpf_map *map; 434 435 map = __bpf_map_get(f); 436 if (IS_ERR(map)) 437 return map; 438 439 map = bpf_map_inc(map, true); 440 fdput(f); 441 442 return map; 443 } 444 445 /* map_idr_lock should have been held */ 446 static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, 447 bool uref) 448 { 449 int refold; 450 451 refold = __atomic_add_unless(&map->refcnt, 1, 0); 452 453 if (refold >= BPF_MAX_REFCNT) { 454 __bpf_map_put(map, false); 455 return ERR_PTR(-EBUSY); 456 } 457 458 if (!refold) 459 return ERR_PTR(-ENOENT); 460 461 if (uref) 462 atomic_inc(&map->usercnt); 463 464 return map; 465 } 466 467 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) 468 { 469 return -ENOTSUPP; 470 } 471 472 /* last field in 'union bpf_attr' used by this command */ 473 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value 474 475 static int map_lookup_elem(union bpf_attr *attr) 476 { 477 void __user *ukey = u64_to_user_ptr(attr->key); 478 void __user *uvalue = u64_to_user_ptr(attr->value); 479 int ufd = attr->map_fd; 480 struct bpf_map *map; 481 void *key, *value, *ptr; 482 u32 value_size; 483 struct fd f; 484 int err; 485 486 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 487 return -EINVAL; 488 489 f = fdget(ufd); 490 map = __bpf_map_get(f); 491 if (IS_ERR(map)) 492 return PTR_ERR(map); 493 494 key = memdup_user(ukey, map->key_size); 495 if (IS_ERR(key)) { 496 err = PTR_ERR(key); 497 goto err_put; 498 } 499 500 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 501 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 502 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) 503 value_size = round_up(map->value_size, 8) * num_possible_cpus(); 504 else if (IS_FD_MAP(map)) 505 value_size = sizeof(u32); 506 else 507 value_size = map->value_size; 508 509 err = -ENOMEM; 510 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 511 if (!value) 512 goto free_key; 513 514 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 515 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 516 err = bpf_percpu_hash_copy(map, key, value); 517 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 518 err = bpf_percpu_array_copy(map, key, value); 519 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 520 err = bpf_stackmap_copy(map, key, value); 521 } else if (IS_FD_ARRAY(map)) { 522 err = bpf_fd_array_map_lookup_elem(map, key, value); 523 } else if (IS_FD_HASH(map)) { 524 err = bpf_fd_htab_map_lookup_elem(map, key, value); 525 } else { 526 rcu_read_lock(); 527 ptr = map->ops->map_lookup_elem(map, key); 528 if (ptr) 529 memcpy(value, ptr, value_size); 530 rcu_read_unlock(); 531 err = ptr ? 0 : -ENOENT; 532 } 533 534 if (err) 535 goto free_value; 536 537 err = -EFAULT; 538 if (copy_to_user(uvalue, value, value_size) != 0) 539 goto free_value; 540 541 trace_bpf_map_lookup_elem(map, ufd, key, value); 542 err = 0; 543 544 free_value: 545 kfree(value); 546 free_key: 547 kfree(key); 548 err_put: 549 fdput(f); 550 return err; 551 } 552 553 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 554 555 static int map_update_elem(union bpf_attr *attr) 556 { 557 void __user *ukey = u64_to_user_ptr(attr->key); 558 void __user *uvalue = u64_to_user_ptr(attr->value); 559 int ufd = attr->map_fd; 560 struct bpf_map *map; 561 void *key, *value; 562 u32 value_size; 563 struct fd f; 564 int err; 565 566 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 567 return -EINVAL; 568 569 f = fdget(ufd); 570 map = __bpf_map_get(f); 571 if (IS_ERR(map)) 572 return PTR_ERR(map); 573 574 key = memdup_user(ukey, map->key_size); 575 if (IS_ERR(key)) { 576 err = PTR_ERR(key); 577 goto err_put; 578 } 579 580 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 581 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 582 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) 583 value_size = round_up(map->value_size, 8) * num_possible_cpus(); 584 else 585 value_size = map->value_size; 586 587 err = -ENOMEM; 588 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 589 if (!value) 590 goto free_key; 591 592 err = -EFAULT; 593 if (copy_from_user(value, uvalue, value_size) != 0) 594 goto free_value; 595 596 /* must increment bpf_prog_active to avoid kprobe+bpf triggering from 597 * inside bpf map update or delete otherwise deadlocks are possible 598 */ 599 preempt_disable(); 600 __this_cpu_inc(bpf_prog_active); 601 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 602 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 603 err = bpf_percpu_hash_update(map, key, value, attr->flags); 604 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 605 err = bpf_percpu_array_update(map, key, value, attr->flags); 606 } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || 607 map->map_type == BPF_MAP_TYPE_PROG_ARRAY || 608 map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || 609 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { 610 rcu_read_lock(); 611 err = bpf_fd_array_map_update_elem(map, f.file, key, value, 612 attr->flags); 613 rcu_read_unlock(); 614 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 615 rcu_read_lock(); 616 err = bpf_fd_htab_map_update_elem(map, f.file, key, value, 617 attr->flags); 618 rcu_read_unlock(); 619 } else { 620 rcu_read_lock(); 621 err = map->ops->map_update_elem(map, key, value, attr->flags); 622 rcu_read_unlock(); 623 } 624 __this_cpu_dec(bpf_prog_active); 625 preempt_enable(); 626 627 if (!err) 628 trace_bpf_map_update_elem(map, ufd, key, value); 629 free_value: 630 kfree(value); 631 free_key: 632 kfree(key); 633 err_put: 634 fdput(f); 635 return err; 636 } 637 638 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 639 640 static int map_delete_elem(union bpf_attr *attr) 641 { 642 void __user *ukey = u64_to_user_ptr(attr->key); 643 int ufd = attr->map_fd; 644 struct bpf_map *map; 645 struct fd f; 646 void *key; 647 int err; 648 649 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 650 return -EINVAL; 651 652 f = fdget(ufd); 653 map = __bpf_map_get(f); 654 if (IS_ERR(map)) 655 return PTR_ERR(map); 656 657 key = memdup_user(ukey, map->key_size); 658 if (IS_ERR(key)) { 659 err = PTR_ERR(key); 660 goto err_put; 661 } 662 663 preempt_disable(); 664 __this_cpu_inc(bpf_prog_active); 665 rcu_read_lock(); 666 err = map->ops->map_delete_elem(map, key); 667 rcu_read_unlock(); 668 __this_cpu_dec(bpf_prog_active); 669 preempt_enable(); 670 671 if (!err) 672 trace_bpf_map_delete_elem(map, ufd, key); 673 kfree(key); 674 err_put: 675 fdput(f); 676 return err; 677 } 678 679 /* last field in 'union bpf_attr' used by this command */ 680 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 681 682 static int map_get_next_key(union bpf_attr *attr) 683 { 684 void __user *ukey = u64_to_user_ptr(attr->key); 685 void __user *unext_key = u64_to_user_ptr(attr->next_key); 686 int ufd = attr->map_fd; 687 struct bpf_map *map; 688 void *key, *next_key; 689 struct fd f; 690 int err; 691 692 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 693 return -EINVAL; 694 695 f = fdget(ufd); 696 map = __bpf_map_get(f); 697 if (IS_ERR(map)) 698 return PTR_ERR(map); 699 700 if (ukey) { 701 key = memdup_user(ukey, map->key_size); 702 if (IS_ERR(key)) { 703 err = PTR_ERR(key); 704 goto err_put; 705 } 706 } else { 707 key = NULL; 708 } 709 710 err = -ENOMEM; 711 next_key = kmalloc(map->key_size, GFP_USER); 712 if (!next_key) 713 goto free_key; 714 715 rcu_read_lock(); 716 err = map->ops->map_get_next_key(map, key, next_key); 717 rcu_read_unlock(); 718 if (err) 719 goto free_next_key; 720 721 err = -EFAULT; 722 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 723 goto free_next_key; 724 725 trace_bpf_map_next_key(map, ufd, key, next_key); 726 err = 0; 727 728 free_next_key: 729 kfree(next_key); 730 free_key: 731 kfree(key); 732 err_put: 733 fdput(f); 734 return err; 735 } 736 737 static const struct bpf_verifier_ops * const bpf_prog_types[] = { 738 #define BPF_PROG_TYPE(_id, _ops) \ 739 [_id] = &_ops, 740 #define BPF_MAP_TYPE(_id, _ops) 741 #include <linux/bpf_types.h> 742 #undef BPF_PROG_TYPE 743 #undef BPF_MAP_TYPE 744 }; 745 746 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 747 { 748 if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type]) 749 return -EINVAL; 750 751 prog->aux->ops = bpf_prog_types[type]; 752 prog->type = type; 753 return 0; 754 } 755 756 /* drop refcnt on maps used by eBPF program and free auxilary data */ 757 static void free_used_maps(struct bpf_prog_aux *aux) 758 { 759 int i; 760 761 for (i = 0; i < aux->used_map_cnt; i++) 762 bpf_map_put(aux->used_maps[i]); 763 764 kfree(aux->used_maps); 765 } 766 767 int __bpf_prog_charge(struct user_struct *user, u32 pages) 768 { 769 unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 770 unsigned long user_bufs; 771 772 if (user) { 773 user_bufs = atomic_long_add_return(pages, &user->locked_vm); 774 if (user_bufs > memlock_limit) { 775 atomic_long_sub(pages, &user->locked_vm); 776 return -EPERM; 777 } 778 } 779 780 return 0; 781 } 782 783 void __bpf_prog_uncharge(struct user_struct *user, u32 pages) 784 { 785 if (user) 786 atomic_long_sub(pages, &user->locked_vm); 787 } 788 789 static int bpf_prog_charge_memlock(struct bpf_prog *prog) 790 { 791 struct user_struct *user = get_current_user(); 792 int ret; 793 794 ret = __bpf_prog_charge(user, prog->pages); 795 if (ret) { 796 free_uid(user); 797 return ret; 798 } 799 800 prog->aux->user = user; 801 return 0; 802 } 803 804 static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) 805 { 806 struct user_struct *user = prog->aux->user; 807 808 __bpf_prog_uncharge(user, prog->pages); 809 free_uid(user); 810 } 811 812 static int bpf_prog_alloc_id(struct bpf_prog *prog) 813 { 814 int id; 815 816 spin_lock_bh(&prog_idr_lock); 817 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 818 if (id > 0) 819 prog->aux->id = id; 820 spin_unlock_bh(&prog_idr_lock); 821 822 /* id is in [1, INT_MAX) */ 823 if (WARN_ON_ONCE(!id)) 824 return -ENOSPC; 825 826 return id > 0 ? 0 : id; 827 } 828 829 static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) 830 { 831 /* cBPF to eBPF migrations are currently not in the idr store. */ 832 if (!prog->aux->id) 833 return; 834 835 if (do_idr_lock) 836 spin_lock_bh(&prog_idr_lock); 837 else 838 __acquire(&prog_idr_lock); 839 840 idr_remove(&prog_idr, prog->aux->id); 841 842 if (do_idr_lock) 843 spin_unlock_bh(&prog_idr_lock); 844 else 845 __release(&prog_idr_lock); 846 } 847 848 static void __bpf_prog_put_rcu(struct rcu_head *rcu) 849 { 850 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 851 852 free_used_maps(aux); 853 bpf_prog_uncharge_memlock(aux->prog); 854 bpf_prog_free(aux->prog); 855 } 856 857 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) 858 { 859 if (atomic_dec_and_test(&prog->aux->refcnt)) { 860 trace_bpf_prog_put_rcu(prog); 861 /* bpf_prog_free_id() must be called first */ 862 bpf_prog_free_id(prog, do_idr_lock); 863 bpf_prog_kallsyms_del(prog); 864 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 865 } 866 } 867 868 void bpf_prog_put(struct bpf_prog *prog) 869 { 870 __bpf_prog_put(prog, true); 871 } 872 EXPORT_SYMBOL_GPL(bpf_prog_put); 873 874 static int bpf_prog_release(struct inode *inode, struct file *filp) 875 { 876 struct bpf_prog *prog = filp->private_data; 877 878 bpf_prog_put(prog); 879 return 0; 880 } 881 882 #ifdef CONFIG_PROC_FS 883 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 884 { 885 const struct bpf_prog *prog = filp->private_data; 886 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 887 888 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 889 seq_printf(m, 890 "prog_type:\t%u\n" 891 "prog_jited:\t%u\n" 892 "prog_tag:\t%s\n" 893 "memlock:\t%llu\n", 894 prog->type, 895 prog->jited, 896 prog_tag, 897 prog->pages * 1ULL << PAGE_SHIFT); 898 } 899 #endif 900 901 static const struct file_operations bpf_prog_fops = { 902 #ifdef CONFIG_PROC_FS 903 .show_fdinfo = bpf_prog_show_fdinfo, 904 #endif 905 .release = bpf_prog_release, 906 }; 907 908 int bpf_prog_new_fd(struct bpf_prog *prog) 909 { 910 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 911 O_RDWR | O_CLOEXEC); 912 } 913 914 static struct bpf_prog *____bpf_prog_get(struct fd f) 915 { 916 if (!f.file) 917 return ERR_PTR(-EBADF); 918 if (f.file->f_op != &bpf_prog_fops) { 919 fdput(f); 920 return ERR_PTR(-EINVAL); 921 } 922 923 return f.file->private_data; 924 } 925 926 struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) 927 { 928 if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) { 929 atomic_sub(i, &prog->aux->refcnt); 930 return ERR_PTR(-EBUSY); 931 } 932 return prog; 933 } 934 EXPORT_SYMBOL_GPL(bpf_prog_add); 935 936 void bpf_prog_sub(struct bpf_prog *prog, int i) 937 { 938 /* Only to be used for undoing previous bpf_prog_add() in some 939 * error path. We still know that another entity in our call 940 * path holds a reference to the program, thus atomic_sub() can 941 * be safely used in such cases! 942 */ 943 WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0); 944 } 945 EXPORT_SYMBOL_GPL(bpf_prog_sub); 946 947 struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) 948 { 949 return bpf_prog_add(prog, 1); 950 } 951 EXPORT_SYMBOL_GPL(bpf_prog_inc); 952 953 /* prog_idr_lock should have been held */ 954 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 955 { 956 int refold; 957 958 refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0); 959 960 if (refold >= BPF_MAX_REFCNT) { 961 __bpf_prog_put(prog, false); 962 return ERR_PTR(-EBUSY); 963 } 964 965 if (!refold) 966 return ERR_PTR(-ENOENT); 967 968 return prog; 969 } 970 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 971 972 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type) 973 { 974 struct fd f = fdget(ufd); 975 struct bpf_prog *prog; 976 977 prog = ____bpf_prog_get(f); 978 if (IS_ERR(prog)) 979 return prog; 980 if (type && prog->type != *type) { 981 prog = ERR_PTR(-EINVAL); 982 goto out; 983 } 984 985 prog = bpf_prog_inc(prog); 986 out: 987 fdput(f); 988 return prog; 989 } 990 991 struct bpf_prog *bpf_prog_get(u32 ufd) 992 { 993 return __bpf_prog_get(ufd, NULL); 994 } 995 996 struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) 997 { 998 struct bpf_prog *prog = __bpf_prog_get(ufd, &type); 999 1000 if (!IS_ERR(prog)) 1001 trace_bpf_prog_get_type(prog); 1002 return prog; 1003 } 1004 EXPORT_SYMBOL_GPL(bpf_prog_get_type); 1005 1006 /* last field in 'union bpf_attr' used by this command */ 1007 #define BPF_PROG_LOAD_LAST_FIELD prog_name 1008 1009 static int bpf_prog_load(union bpf_attr *attr) 1010 { 1011 enum bpf_prog_type type = attr->prog_type; 1012 struct bpf_prog *prog; 1013 int err; 1014 char license[128]; 1015 bool is_gpl; 1016 1017 if (CHECK_ATTR(BPF_PROG_LOAD)) 1018 return -EINVAL; 1019 1020 if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT) 1021 return -EINVAL; 1022 1023 /* copy eBPF program license from user space */ 1024 if (strncpy_from_user(license, u64_to_user_ptr(attr->license), 1025 sizeof(license) - 1) < 0) 1026 return -EFAULT; 1027 license[sizeof(license) - 1] = 0; 1028 1029 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 1030 is_gpl = license_is_gpl_compatible(license); 1031 1032 if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS) 1033 return -E2BIG; 1034 1035 if (type == BPF_PROG_TYPE_KPROBE && 1036 attr->kern_version != LINUX_VERSION_CODE) 1037 return -EINVAL; 1038 1039 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 1040 type != BPF_PROG_TYPE_CGROUP_SKB && 1041 !capable(CAP_SYS_ADMIN)) 1042 return -EPERM; 1043 1044 /* plain bpf_prog allocation */ 1045 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 1046 if (!prog) 1047 return -ENOMEM; 1048 1049 err = bpf_prog_charge_memlock(prog); 1050 if (err) 1051 goto free_prog_nouncharge; 1052 1053 prog->len = attr->insn_cnt; 1054 1055 err = -EFAULT; 1056 if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns), 1057 bpf_prog_insn_size(prog)) != 0) 1058 goto free_prog; 1059 1060 prog->orig_prog = NULL; 1061 prog->jited = 0; 1062 1063 atomic_set(&prog->aux->refcnt, 1); 1064 prog->gpl_compatible = is_gpl ? 1 : 0; 1065 1066 /* find program type: socket_filter vs tracing_filter */ 1067 err = find_prog_type(type, prog); 1068 if (err < 0) 1069 goto free_prog; 1070 1071 prog->aux->load_time = ktime_get_boot_ns(); 1072 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name); 1073 if (err) 1074 goto free_prog; 1075 1076 /* run eBPF verifier */ 1077 err = bpf_check(&prog, attr); 1078 if (err < 0) 1079 goto free_used_maps; 1080 1081 /* eBPF program is ready to be JITed */ 1082 prog = bpf_prog_select_runtime(prog, &err); 1083 if (err < 0) 1084 goto free_used_maps; 1085 1086 err = bpf_prog_alloc_id(prog); 1087 if (err) 1088 goto free_used_maps; 1089 1090 err = bpf_prog_new_fd(prog); 1091 if (err < 0) { 1092 /* failed to allocate fd. 1093 * bpf_prog_put() is needed because the above 1094 * bpf_prog_alloc_id() has published the prog 1095 * to the userspace and the userspace may 1096 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID. 1097 */ 1098 bpf_prog_put(prog); 1099 return err; 1100 } 1101 1102 bpf_prog_kallsyms_add(prog); 1103 trace_bpf_prog_load(prog, err); 1104 return err; 1105 1106 free_used_maps: 1107 free_used_maps(prog->aux); 1108 free_prog: 1109 bpf_prog_uncharge_memlock(prog); 1110 free_prog_nouncharge: 1111 bpf_prog_free(prog); 1112 return err; 1113 } 1114 1115 #define BPF_OBJ_LAST_FIELD bpf_fd 1116 1117 static int bpf_obj_pin(const union bpf_attr *attr) 1118 { 1119 if (CHECK_ATTR(BPF_OBJ)) 1120 return -EINVAL; 1121 1122 return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); 1123 } 1124 1125 static int bpf_obj_get(const union bpf_attr *attr) 1126 { 1127 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0) 1128 return -EINVAL; 1129 1130 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname)); 1131 } 1132 1133 #ifdef CONFIG_CGROUP_BPF 1134 1135 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags 1136 1137 static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach) 1138 { 1139 struct bpf_prog *prog = NULL; 1140 int ufd = attr->target_fd; 1141 struct bpf_map *map; 1142 struct fd f; 1143 int err; 1144 1145 f = fdget(ufd); 1146 map = __bpf_map_get(f); 1147 if (IS_ERR(map)) 1148 return PTR_ERR(map); 1149 1150 if (attach) { 1151 prog = bpf_prog_get_type(attr->attach_bpf_fd, 1152 BPF_PROG_TYPE_SK_SKB); 1153 if (IS_ERR(prog)) { 1154 fdput(f); 1155 return PTR_ERR(prog); 1156 } 1157 } 1158 1159 err = sock_map_prog(map, prog, attr->attach_type); 1160 if (err) { 1161 fdput(f); 1162 if (prog) 1163 bpf_prog_put(prog); 1164 return err; 1165 } 1166 1167 fdput(f); 1168 return 0; 1169 } 1170 1171 #define BPF_F_ATTACH_MASK \ 1172 (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) 1173 1174 static int bpf_prog_attach(const union bpf_attr *attr) 1175 { 1176 enum bpf_prog_type ptype; 1177 struct bpf_prog *prog; 1178 struct cgroup *cgrp; 1179 int ret; 1180 1181 if (!capable(CAP_NET_ADMIN)) 1182 return -EPERM; 1183 1184 if (CHECK_ATTR(BPF_PROG_ATTACH)) 1185 return -EINVAL; 1186 1187 if (attr->attach_flags & ~BPF_F_ATTACH_MASK) 1188 return -EINVAL; 1189 1190 switch (attr->attach_type) { 1191 case BPF_CGROUP_INET_INGRESS: 1192 case BPF_CGROUP_INET_EGRESS: 1193 ptype = BPF_PROG_TYPE_CGROUP_SKB; 1194 break; 1195 case BPF_CGROUP_INET_SOCK_CREATE: 1196 ptype = BPF_PROG_TYPE_CGROUP_SOCK; 1197 break; 1198 case BPF_CGROUP_SOCK_OPS: 1199 ptype = BPF_PROG_TYPE_SOCK_OPS; 1200 break; 1201 case BPF_SK_SKB_STREAM_PARSER: 1202 case BPF_SK_SKB_STREAM_VERDICT: 1203 return sockmap_get_from_fd(attr, true); 1204 default: 1205 return -EINVAL; 1206 } 1207 1208 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 1209 if (IS_ERR(prog)) 1210 return PTR_ERR(prog); 1211 1212 cgrp = cgroup_get_from_fd(attr->target_fd); 1213 if (IS_ERR(cgrp)) { 1214 bpf_prog_put(prog); 1215 return PTR_ERR(cgrp); 1216 } 1217 1218 ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, 1219 attr->attach_flags); 1220 if (ret) 1221 bpf_prog_put(prog); 1222 cgroup_put(cgrp); 1223 1224 return ret; 1225 } 1226 1227 #define BPF_PROG_DETACH_LAST_FIELD attach_type 1228 1229 static int bpf_prog_detach(const union bpf_attr *attr) 1230 { 1231 enum bpf_prog_type ptype; 1232 struct bpf_prog *prog; 1233 struct cgroup *cgrp; 1234 int ret; 1235 1236 if (!capable(CAP_NET_ADMIN)) 1237 return -EPERM; 1238 1239 if (CHECK_ATTR(BPF_PROG_DETACH)) 1240 return -EINVAL; 1241 1242 switch (attr->attach_type) { 1243 case BPF_CGROUP_INET_INGRESS: 1244 case BPF_CGROUP_INET_EGRESS: 1245 ptype = BPF_PROG_TYPE_CGROUP_SKB; 1246 break; 1247 case BPF_CGROUP_INET_SOCK_CREATE: 1248 ptype = BPF_PROG_TYPE_CGROUP_SOCK; 1249 break; 1250 case BPF_CGROUP_SOCK_OPS: 1251 ptype = BPF_PROG_TYPE_SOCK_OPS; 1252 break; 1253 case BPF_SK_SKB_STREAM_PARSER: 1254 case BPF_SK_SKB_STREAM_VERDICT: 1255 return sockmap_get_from_fd(attr, false); 1256 default: 1257 return -EINVAL; 1258 } 1259 1260 cgrp = cgroup_get_from_fd(attr->target_fd); 1261 if (IS_ERR(cgrp)) 1262 return PTR_ERR(cgrp); 1263 1264 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 1265 if (IS_ERR(prog)) 1266 prog = NULL; 1267 1268 ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); 1269 if (prog) 1270 bpf_prog_put(prog); 1271 cgroup_put(cgrp); 1272 return ret; 1273 } 1274 1275 #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt 1276 1277 static int bpf_prog_query(const union bpf_attr *attr, 1278 union bpf_attr __user *uattr) 1279 { 1280 struct cgroup *cgrp; 1281 int ret; 1282 1283 if (!capable(CAP_NET_ADMIN)) 1284 return -EPERM; 1285 if (CHECK_ATTR(BPF_PROG_QUERY)) 1286 return -EINVAL; 1287 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) 1288 return -EINVAL; 1289 1290 switch (attr->query.attach_type) { 1291 case BPF_CGROUP_INET_INGRESS: 1292 case BPF_CGROUP_INET_EGRESS: 1293 case BPF_CGROUP_INET_SOCK_CREATE: 1294 case BPF_CGROUP_SOCK_OPS: 1295 break; 1296 default: 1297 return -EINVAL; 1298 } 1299 cgrp = cgroup_get_from_fd(attr->query.target_fd); 1300 if (IS_ERR(cgrp)) 1301 return PTR_ERR(cgrp); 1302 ret = cgroup_bpf_query(cgrp, attr, uattr); 1303 cgroup_put(cgrp); 1304 return ret; 1305 } 1306 #endif /* CONFIG_CGROUP_BPF */ 1307 1308 #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration 1309 1310 static int bpf_prog_test_run(const union bpf_attr *attr, 1311 union bpf_attr __user *uattr) 1312 { 1313 struct bpf_prog *prog; 1314 int ret = -ENOTSUPP; 1315 1316 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 1317 return -EINVAL; 1318 1319 prog = bpf_prog_get(attr->test.prog_fd); 1320 if (IS_ERR(prog)) 1321 return PTR_ERR(prog); 1322 1323 if (prog->aux->ops->test_run) 1324 ret = prog->aux->ops->test_run(prog, attr, uattr); 1325 1326 bpf_prog_put(prog); 1327 return ret; 1328 } 1329 1330 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 1331 1332 static int bpf_obj_get_next_id(const union bpf_attr *attr, 1333 union bpf_attr __user *uattr, 1334 struct idr *idr, 1335 spinlock_t *lock) 1336 { 1337 u32 next_id = attr->start_id; 1338 int err = 0; 1339 1340 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 1341 return -EINVAL; 1342 1343 if (!capable(CAP_SYS_ADMIN)) 1344 return -EPERM; 1345 1346 next_id++; 1347 spin_lock_bh(lock); 1348 if (!idr_get_next(idr, &next_id)) 1349 err = -ENOENT; 1350 spin_unlock_bh(lock); 1351 1352 if (!err) 1353 err = put_user(next_id, &uattr->next_id); 1354 1355 return err; 1356 } 1357 1358 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 1359 1360 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 1361 { 1362 struct bpf_prog *prog; 1363 u32 id = attr->prog_id; 1364 int fd; 1365 1366 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 1367 return -EINVAL; 1368 1369 if (!capable(CAP_SYS_ADMIN)) 1370 return -EPERM; 1371 1372 spin_lock_bh(&prog_idr_lock); 1373 prog = idr_find(&prog_idr, id); 1374 if (prog) 1375 prog = bpf_prog_inc_not_zero(prog); 1376 else 1377 prog = ERR_PTR(-ENOENT); 1378 spin_unlock_bh(&prog_idr_lock); 1379 1380 if (IS_ERR(prog)) 1381 return PTR_ERR(prog); 1382 1383 fd = bpf_prog_new_fd(prog); 1384 if (fd < 0) 1385 bpf_prog_put(prog); 1386 1387 return fd; 1388 } 1389 1390 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id 1391 1392 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 1393 { 1394 struct bpf_map *map; 1395 u32 id = attr->map_id; 1396 int fd; 1397 1398 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID)) 1399 return -EINVAL; 1400 1401 if (!capable(CAP_SYS_ADMIN)) 1402 return -EPERM; 1403 1404 spin_lock_bh(&map_idr_lock); 1405 map = idr_find(&map_idr, id); 1406 if (map) 1407 map = bpf_map_inc_not_zero(map, true); 1408 else 1409 map = ERR_PTR(-ENOENT); 1410 spin_unlock_bh(&map_idr_lock); 1411 1412 if (IS_ERR(map)) 1413 return PTR_ERR(map); 1414 1415 fd = bpf_map_new_fd(map); 1416 if (fd < 0) 1417 bpf_map_put(map); 1418 1419 return fd; 1420 } 1421 1422 static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, 1423 const union bpf_attr *attr, 1424 union bpf_attr __user *uattr) 1425 { 1426 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 1427 struct bpf_prog_info info = {}; 1428 u32 info_len = attr->info.info_len; 1429 char __user *uinsns; 1430 u32 ulen; 1431 int err; 1432 1433 err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); 1434 if (err) 1435 return err; 1436 info_len = min_t(u32, sizeof(info), info_len); 1437 1438 if (copy_from_user(&info, uinfo, info_len)) 1439 return -EFAULT; 1440 1441 info.type = prog->type; 1442 info.id = prog->aux->id; 1443 info.load_time = prog->aux->load_time; 1444 info.created_by_uid = from_kuid_munged(current_user_ns(), 1445 prog->aux->user->uid); 1446 1447 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 1448 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 1449 1450 ulen = info.nr_map_ids; 1451 info.nr_map_ids = prog->aux->used_map_cnt; 1452 ulen = min_t(u32, info.nr_map_ids, ulen); 1453 if (ulen) { 1454 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 1455 u32 i; 1456 1457 for (i = 0; i < ulen; i++) 1458 if (put_user(prog->aux->used_maps[i]->id, 1459 &user_map_ids[i])) 1460 return -EFAULT; 1461 } 1462 1463 if (!capable(CAP_SYS_ADMIN)) { 1464 info.jited_prog_len = 0; 1465 info.xlated_prog_len = 0; 1466 goto done; 1467 } 1468 1469 ulen = info.jited_prog_len; 1470 info.jited_prog_len = prog->jited_len; 1471 if (info.jited_prog_len && ulen) { 1472 uinsns = u64_to_user_ptr(info.jited_prog_insns); 1473 ulen = min_t(u32, info.jited_prog_len, ulen); 1474 if (copy_to_user(uinsns, prog->bpf_func, ulen)) 1475 return -EFAULT; 1476 } 1477 1478 ulen = info.xlated_prog_len; 1479 info.xlated_prog_len = bpf_prog_insn_size(prog); 1480 if (info.xlated_prog_len && ulen) { 1481 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 1482 ulen = min_t(u32, info.xlated_prog_len, ulen); 1483 if (copy_to_user(uinsns, prog->insnsi, ulen)) 1484 return -EFAULT; 1485 } 1486 1487 done: 1488 if (copy_to_user(uinfo, &info, info_len) || 1489 put_user(info_len, &uattr->info.info_len)) 1490 return -EFAULT; 1491 1492 return 0; 1493 } 1494 1495 static int bpf_map_get_info_by_fd(struct bpf_map *map, 1496 const union bpf_attr *attr, 1497 union bpf_attr __user *uattr) 1498 { 1499 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); 1500 struct bpf_map_info info = {}; 1501 u32 info_len = attr->info.info_len; 1502 int err; 1503 1504 err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); 1505 if (err) 1506 return err; 1507 info_len = min_t(u32, sizeof(info), info_len); 1508 1509 info.type = map->map_type; 1510 info.id = map->id; 1511 info.key_size = map->key_size; 1512 info.value_size = map->value_size; 1513 info.max_entries = map->max_entries; 1514 info.map_flags = map->map_flags; 1515 memcpy(info.name, map->name, sizeof(map->name)); 1516 1517 if (copy_to_user(uinfo, &info, info_len) || 1518 put_user(info_len, &uattr->info.info_len)) 1519 return -EFAULT; 1520 1521 return 0; 1522 } 1523 1524 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info 1525 1526 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, 1527 union bpf_attr __user *uattr) 1528 { 1529 int ufd = attr->info.bpf_fd; 1530 struct fd f; 1531 int err; 1532 1533 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) 1534 return -EINVAL; 1535 1536 f = fdget(ufd); 1537 if (!f.file) 1538 return -EBADFD; 1539 1540 if (f.file->f_op == &bpf_prog_fops) 1541 err = bpf_prog_get_info_by_fd(f.file->private_data, attr, 1542 uattr); 1543 else if (f.file->f_op == &bpf_map_fops) 1544 err = bpf_map_get_info_by_fd(f.file->private_data, attr, 1545 uattr); 1546 else 1547 err = -EINVAL; 1548 1549 fdput(f); 1550 return err; 1551 } 1552 1553 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 1554 { 1555 union bpf_attr attr = {}; 1556 int err; 1557 1558 if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled) 1559 return -EPERM; 1560 1561 err = check_uarg_tail_zero(uattr, sizeof(attr), size); 1562 if (err) 1563 return err; 1564 size = min_t(u32, size, sizeof(attr)); 1565 1566 /* copy attributes from user space, may be less than sizeof(bpf_attr) */ 1567 if (copy_from_user(&attr, uattr, size) != 0) 1568 return -EFAULT; 1569 1570 switch (cmd) { 1571 case BPF_MAP_CREATE: 1572 err = map_create(&attr); 1573 break; 1574 case BPF_MAP_LOOKUP_ELEM: 1575 err = map_lookup_elem(&attr); 1576 break; 1577 case BPF_MAP_UPDATE_ELEM: 1578 err = map_update_elem(&attr); 1579 break; 1580 case BPF_MAP_DELETE_ELEM: 1581 err = map_delete_elem(&attr); 1582 break; 1583 case BPF_MAP_GET_NEXT_KEY: 1584 err = map_get_next_key(&attr); 1585 break; 1586 case BPF_PROG_LOAD: 1587 err = bpf_prog_load(&attr); 1588 break; 1589 case BPF_OBJ_PIN: 1590 err = bpf_obj_pin(&attr); 1591 break; 1592 case BPF_OBJ_GET: 1593 err = bpf_obj_get(&attr); 1594 break; 1595 #ifdef CONFIG_CGROUP_BPF 1596 case BPF_PROG_ATTACH: 1597 err = bpf_prog_attach(&attr); 1598 break; 1599 case BPF_PROG_DETACH: 1600 err = bpf_prog_detach(&attr); 1601 break; 1602 case BPF_PROG_QUERY: 1603 err = bpf_prog_query(&attr, uattr); 1604 break; 1605 #endif 1606 case BPF_PROG_TEST_RUN: 1607 err = bpf_prog_test_run(&attr, uattr); 1608 break; 1609 case BPF_PROG_GET_NEXT_ID: 1610 err = bpf_obj_get_next_id(&attr, uattr, 1611 &prog_idr, &prog_idr_lock); 1612 break; 1613 case BPF_MAP_GET_NEXT_ID: 1614 err = bpf_obj_get_next_id(&attr, uattr, 1615 &map_idr, &map_idr_lock); 1616 break; 1617 case BPF_PROG_GET_FD_BY_ID: 1618 err = bpf_prog_get_fd_by_id(&attr); 1619 break; 1620 case BPF_MAP_GET_FD_BY_ID: 1621 err = bpf_map_get_fd_by_id(&attr); 1622 break; 1623 case BPF_OBJ_GET_INFO_BY_FD: 1624 err = bpf_obj_get_info_by_fd(&attr, uattr); 1625 break; 1626 default: 1627 err = -EINVAL; 1628 break; 1629 } 1630 1631 return err; 1632 } 1633