/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
			   (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			   (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			   (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
static int check_uarg_tail_zero(void __user *uaddr,
				size_t expected_size,
				size_t actual_size)
{
	unsigned char __user *addr;
	unsigned char __user *end;
	unsigned char val;
	int err;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
		return -EFAULT;

	if (actual_size <= expected_size)
		return 0;

	addr = uaddr + expected_size;
	end = uaddr + actual_size;

	for (; addr < end; addr++) {
		err = get_user(val, addr);
		if (err)
			return err;
		if (val)
			return -E2BIG;
	}

	return 0;
}

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map *map;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
	    !bpf_map_types[attr->map_type])
		return ERR_PTR(-EINVAL);

	map = bpf_map_types[attr->map_type]->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = bpf_map_types[attr->map_type];
	map->map_type = attr->map_type;
	return map;
}

void *bpf_map_area_alloc(size_t size, int numa_node)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, GFP_USER | flags, numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
					   __builtin_return_address(0));
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}
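
/* The do_idr_lock flag lets callers that already hold map_idr_lock (see
 * bpf_map_inc_not_zero()) skip taking it again; the __acquire()/__release()
 * annotations only exist to keep sparse's lock balance checking happy.
 */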
static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long flags;

	if (do_idr_lock)
		spin_lock_irqsave(&map_idr_lock, flags);
	else
		__acquire(&map_idr_lock);

	idr_remove(&map_idr, map->id);

	if (do_idr_lock)
		spin_unlock_irqrestore(&map_idr_lock, flags);
	else
		__release(&map_idr_lock);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put(struct bpf_map *map)
{
	__bpf_map_put(map, true);
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;
	u32 owner_jited = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
		owner_jited = array->owner_jited;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type) {
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
		seq_printf(m, "owner_jited:\t%u\n",
			   owner_jited);
	}
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
 * Return 0 on success and < 0 on error.
 */
static int bpf_obj_name_cpy(char *dst, const char *src)
{
	const char *end = src + BPF_OBJ_NAME_LEN;

	memset(dst, 0, BPF_OBJ_NAME_LEN);

	/* Copy all isalnum() and '_' char */
	while (src < end && *src) {
		if (!isalnum(*src) && *src != '_')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
	if (src == end)
		return -EINVAL;

	return 0;
}

#define BPF_MAP_CREATE_LAST_FIELD map_name
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name);
	if (err)
		goto free_map_nouncharge;

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put(map);
		return err;
	}

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}
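
/* A map fd created above holds one reference on both map->refcnt and
 * map->usercnt; bpf_map_release() drops them again via
 * bpf_map_put_with_uref() when the last copy of the fd is closed.
 */
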
/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
					    bool uref)
{
	int refold;

	refold = __atomic_add_unless(&map->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_map_put(map, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	if (uref)
		atomic_inc(&map->usercnt);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		value_size = sizeof(u32);
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* Need to create a kthread, thus must support schedule */
	if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		goto out;
	}

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
out:
	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (ukey) {
		key = memdup_user(ukey, map->key_size);
		if (IS_ERR(key)) {
			err = PTR_ERR(key);
			goto err_put;
		}
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static const struct bpf_verifier_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	prog->aux->ops = bpf_prog_types[type];
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}
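
/* Program IDs, like map IDs, are allocated cyclically from [1, INT_MAX),
 * so an id of 0 below can safely mean "no ID assigned" (see
 * bpf_prog_free_id()).
 */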
static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
	int id;

	spin_lock_bh(&prog_idr_lock);
	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		prog->aux->id = id;
	spin_unlock_bh(&prog_idr_lock);

	/* id is in [1, INT_MAX) */
	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	/* cBPF to eBPF migrations are currently not in the idr store. */
	if (!prog->aux->id)
		return;

	if (do_idr_lock)
		spin_lock_bh(&prog_idr_lock);
	else
		__acquire(&prog_idr_lock);

	idr_remove(&prog_idr, prog->aux->id);

	if (do_idr_lock)
		spin_unlock_bh(&prog_idr_lock);
	else
		__release(&prog_idr_lock);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		/* bpf_prog_free_id() must be called first */
		bpf_prog_free_id(prog, do_idr_lock);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_prog_show_fdinfo,
#endif
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

/* prog_idr_lock should have been held */
struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	int refold;

	refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_prog_put(prog, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_name

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	prog->aux->load_time = ktime_get_boot_ns();
	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
	if (err)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_alloc_id(prog);
	if (err)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_prog_put() is needed because the above
		 * bpf_prog_alloc_id() has published the prog
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
		 */
		bpf_prog_put(prog);
		return err;
	}

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
{
	struct bpf_prog *prog = NULL;
	int ufd = attr->target_fd;
	struct bpf_map *map;
	struct fd f;
	int err;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (attach) {
		prog = bpf_prog_get_type(attr->attach_bpf_fd,
					 BPF_PROG_TYPE_SK_SKB);
		if (IS_ERR(prog)) {
			fdput(f);
			return PTR_ERR(prog);
		}
	}

	err = sock_map_prog(map, prog, attr->attach_type);
	if (err) {
		fdput(f);
		if (prog)
			bpf_prog_put(prog);
		return err;
	}

	fdput(f);
	return 0;
}

#define BPF_F_ATTACH_MASK \
	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, true);
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
				attr->attach_flags);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, false);
	default:
		return -EINVAL;
	}

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
	if (prog)
		bpf_prog_put(prog);
	cgroup_put(cgrp);
	return ret;
}

#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt

static int bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
	if (CHECK_ATTR(BPF_PROG_QUERY))
		return -EINVAL;
	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
		return -EINVAL;

	switch (attr->query.attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_SOCK_OPS:
		break;
	default:
		return -EINVAL;
	}
	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);
	ret = cgroup_bpf_query(cgrp, attr, uattr);
	cgroup_put(cgrp);
	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id

static int bpf_obj_get_next_id(const union bpf_attr *attr,
			       union bpf_attr __user *uattr,
			       struct idr *idr,
			       spinlock_t *lock)
{
	u32 next_id = attr->start_id;
	int err = 0;

	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	next_id++;
	spin_lock_bh(lock);
	if (!idr_get_next(idr, &next_id))
		err = -ENOENT;
	spin_unlock_bh(lock);

	if (!err)
		err = put_user(next_id, &uattr->next_id);

	return err;
}

#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id

static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	u32 id = attr->prog_id;
	int fd;

	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&prog_idr_lock);
	prog = idr_find(&prog_idr, id);
	if (prog)
		prog = bpf_prog_inc_not_zero(prog);
	else
		prog = ERR_PTR(-ENOENT);
	spin_unlock_bh(&prog_idr_lock);

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fd = bpf_prog_new_fd(prog);
	if (fd < 0)
		bpf_prog_put(prog);

	return fd;
}

#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id

static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_map *map;
	u32 id = attr->map_id;
	int fd;

	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&map_idr_lock);
	map = idr_find(&map_idr, id);
	if (map)
		map = bpf_map_inc_not_zero(map, true);
	else
		map = ERR_PTR(-ENOENT);
	spin_unlock_bh(&map_idr_lock);

	if (IS_ERR(map))
		return PTR_ERR(map);

	fd = bpf_map_new_fd(map);
	if (fd < 0)
		bpf_map_put(map);

	return fd;
}

static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_prog_info info = {};
	u32 info_len = attr->info.info_len;
	char __user *uinsns;
	u32 ulen;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = prog->type;
	info.id = prog->aux->id;
	info.load_time = prog->aux->load_time;
	info.created_by_uid = from_kuid_munged(current_user_ns(),
					       prog->aux->user->uid);

	memcpy(info.tag, prog->tag, sizeof(prog->tag));
	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));

	ulen = info.nr_map_ids;
	info.nr_map_ids = prog->aux->used_map_cnt;
	ulen = min_t(u32, info.nr_map_ids, ulen);
	if (ulen) {
		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
		u32 i;

		for (i = 0; i < ulen; i++)
			if (put_user(prog->aux->used_maps[i]->id,
				     &user_map_ids[i]))
				return -EFAULT;
	}

	if (!capable(CAP_SYS_ADMIN)) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		goto done;
	}

	ulen = info.jited_prog_len;
	info.jited_prog_len = prog->jited_len;
	if (info.jited_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.jited_prog_insns);
		ulen = min_t(u32, info.jited_prog_len, ulen);
		if (copy_to_user(uinsns, prog->bpf_func, ulen))
			return -EFAULT;
	}

	ulen = info.xlated_prog_len;
	info.xlated_prog_len = bpf_prog_insn_size(prog);
	if (info.xlated_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
		ulen = min_t(u32, info.xlated_prog_len, ulen);
		if (copy_to_user(uinsns, prog->insnsi, ulen))
			return -EFAULT;
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}
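
/* The map variant has no request fields to read back from the user's info
 * buffer, so it only verifies the tail is zeroed and then fills in the reply.
 */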
static int bpf_map_get_info_by_fd(struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_map_info info = {};
	u32 info_len = attr->info.info_len;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	info.type = map->map_type;
	info.id = map->id;
	info.key_size = map->key_size;
	info.value_size = map->value_size;
	info.max_entries = map->max_entries;
	info.map_flags = map->map_flags;
	memcpy(info.name, map->name, sizeof(map->name));

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	int ufd = attr->info.bpf_fd;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
		return -EINVAL;

	f = fdget(ufd);
	if (!f.file)
		return -EBADFD;

	if (f.file->f_op == &bpf_prog_fops)
		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
					      uattr);
	else if (f.file->f_op == &bpf_map_fops)
		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
					     uattr);
	else
		err = -EINVAL;

	fdput(f);
	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}