/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};
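
/*
 * Illustrative note: with the BPF_MAP_TYPE() x-macro above, the
 * <linux/bpf_types.h> include expands every registered map type into an
 * ops-table entry, roughly along the lines of
 *
 *	[BPF_MAP_TYPE_ARRAY] = &array_map_ops,
 *	[BPF_MAP_TYPE_HASH]  = &htab_map_ops,
 *	...
 *
 * while the BPF_PROG_TYPE() entries expand to nothing here.  The exact set
 * of entries depends on the kernel configuration.
 */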

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
static int check_uarg_tail_zero(void __user *uaddr,
				size_t expected_size,
				size_t actual_size)
{
	unsigned char __user *addr;
	unsigned char __user *end;
	unsigned char val;
	int err;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
		return -EFAULT;

	if (actual_size <= expected_size)
		return 0;

	addr = uaddr + expected_size;
	end = uaddr + actual_size;

	for (; addr < end; addr++) {
		err = get_user(val, addr);
		if (err)
			return err;
		if (val)
			return -E2BIG;
	}

	return 0;
}

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map *map;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
	    !bpf_map_types[attr->map_type])
		return ERR_PTR(-EINVAL);

	map = bpf_map_types[attr->map_type]->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = bpf_map_types[attr->map_type];
	map->map_type = attr->map_type;
	return map;
}

void *bpf_map_area_alloc(size_t size, int numa_node)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, GFP_USER | flags, numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
					   __builtin_return_address(0));
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long flags;

	if (do_idr_lock)
		spin_lock_irqsave(&map_idr_lock, flags);
	else
		__acquire(&map_idr_lock);

	idr_remove(&map_idr, map->id);

	if (do_idr_lock)
		spin_unlock_irqrestore(&map_idr_lock, flags);
	else
		__release(&map_idr_lock);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put(struct bpf_map *map)
{
	__bpf_map_put(map, true);
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;
	u32 owner_jited = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
		owner_jited = array->owner_jited;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type) {
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
		seq_printf(m, "owner_jited:\t%u\n",
			   owner_jited);
	}
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
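
/*
 * Illustrative expansion: CHECK_ATTR(BPF_MAP_CREATE), with
 * BPF_MAP_CREATE_LAST_FIELD defined as map_name below, boils down to
 * scanning every byte of 'union bpf_attr' that lies past the map_name
 * field, roughly
 *
 *	memchr_inv((void *)&attr->map_name + sizeof(attr->map_name), 0,
 *		   sizeof(*attr) - offsetof(union bpf_attr, map_name) -
 *		   sizeof(attr->map_name)) != NULL
 *
 * so that newer user space cannot sneak in attribute bits this kernel does
 * not understand.
 */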

/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
 * Return 0 on success and < 0 on error.
 */
static int bpf_obj_name_cpy(char *dst, const char *src)
{
	const char *end = src + BPF_OBJ_NAME_LEN;

	/* Copy all isalnum() and '_' char */
	while (src < end && *src) {
		if (!isalnum(*src) && *src != '_')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
	if (src == end)
		return -EINVAL;

	/* '\0' terminates dst */
	*dst = 0;

	return 0;
}

#define BPF_MAP_CREATE_LAST_FIELD map_name
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name);
	if (err)
		goto free_map_nouncharge;

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put(map);
		return err;
	}

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}
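
/*
 * Illustrative user-space sketch (not part of the kernel headers): creating
 * a map amounts to filling the leading fields of 'union bpf_attr' and
 * issuing the syscall, e.g.
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = 4,
 *		.value_size  = 8,
 *		.max_entries = 1024,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * On success the returned fd holds the map's initial refcnt and usercnt set
 * up by map_create() above.
 */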

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
					    bool uref)
{
	int refold;

	refold = __atomic_add_unless(&map->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_map_put(map, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	if (uref)
		atomic_inc(&map->usercnt);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		value_size = sizeof(u32);
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
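
/*
 * Note on the value sizing in map_lookup_elem() above: for the per-CPU map
 * types the syscall copies one value per possible CPU, each slot padded to
 * 8 bytes, so user space must supply a buffer of at least
 *
 *	round_up(value_size, 8) * num_possible_cpus()
 *
 * bytes, while the fd-based maps (prog/perf-event/cgroup arrays and
 * map-in-map) exchange a 32-bit value instead of the map value itself
 * (roughly: an fd on update, an id on lookup).
 */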

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (ukey) {
		key = memdup_user(ukey, map->key_size);
		if (IS_ERR(key)) {
			err = PTR_ERR(key);
			goto err_put;
		}
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static const struct bpf_verifier_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	prog->aux->ops = bpf_prog_types[type];
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
	int id;

	spin_lock_bh(&prog_idr_lock);
	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		prog->aux->id = id;
	spin_unlock_bh(&prog_idr_lock);

	/* id is in [1, INT_MAX) */
	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	/* cBPF to eBPF migrations are currently not in the idr store. */
	if (!prog->aux->id)
		return;

	if (do_idr_lock)
		spin_lock_bh(&prog_idr_lock);
	else
		__acquire(&prog_idr_lock);

	idr_remove(&prog_idr, prog->aux->id);

	if (do_idr_lock)
		spin_unlock_bh(&prog_idr_lock);
	else
		__release(&prog_idr_lock);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		/* bpf_prog_free_id() must be called first */
		bpf_prog_free_id(prog, do_idr_lock);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);
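
/*
 * Note: the BPF_MAX_REFCNT cap enforced in bpf_prog_add() and bpf_map_inc()
 * keeps the reference counters well away from overflow; callers that hit
 * the cap get -EBUSY instead of silently wrapping the counter.
 */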

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

/* prog_idr_lock should have been held */
struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	int refold;

	refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_prog_put(prog, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);
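
/*
 * Illustrative user-space sketch (not part of the kernel headers): loading
 * a program follows the same pattern as map creation, e.g.
 *
 *	union bpf_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns     = (__u64)(unsigned long)insns,
 *		.insn_cnt  = insn_cnt,
 *		.license   = (__u64)(unsigned long)"GPL",
 *	};
 *	int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *
 * bpf_prog_load() below validates the attributes, copies the instructions,
 * runs the verifier and, on success, returns an anonymous-inode fd.
 */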

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_name

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	prog->aux->load_time = ktime_get_boot_ns();
	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
	if (err)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_alloc_id(prog);
	if (err)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_prog_put() is needed because the above
		 * bpf_prog_alloc_id() has published the prog
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
		 */
		bpf_prog_put(prog);
		return err;
	}

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
{
	struct bpf_prog *prog = NULL;
	int ufd = attr->target_fd;
	struct bpf_map *map;
	struct fd f;
	int err;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (attach) {
		prog = bpf_prog_get_type(attr->attach_bpf_fd,
					 BPF_PROG_TYPE_SK_SKB);
		if (IS_ERR(prog)) {
			fdput(f);
			return PTR_ERR(prog);
		}
	}

	err = sock_map_prog(map, prog, attr->attach_type);
	if (err) {
		fdput(f);
		if (prog)
			bpf_prog_put(prog);
		return err;
	}

	fdput(f);
	return 0;
}

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, true);
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_SOCK_OPS:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
		cgroup_put(cgrp);
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		ret = sockmap_get_from_fd(attr, false);
		break;
	default:
		return -EINVAL;
	}

	return ret;
}

#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id

static int bpf_obj_get_next_id(const union bpf_attr *attr,
			       union bpf_attr __user *uattr,
			       struct idr *idr,
			       spinlock_t *lock)
{
	u32 next_id = attr->start_id;
	int err = 0;

	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	next_id++;
	spin_lock_bh(lock);
	if (!idr_get_next(idr, &next_id))
		err = -ENOENT;
	spin_unlock_bh(lock);

	if (!err)
		err = put_user(next_id, &uattr->next_id);

	return err;
}
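
/*
 * Illustrative user-space pattern: bpf_obj_get_next_id() above and the
 * *_GET_FD_BY_ID commands below are typically combined to walk all loaded
 * programs or maps, e.g.
 *
 *	attr.start_id = 0;
 *	while (!syscall(__NR_bpf, BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)))
 *		attr.start_id = attr.next_id;
 *
 * with each next_id optionally turned into an fd via BPF_PROG_GET_FD_BY_ID.
 * All of these commands require CAP_SYS_ADMIN.
 */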

#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id

static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	u32 id = attr->prog_id;
	int fd;

	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&prog_idr_lock);
	prog = idr_find(&prog_idr, id);
	if (prog)
		prog = bpf_prog_inc_not_zero(prog);
	else
		prog = ERR_PTR(-ENOENT);
	spin_unlock_bh(&prog_idr_lock);

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fd = bpf_prog_new_fd(prog);
	if (fd < 0)
		bpf_prog_put(prog);

	return fd;
}

#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id

static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_map *map;
	u32 id = attr->map_id;
	int fd;

	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&map_idr_lock);
	map = idr_find(&map_idr, id);
	if (map)
		map = bpf_map_inc_not_zero(map, true);
	else
		map = ERR_PTR(-ENOENT);
	spin_unlock_bh(&map_idr_lock);

	if (IS_ERR(map))
		return PTR_ERR(map);

	fd = bpf_map_new_fd(map);
	if (fd < 0)
		bpf_map_put(map);

	return fd;
}

static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_prog_info info = {};
	u32 info_len = attr->info.info_len;
	char __user *uinsns;
	u32 ulen;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = prog->type;
	info.id = prog->aux->id;
	info.load_time = prog->aux->load_time;
	info.created_by_uid = from_kuid_munged(current_user_ns(),
					       prog->aux->user->uid);

	memcpy(info.tag, prog->tag, sizeof(prog->tag));
	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));

	ulen = info.nr_map_ids;
	info.nr_map_ids = prog->aux->used_map_cnt;
	ulen = min_t(u32, info.nr_map_ids, ulen);
	if (ulen) {
		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
		u32 i;

		for (i = 0; i < ulen; i++)
			if (put_user(prog->aux->used_maps[i]->id,
				     &user_map_ids[i]))
				return -EFAULT;
	}

	if (!capable(CAP_SYS_ADMIN)) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		goto done;
	}

	ulen = info.jited_prog_len;
	info.jited_prog_len = prog->jited_len;
	if (info.jited_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.jited_prog_insns);
		ulen = min_t(u32, info.jited_prog_len, ulen);
		if (copy_to_user(uinsns, prog->bpf_func, ulen))
			return -EFAULT;
	}

	ulen = info.xlated_prog_len;
	info.xlated_prog_len = bpf_prog_insn_size(prog);
	if (info.xlated_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
		ulen = min_t(u32, info.xlated_prog_len, ulen);
		if (copy_to_user(uinsns, prog->insnsi, ulen))
			return -EFAULT;
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

static int bpf_map_get_info_by_fd(struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_map_info info = {};
	u32 info_len = attr->info.info_len;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	info.type = map->map_type;
	info.id = map->id;
	info.key_size = map->key_size;
	info.value_size = map->value_size;
	info.max_entries = map->max_entries;
	info.map_flags = map->map_flags;
	memcpy(info.name, map->name, sizeof(map->name));

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}
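
/*
 * Note: both *_get_info_by_fd() helpers above tolerate user buffers that are
 * smaller or larger than the kernel's bpf_prog_info/bpf_map_info. A smaller
 * buffer is simply truncated to info_len; a larger one is accepted only if
 * the extra tail bytes are zero, which is what check_uarg_tail_zero()
 * enforces.
 */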

#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	int ufd = attr->info.bpf_fd;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
		return -EINVAL;

	f = fdget(ufd);
	if (!f.file)
		return -EBADFD;

	if (f.file->f_op == &bpf_prog_fops)
		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
					      uattr);
	else if (f.file->f_op == &bpf_map_fops)
		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
					     uattr);
	else
		err = -EINVAL;

	fdput(f);
	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}