/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
			 PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

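/* A map's usercnt counts user-space facing references (fds and bpffs
 * pins), while refcnt additionally covers references held by loaded
 * programs. Once the last user reference is gone, a prog_array is
 * flushed so that reference cycles between programs and the prog_array
 * they tail-call into cannot keep both alive forever.
 */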
static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type)
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
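
/* For example, with BPF_MAP_CREATE_LAST_FIELD defined as map_flags below,
 * CHECK_ATTR(BPF_MAP_CREATE) is true if any byte of the attr union past
 * attr->map_flags is non-zero, i.e. user space filled in fields this
 * kernel does not know about for this command.
 */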
#define BPF_MAP_CREATE_LAST_FIELD map_flags
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if an error is returned, the fd is released.
 * On success the caller should complete fd access with a matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

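	/* Per-CPU maps expose one value per possible CPU to user space:
	 * the syscall copies num_possible_cpus() values back to back,
	 * each padded to a multiple of 8 bytes, so the user buffer must
	 * be sized accordingly.
	 */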
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete, otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

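	/* Same recursion guard as in map_update_elem(): bump bpf_prog_active
	 * so that a kprobe+bpf program cannot re-enter the map code on this
	 * CPU while the delete is in progress.
	 */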
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after eBPF program passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;
	int i;

	for (i = 0; i < prog->len; i++) {
		struct bpf_insn *insn = &prog->insnsi[i];

		if (insn->code == (BPF_JMP | BPF_CALL)) {
			/* we reach here when the program has bpf_call
			 * instructions and it passed bpf_check(), which means
			 * that ops->get_func_proto must have been supplied,
			 * check it
			 */
			BUG_ON(!prog->aux->ops->get_func_proto);

			if (insn->imm == BPF_FUNC_get_route_realm)
				prog->dst_needed = 1;
			if (insn->imm == BPF_FUNC_get_prandom_u32)
				bpf_user_rnd_init_once();
			if (insn->imm == BPF_FUNC_xdp_adjust_head)
				prog->xdp_adjust_head = 1;
			if (insn->imm == BPF_FUNC_tail_call) {
				/* mark bpf_tail_call as different opcode
				 * to avoid conditional branch in
				 * interpreter for every normal call
				 * and to prevent accidental JITing by
				 * JIT compiler that doesn't support
				 * bpf_tail_call yet
				 */
				insn->imm = 0;
				insn->code |= BPF_X;
				continue;
			}

			fn = prog->aux->ops->get_func_proto(insn->imm);
			/* all functions that have a prototype and that the
			 * verifier allowed programs to call must be real
			 * in-kernel functions
			 */
			BUG_ON(!fn->func);
			insn->imm = fn->func - __bpf_call_base;
		}
	}
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

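/* Programs are executed under rcu_read_lock(), so the final teardown is
 * deferred through call_rcu(): by the time __bpf_prog_put_rcu() runs, no
 * CPU can still be executing this program.
 */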
static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_prog_show_fdinfo,
#endif
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	return __bpf_prog_get(ufd, &type);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

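	/* Kprobe programs poke at unstable kernel internals, so the loader
	 * must state which kernel version it was built against; anything
	 * other than the running kernel is rejected.
	 */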
	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

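/* BPF_OBJ_PIN and BPF_OBJ_GET pin an fd to, or reopen it from, a path in
 * the BPF filesystem, which lets maps and programs outlive the process
 * that created them.
 */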
#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_type

static int bpf_prog_attach(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	enum bpf_prog_type ptype;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

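	/* cgroup_bpf_update() installs @prog for this attach type, replacing
	 * whatever was attached before; bpf_prog_detach() below passes NULL
	 * to remove it again.
	 */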
	cgroup_bpf_update(cgrp, prog, attr->attach_type);
	cgroup_put(cgrp);

	return 0;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		cgroup_bpf_update(cgrp, NULL, attr->attach_type);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return 0;
}
#endif /* CONFIG_CGROUP_BPF */

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;

#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif

	default:
		err = -EINVAL;
		break;
	}

	return err;
}