/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
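	 *
	 * Small requests (up to PAGE_ALLOC_COSTLY_ORDER pages) are tried
	 * with kmalloc() first; anything larger, or a failed kmalloc(),
	 * falls back to __vmalloc() which does not need physically
	 * contiguous memory.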
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
			 PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type)
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
};

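/* Wrap the map in an anonymous inode file and return a new fd;
 * closing the fd ends up in bpf_map_release().
 */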
int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

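	/* Per-cpu and stack-trace maps need dedicated copy helpers,
	 * map-in-map lookups are not supported from the syscall side,
	 * and everything else is read under rcu_read_lock().
	 */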
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
		   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		err = -ENOTSUPP;
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);

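	/* common exit path: free the temporary buffers and drop the map fd */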
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	/* see the bpf_prog_active comment in map_update_elem() */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

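/* Undo a successful __bpf_prog_charge() with the same page count. */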
void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end  = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}