/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}
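
/* Illustrative sketch, not part of this file: a map implementation
 * registers itself at boot time with a static bpf_map_type_list, along
 * the lines of what the in-tree array and hash maps do. The names
 * my_map_ops, my_map_type and BPF_MAP_TYPE_FOO below are placeholders:
 *
 *	static struct bpf_map_type_list my_map_type = {
 *		.ops	= &my_map_ops,
 *		.type	= BPF_MAP_TYPE_FOO,
 *	};
 *
 *	static int __init register_my_map(void)
 *	{
 *		bpf_register_map_type(&my_map_type);
 *		return 0;
 *	}
 *	late_initcall(register_my_map);
 */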

void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
			 PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type)
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
}
#endif
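
/* Illustrative only: with CONFIG_PROC_FS enabled, reading
 * /proc/<pid>/fdinfo/<map-fd> then shows output of roughly this shape
 * (the numbers below are made up, not taken from a real map):
 *
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	1024
 *	map_flags:	0x0
 *	memlock:	69632
 */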

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

#define BPF_MAP_CREATE_LAST_FIELD map_flags
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}
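
/* Illustrative sketch, not part of this file: from user space, the
 * command above is reached through the bpf(2) syscall roughly as
 * follows (assuming the uapi definitions from <linux/bpf.h>; error
 * handling omitted):
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 1024,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */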

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete; otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
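
/* Illustrative sketch, not part of this file: user space typically
 * walks a map by chaining BPF_MAP_GET_NEXT_KEY and BPF_MAP_LOOKUP_ELEM,
 * e.g. via the thin syscall wrappers bpf_map_get_next_key() and
 * bpf_map_lookup_elem() from tools/lib/bpf. Assuming 4-byte keys and
 * 8-byte values, the loop looks roughly like:
 *
 *	__u32 key = -1;		// assumed not to be a real key, so the
 *	__u32 next_key;		// first call returns the first element
 *	__u64 value;
 *
 *	while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
 *		bpf_map_lookup_elem(map_fd, &next_key, &value);
 *		key = next_key;
 *	}
 */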

static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after the eBPF program passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;
	int i;

	for (i = 0; i < prog->len; i++) {
		struct bpf_insn *insn = &prog->insnsi[i];

		if (insn->code == (BPF_JMP | BPF_CALL)) {
			/* we reach here when the program has bpf_call
			 * instructions and it passed bpf_check(), which means
			 * ops->get_func_proto must have been supplied, check it
			 */
			BUG_ON(!prog->aux->ops->get_func_proto);

			if (insn->imm == BPF_FUNC_get_route_realm)
				prog->dst_needed = 1;
			if (insn->imm == BPF_FUNC_get_prandom_u32)
				bpf_user_rnd_init_once();
			if (insn->imm == BPF_FUNC_xdp_adjust_head)
				prog->xdp_adjust_head = 1;
			if (insn->imm == BPF_FUNC_tail_call) {
				/* mark bpf_tail_call as different opcode
				 * to avoid conditional branch in
				 * interpreter for every normal call
				 * and to prevent accidental JITing by
				 * JIT compiler that doesn't support
				 * bpf_tail_call yet
				 */
				insn->imm = 0;
				insn->code |= BPF_X;
				continue;
			}

			fn = prog->aux->ops->get_func_proto(insn->imm);
			/* all functions that have a prototype and that the
			 * verifier allowed programs to call must be real
			 * in-kernel functions
			 */
			BUG_ON(!fn->func);
			insn->imm = fn->func - __bpf_call_base;
		}
	}
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_prog_show_fdinfo,
#endif
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);
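
/* Illustrative sketch, not part of this file: an in-kernel attach point
 * that receives a prog fd from user space typically pairs the lookup
 * above with bpf_prog_put() once the program is detached. The function
 * name and program type below are placeholders:
 *
 *	static int my_attach(int prog_fd)
 *	{
 *		struct bpf_prog *prog;
 *
 *		prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_SCHED_CLS);
 *		if (IS_ERR(prog))
 *			return PTR_ERR(prog);
 *		... install prog; call bpf_prog_put(prog) on teardown ...
 *		return 0;
 *	}
 */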

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}
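
/* Illustrative sketch, not part of this file: a minimal user-space
 * BPF_PROG_LOAD call against this interface, assuming the uapi
 * definitions from <linux/bpf.h> and a hand-written instruction array
 * "insns" of "insn_cnt" struct bpf_insn entries (error handling and the
 * verifier log buffer omitted):
 *
 *	union bpf_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns     = (__u64)(unsigned long)insns,
 *		.insn_cnt  = insn_cnt,
 *		.license   = (__u64)(unsigned long)"GPL",
 *	};
 *	int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */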

#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;

#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif

	default:
		err = -EINVAL;
		break;
	}

	return err;
}