/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>

/* Per-CPU recursion guard: non-zero while a BPF program (or a map
 * update/delete on behalf of one) runs on this CPU; see map_update_elem().
 */
DEFINE_PER_CPU(int, bpf_prog_active);

/* sysctl: when non-zero, unprivileged users are denied the bpf(2) syscall */
int sysctl_unprivileged_bpf_disabled __read_mostly;

/* Map-ops table indexed by BPF_MAP_TYPE_*; built from <linux/bpf_types.h>
 * by expanding only the BPF_MAP_TYPE() entries (prog entries expand empty).
 */
static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/* Validate attr->map_type and let the implementation allocate the map.
 * Returns the new map (with ops/map_type filled in) or an ERR_PTR.
 */
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map *map;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
	    !bpf_map_types[attr->map_type])
		return ERR_PTR(-EINVAL);

	map = bpf_map_types[attr->map_type]->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = bpf_map_types[attr->map_type];
	map->map_type = attr->map_type;
	return map;
}

/* Allocate zeroed backing storage for a map: kmalloc for small requests,
 * vmalloc fallback otherwise. Pair with bpf_map_area_free() (kvfree).
 */
void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
			 PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

/* Check (without committing) that charging @pages more pages would stay
 * within the current user's RLIMIT_MEMLOCK. The check is advisory/racy;
 * the real charge happens in bpf_map_charge_memlock().
 */
int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

/* Charge map->pages against the current user's locked_vm. On success the
 * map keeps a reference on the user struct (map->user), dropped again in
 * bpf_map_uncharge_memlock(). Add-then-check keeps the charge atomic.
 */
static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

/* Undo bpf_map_charge_memlock(): return the pages and drop the user ref */
static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

/* Drop a user-visible reference; when the last one goes away, purge a
 * prog_array's entries so the programs it holds don't pin it forever.
 */
static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation
 * ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

/* Drop both the user reference and the base reference */
void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

/* ->release() of the anon inode backing a map fd: gives the map a chance
 * to react to the fd going away, then drops the fd's references.
 */
static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
/* Emit /proc/<pid>/fdinfo/<fd> contents for a map fd */
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type)
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
};

/* Install a new fd referring to @map; the fd owns the caller's references */
int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

/* Take a reference (and optionally a user reference) on @map, refusing
 * to exceed BPF_MAX_REFCNT. Returns @map or ERR_PTR(-EBUSY).
 */
struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

/* Resolve a user-supplied map fd into a map reference with usercnt held */
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* Weak default, overridden when the stack-map implementation is built in */
int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field
 * in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

/* BPF_MAP_LOOKUP_ELEM: copy the value for attr->key out to attr->value */
static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	/* per-cpu maps return one value per possible CPU, each 8-byte aligned */
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
		   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		err = -ENOTSUPP;
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

/* BPF_MAP_UPDATE_ELEM: insert/replace the value for attr->key */
static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	/* same per-cpu value sizing as map_lookup_elem() */
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		/* fd-based maps translate the user-supplied fd in @value */
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

/* BPF_MAP_DELETE_ELEM: remove the element stored under attr->key */
static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	/* see comment in map_update_elem(): guard against kprobe+bpf recursion */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

/* BPF_MAP_GET_NEXT_KEY: write the key following attr->key to attr->next_key */
static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* Verifier-ops table indexed by BPF_PROG_TYPE_*; built from
 * <linux/bpf_types.h> by expanding only the BPF_PROG_TYPE() entries.
 */
static const struct bpf_verifier_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/* Validate @type and attach the matching verifier ops to @prog */
static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	prog->aux->ops = bpf_prog_types[type];
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

/* Charge @pages against @user's locked_vm (no-op when @user is NULL);
 * add-then-check keeps the charge atomic w.r.t. concurrent chargers.
 */
int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

/* Charge prog->pages to the current user; on success the prog holds a
 * reference on the user struct (dropped in bpf_prog_uncharge_memlock()).
 */
static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

/* RCU callback: final teardown once no reader can still see the prog */
static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

/* Drop a prog reference; the last put defers freeing past a grace period */
void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

/* ->release() of the anon inode backing a prog fd */
static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
/* Emit /proc/<pid>/fdinfo/<fd> contents for a prog fd */
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
};

/* Install a new fd referring to @prog; the fd owns the caller's reference */
int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

/* if error is returned, fd is released; mirrors __bpf_map_get() */
static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* Take @i references on @prog, refusing to exceed BPF_MAX_REFCNT */
struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

/* Resolve a prog fd into a referenced prog, optionally enforcing @type */
static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

/* BPF_PROG_LOAD: copy the program from user space, verify it, JIT it,
 * and return a new fd referring to it.
 */
static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	/* kprobe programs depend on kernel internals; pin the exact version */
	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD bpf_fd

/* BPF_OBJ_PIN: pin the object behind attr->bpf_fd at attr->pathname */
static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

/* BPF_OBJ_GET: open a new fd for the object pinned at attr->pathname */
static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

/* BPF_PROG_ATTACH: attach a program to a cgroup hook; on success the
 * cgroup owns the prog reference taken here.
 */
static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
		return -EINVAL;

	/* the attach point dictates which program type is acceptable */
	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

/* BPF_PROG_DETACH: detach whatever program is attached at the hook */
static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		/* NULL prog means "remove the current attachment" */
		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

/* BPF_PROG_TEST_RUN: run the program on test input if the type supports it */
static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

/* bpf(2) syscall entry: validate/copy the attribute union, then dispatch */
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}