/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map *map;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
	    !bpf_map_types[attr->map_type])
		return ERR_PTR(-EINVAL);

	map = bpf_map_types[attr->map_type]->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = bpf_map_types[attr->map_type];
	map->map_type = attr->map_type;
	return map;
}

void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
			 PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type)
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
		   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		err = -ENOTSUPP;
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete; otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (ukey) {
		err = -ENOMEM;
		key = kmalloc(map->key_size, GFP_USER);
		if (!key)
			goto err_put;

		err = -EFAULT;
		if (copy_from_user(key, ukey, map->key_size) != 0)
			goto free_key;
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static const struct bpf_verifier_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	prog->aux->ops = bpf_prog_types[type];
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end  = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}