/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK	(BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};
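
/* The table above is populated from <linux/bpf_types.h> through the
 * BPF_MAP_TYPE() x-macro.  For illustration only (assuming the header
 * carries an entry such as BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)),
 * the include expands roughly to:
 *
 *	static const struct bpf_map_ops * const bpf_map_types[] = {
 *		[BPF_MAP_TYPE_ARRAY] = &array_map_ops,
 *		...
 *	};
 *
 * so find_and_alloc_map() below can index the ops table directly by
 * attr->map_type.
 */
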
/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
static int check_uarg_tail_zero(void __user *uaddr,
				size_t expected_size,
				size_t actual_size)
{
	unsigned char __user *addr;
	unsigned char __user *end;
	unsigned char val;
	int err;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
		return -EFAULT;

	if (actual_size <= expected_size)
		return 0;

	addr = uaddr + expected_size;
	end = uaddr + actual_size;

	for (; addr < end; addr++) {
		err = get_user(val, addr);
		if (err)
			return err;
		if (val)
			return -E2BIG;
	}

	return 0;
}

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map *map;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
	    !bpf_map_types[attr->map_type])
		return ERR_PTR(-EINVAL);

	map = bpf_map_types[attr->map_type]->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = bpf_map_types[attr->map_type];
	map->map_type = attr->map_type;
	return map;
}

void *bpf_map_area_alloc(size_t size, int numa_node)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, GFP_USER | flags, numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
					   __builtin_return_address(0));
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}
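
/* Worked example for the memlock accounting above (illustrative numbers
 * only): with RLIMIT_MEMLOCK = 64 MiB and PAGE_SIZE = 4 KiB,
 * memlock_limit = 64 MiB >> PAGE_SHIFT = 16384 pages.  If charging
 * map->pages pushes user->locked_vm beyond that, bpf_map_charge_memlock()
 * backs the addition out again and the map creation fails with -EPERM.
 */
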
static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long flags;

	if (do_idr_lock)
		spin_lock_irqsave(&map_idr_lock, flags);
	else
		__acquire(&map_idr_lock);

	idr_remove(&map_idr, map->id);

	if (do_idr_lock)
		spin_unlock_irqrestore(&map_idr_lock, flags);
	else
		__release(&map_idr_lock);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	security_bpf_map_free(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put(struct bpf_map *map)
{
	__bpf_map_put(map, true);
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;
	u32 owner_jited = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
		owner_jited = array->owner_jited;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type) {
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
		seq_printf(m, "owner_jited:\t%u\n",
			   owner_jited);
	}
}
#endif

static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
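
/* Illustrative expansion: with BPF_MAP_CREATE_LAST_FIELD defined as
 * map_name further below, CHECK_ATTR(BPF_MAP_CREATE) scans every byte of
 * 'union bpf_attr' that lies behind attr->map_name and evaluates to true
 * if any of them is non-zero, i.e. the command rejects attribute fields
 * it does not know about and the caller returns -EINVAL.
 */
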
/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
 * Return 0 on success and < 0 on error.
 */
static int bpf_obj_name_cpy(char *dst, const char *src)
{
	const char *end = src + BPF_OBJ_NAME_LEN;

	memset(dst, 0, BPF_OBJ_NAME_LEN);

	/* Copy all isalnum() and '_' chars */
	while (src < end && *src) {
		if (!isalnum(*src) && *src != '_')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
	if (src == end)
		return -EINVAL;

	return 0;
}

#define BPF_MAP_CREATE_LAST_FIELD map_name
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name);
	if (err)
		goto free_map_nouncharge;

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_sec;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put(map);
		return err;
	}

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_sec:
	security_bpf_map_free(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}
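
/* Userspace reaches map_create() through the bpf(2) syscall.  A minimal
 * sketch (illustrative only, error handling omitted):
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = 4,
 *		.value_size  = 8,
 *		.max_entries = 256,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * The returned fd holds a map reference that bpf_map_release() drops when
 * the fd is closed.
 */
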
/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
					    bool uref)
{
	int refold;

	refold = __atomic_add_unless(&map->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_map_put(map, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	if (uref)
		atomic_inc(&map->usercnt);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		value_size = sizeof(u32);
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
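
/* Note on the per-CPU sizing in map_lookup_elem() above: for the
 * BPF_MAP_TYPE_PERCPU_* types the value copied out is one slot per
 * possible CPU, each rounded up to 8 bytes.  E.g. (illustrative)
 * value_size = 12 with 4 possible CPUs means userspace must provide
 * round_up(12, 8) * 4 = 64 bytes.
 */
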
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* Need to create a kthread, thus must support schedule */
	if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		goto out;
	}

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
out:
	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	if (ukey) {
		key = memdup_user(ukey, map->key_size);
		if (IS_ERR(key)) {
			err = PTR_ERR(key);
			goto err_put;
		}
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
	[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	prog->aux->ops = bpf_prog_types[type];
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
	int id;

	spin_lock_bh(&prog_idr_lock);
	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		prog->aux->id = id;
	spin_unlock_bh(&prog_idr_lock);

	/* id is in [1, INT_MAX) */
	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	/* cBPF to eBPF migrations are currently not in the idr store. */
	if (!prog->aux->id)
		return;

	if (do_idr_lock)
		spin_lock_bh(&prog_idr_lock);
	else
		__acquire(&prog_idr_lock);

	idr_remove(&prog_idr, prog->aux->id);

	if (do_idr_lock)
		spin_unlock_bh(&prog_idr_lock);
	else
		__release(&prog_idr_lock);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	security_bpf_prog_free(aux);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		/* bpf_prog_free_id() must be called first */
		bpf_prog_free_id(prog, do_idr_lock);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	int ret;

	ret = security_bpf_prog(prog);
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);
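
/* bpf_prog_add() lets callers outside this file take several references
 * in one go.  Illustrative use only (not code from this file): a driver
 * attaching one XDP program to N RX queues may take bpf_prog_add(prog, N)
 * up front and undo a partially completed setup with bpf_prog_sub() on
 * the error path.
 */
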
void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

/* prog_idr_lock should have been held */
struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	int refold;

	refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_prog_put(prog, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_name

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = security_bpf_prog_alloc(prog->aux);
	if (err)
		goto free_prog_nouncharge;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_sec;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	prog->aux->load_time = ktime_get_boot_ns();
	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
	if (err)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_alloc_id(prog);
	if (err)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_prog_put() is needed because the above
		 * bpf_prog_alloc_id() has published the prog
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
		 */
		bpf_prog_put(prog);
		return err;
	}

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_sec:
	security_bpf_prog_free(prog->aux);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD file_flags

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
	    attr->file_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
				attr->file_flags);
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
{
	struct bpf_prog *prog = NULL;
	int ufd = attr->target_fd;
	struct bpf_map *map;
	struct fd f;
	int err;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (attach) {
		prog = bpf_prog_get_type(attr->attach_bpf_fd,
					 BPF_PROG_TYPE_SK_SKB);
		if (IS_ERR(prog)) {
			fdput(f);
			return PTR_ERR(prog);
		}
	}

	err = sock_map_prog(map, prog, attr->attach_type);
	if (err) {
		fdput(f);
		if (prog)
			bpf_prog_put(prog);
		return err;
	}

	fdput(f);
	return 0;
}

#define BPF_F_ATTACH_MASK \
	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, true);
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
				attr->attach_flags);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, false);
	default:
		return -EINVAL;
	}

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
	if (prog)
		bpf_prog_put(prog);
	cgroup_put(cgrp);
	return ret;
}

#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt

static int bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
	if (CHECK_ATTR(BPF_PROG_QUERY))
		return -EINVAL;
	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
		return -EINVAL;

	switch (attr->query.attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_SOCK_OPS:
		break;
	default:
		return -EINVAL;
	}
	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);
	ret = cgroup_bpf_query(cgrp, attr, uattr);
	cgroup_put(cgrp);
	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}
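
/* BPF_PROG_TEST_RUN drives a loaded program over caller-supplied data.
 * Illustrative userspace sketch (assumes prog_fd from BPF_PROG_LOAD and a
 * packet buffer pkt/pkt_len):
 *
 *	union bpf_attr attr = {};
 *
 *	attr.test.prog_fd      = prog_fd;
 *	attr.test.data_in      = (__u64)(unsigned long)pkt;
 *	attr.test.data_size_in = pkt_len;
 *	attr.test.repeat       = 1000;
 *	err = syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr));
 *
 * On success attr.test.retval and attr.test.duration report the program's
 * return value and average run time, provided the program type implements
 * ->test_run().
 */
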
#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id

static int bpf_obj_get_next_id(const union bpf_attr *attr,
			       union bpf_attr __user *uattr,
			       struct idr *idr,
			       spinlock_t *lock)
{
	u32 next_id = attr->start_id;
	int err = 0;

	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	next_id++;
	spin_lock_bh(lock);
	if (!idr_get_next(idr, &next_id))
		err = -ENOENT;
	spin_unlock_bh(lock);

	if (!err)
		err = put_user(next_id, &uattr->next_id);

	return err;
}

#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id

static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	u32 id = attr->prog_id;
	int fd;

	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&prog_idr_lock);
	prog = idr_find(&prog_idr, id);
	if (prog)
		prog = bpf_prog_inc_not_zero(prog);
	else
		prog = ERR_PTR(-ENOENT);
	spin_unlock_bh(&prog_idr_lock);

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fd = bpf_prog_new_fd(prog);
	if (fd < 0)
		bpf_prog_put(prog);

	return fd;
}

#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags

static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_map *map;
	u32 id = attr->map_id;
	int f_flags;
	int fd;

	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
	    attr->open_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	f_flags = bpf_get_file_flag(attr->open_flags);
	if (f_flags < 0)
		return f_flags;

	spin_lock_bh(&map_idr_lock);
	map = idr_find(&map_idr, id);
	if (map)
		map = bpf_map_inc_not_zero(map, true);
	else
		map = ERR_PTR(-ENOENT);
	spin_unlock_bh(&map_idr_lock);

	if (IS_ERR(map))
		return PTR_ERR(map);

	fd = bpf_map_new_fd(map, f_flags);
	if (fd < 0)
		bpf_map_put(map);

	return fd;
}

static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_prog_info info = {};
	u32 info_len = attr->info.info_len;
	char __user *uinsns;
	u32 ulen;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = prog->type;
	info.id = prog->aux->id;
	info.load_time = prog->aux->load_time;
	info.created_by_uid = from_kuid_munged(current_user_ns(),
					       prog->aux->user->uid);

	memcpy(info.tag, prog->tag, sizeof(prog->tag));
	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));

	ulen = info.nr_map_ids;
	info.nr_map_ids = prog->aux->used_map_cnt;
	ulen = min_t(u32, info.nr_map_ids, ulen);
	if (ulen) {
		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
		u32 i;

		for (i = 0; i < ulen; i++)
			if (put_user(prog->aux->used_maps[i]->id,
				     &user_map_ids[i]))
				return -EFAULT;
	}

	if (!capable(CAP_SYS_ADMIN)) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		goto done;
	}

	ulen = info.jited_prog_len;
	info.jited_prog_len = prog->jited_len;
	if (info.jited_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.jited_prog_insns);
		ulen = min_t(u32, info.jited_prog_len, ulen);
		if (copy_to_user(uinsns, prog->bpf_func, ulen))
			return -EFAULT;
	}

	ulen = info.xlated_prog_len;
	info.xlated_prog_len = bpf_prog_insn_size(prog);
	if (info.xlated_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
		ulen = min_t(u32, info.xlated_prog_len, ulen);
		if (copy_to_user(uinsns, prog->insnsi, ulen))
			return -EFAULT;
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

static int bpf_map_get_info_by_fd(struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_map_info info = {};
	u32 info_len = attr->info.info_len;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	info.type = map->map_type;
	info.id = map->id;
	info.key_size = map->key_size;
	info.value_size = map->value_size;
	info.max_entries = map->max_entries;
	info.map_flags = map->map_flags;
	memcpy(info.name, map->name, sizeof(map->name));

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	int ufd = attr->info.bpf_fd;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
		return -EINVAL;

	f = fdget(ufd);
	if (!f.file)
		return -EBADFD;

	if (f.file->f_op == &bpf_prog_fops)
		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
					      uattr);
	else if (f.file->f_op == &bpf_map_fops)
		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
					     uattr);
	else
		err = -EINVAL;

	fdput(f);
	return err;
}
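
/* The ID-based commands above are meant to be used together.  An
 * introspection tool typically walks all loaded programs roughly like
 * this (illustrative pseudo-code, each step being one bpf(2) call):
 *
 *	__u32 id = 0;
 *
 *	while (BPF_PROG_GET_NEXT_ID with start_id = id succeeds) {
 *		id = attr.next_id;
 *		fd = BPF_PROG_GET_FD_BY_ID with prog_id = id;
 *		BPF_OBJ_GET_INFO_BY_FD with info.bpf_fd = fd;
 *		close(fd);
 *	}
 */
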
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}
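
/* All commands above are multiplexed through the single bpf(2) entry
 * point.  A minimal userspace wrapper looks like this (illustrative
 * sketch):
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/bpf.h>
 *
 *	static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 *				  unsigned int size)
 *	{
 *		return syscall(__NR_bpf, cmd, attr, size);
 *	}
 *
 * Passing a size different from the kernel's sizeof(union bpf_attr) is
 * handled: a smaller size leaves the remaining attr bytes zeroed here,
 * and a larger one is only accepted by check_uarg_tail_zero() if the
 * extra bytes are zero.
 */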