/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK	(BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
static int check_uarg_tail_zero(void __user *uaddr,
				size_t expected_size,
				size_t actual_size)
{
	unsigned char __user *addr;
	unsigned char __user *end;
	unsigned char val;
	int err;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
		return -EFAULT;

	if (actual_size <= expected_size)
		return 0;

	addr = uaddr + expected_size;
	end = uaddr + actual_size;

	for (; addr < end; addr++) {
		err = get_user(val, addr);
		if (err)
			return err;
		if (val)
			return -E2BIG;
	}

	return 0;
}

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map *map;

	if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
	    !bpf_map_types[attr->map_type])
		return ERR_PTR(-EINVAL);

	map = bpf_map_types[attr->map_type]->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = bpf_map_types[attr->map_type];
	map->map_type = attr->map_type;
	return map;
}

void *bpf_map_area_alloc(size_t size, int numa_node)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, GFP_USER | flags, numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
					   __builtin_return_address(0));
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}
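
/* bpf_map_free_id() is called either with map_idr_lock taken here
 * (do_idr_lock == true) or with the lock already held by the caller
 * (do_idr_lock == false, e.g. via __bpf_map_put() from
 * bpf_map_inc_not_zero()); the __acquire()/__release() annotations only
 * keep sparse's lock-context tracking balanced in the latter case.
 */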
static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long flags;

	if (do_idr_lock)
		spin_lock_irqsave(&map_idr_lock, flags);
	else
		__acquire(&map_idr_lock);

	idr_remove(&map_idr, map->id);

	if (do_idr_lock)
		spin_unlock_irqrestore(&map_idr_lock, flags);
	else
		__release(&map_idr_lock);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	security_bpf_map_free(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put(struct bpf_map *map)
{
	__bpf_map_put(map, true);
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;
	u32 owner_jited = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
		owner_jited = array->owner_jited;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type) {
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
		seq_printf(m, "owner_jited:\t%u\n",
			   owner_jited);
	}
}
#endif

static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}
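
/* File operations backing the anonymous inode installed by bpf_map_new_fd() */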
const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
 * Return 0 on success and < 0 on error.
 */
static int bpf_obj_name_cpy(char *dst, const char *src)
{
	const char *end = src + BPF_OBJ_NAME_LEN;

	memset(dst, 0, BPF_OBJ_NAME_LEN);

	/* Copy all isalnum() and '_' char */
	while (src < end && *src) {
		if (!isalnum(*src) && *src != '_')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
	if (src == end)
		return -EINVAL;

	return 0;
}

#define BPF_MAP_CREATE_LAST_FIELD map_name
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name);
	if (err)
		goto free_map_nouncharge;

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_sec;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put(map);
		return err;
	}

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_sec:
	security_bpf_map_free(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
					    bool uref)
{
	int refold;

	refold = __atomic_add_unless(&map->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_map_put(map, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	if (uref)
		atomic_inc(&map->usercnt);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		value_size = sizeof(u32);
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
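
/* As in map_lookup_elem(), per-CPU maps exchange one value per possible
 * CPU with user space, each slot rounded up to 8 bytes, hence the scaled
 * value_size computed below.
 */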
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* Need to create a kthread, thus must support schedule */
	if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		goto out;
	}

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
out:
	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = memdup_user(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	if (ukey) {
		key = memdup_user(ukey, map->key_size);
		if (IS_ERR(key)) {
			err = PTR_ERR(key);
			goto err_put;
		}
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
	[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
		return -EINVAL;

	if (!bpf_prog_is_dev_bound(prog->aux))
		prog->aux->ops = bpf_prog_types[type];
	else
		prog->aux->ops = &bpf_offload_prog_ops;
	prog->type = type;
	return 0;
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
	int id;

	spin_lock_bh(&prog_idr_lock);
	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		prog->aux->id = id;
	spin_unlock_bh(&prog_idr_lock);

	/* id is in [1, INT_MAX) */
	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	/* cBPF to eBPF migrations are currently not in the idr store. */
	if (!prog->aux->id)
		return;

	if (do_idr_lock)
		spin_lock_bh(&prog_idr_lock);
	else
		__acquire(&prog_idr_lock);

	idr_remove(&prog_idr, prog->aux->id);

	if (do_idr_lock)
		spin_unlock_bh(&prog_idr_lock);
	else
		__release(&prog_idr_lock);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	security_bpf_prog_free(aux);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		/* bpf_prog_free_id() must be called first */
		bpf_prog_free_id(prog, do_idr_lock);
		bpf_prog_kallsyms_del(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	int ret;

	ret = security_bpf_prog(prog);
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

/* prog_idr_lock should have been held */
struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	int refold;

	refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);

	if (refold >= BPF_MAX_REFCNT) {
		__bpf_prog_put(prog, false);
		return ERR_PTR(-EBUSY);
	}

	if (!refold)
		return ERR_PTR(-ENOENT);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);

static bool bpf_prog_get_ok(struct bpf_prog *prog,
			    enum bpf_prog_type *attach_type, bool attach_drv)
{
	/* not an attachment, just a refcount inc, always allow */
	if (!attach_type)
		return true;

	if (prog->type != *attach_type)
		return false;
	if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv)
		return false;

	return true;
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
				       bool attach_drv)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL, false);
}

struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
				       bool attach_drv)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_ifindex

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = security_bpf_prog_alloc(prog->aux);
	if (err)
		goto free_prog_nouncharge;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_sec;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	if (attr->prog_ifindex) {
		err = bpf_prog_offload_init(prog, attr);
		if (err)
			goto free_prog;
	}

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	prog->aux->load_time = ktime_get_boot_ns();
	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
	if (err)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_alloc_id(prog);
	if (err)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_prog_put() is needed because the above
		 * bpf_prog_alloc_id() has published the prog
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
		 */
		bpf_prog_put(prog);
		return err;
	}

	bpf_prog_kallsyms_add(prog);
	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_sec:
	security_bpf_prog_free(prog->aux);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD file_flags

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
	    attr->file_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
				attr->file_flags);
}

#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
{
	struct bpf_prog *prog = NULL;
	int ufd = attr->target_fd;
	struct bpf_map *map;
	struct fd f;
	int err;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (attach) {
		prog = bpf_prog_get_type(attr->attach_bpf_fd,
					 BPF_PROG_TYPE_SK_SKB);
		if (IS_ERR(prog)) {
			fdput(f);
			return PTR_ERR(prog);
		}
	}

	err = sock_map_prog(map, prog, attr->attach_type);
	if (err) {
		fdput(f);
		if (prog)
			bpf_prog_put(prog);
		return err;
	}

	fdput(f);
	return 0;
}

#define BPF_F_ATTACH_MASK \
	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
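
/* Each cgroup attach type below maps to exactly one expected program type;
 * the sockmap attach points (BPF_SK_SKB_STREAM_*) are handled via
 * sockmap_get_from_fd() instead of the cgroup attach path.
 */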
static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_CGROUP_DEVICE:
		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, true);
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
				attr->attach_flags);
	if (ret)
		bpf_prog_put(prog);
	cgroup_put(cgrp);

	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	case BPF_CGROUP_SOCK_OPS:
		ptype = BPF_PROG_TYPE_SOCK_OPS;
		break;
	case BPF_CGROUP_DEVICE:
		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
		break;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
		return sockmap_get_from_fd(attr, false);
	default:
		return -EINVAL;
	}

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
	if (prog)
		bpf_prog_put(prog);
	cgroup_put(cgrp);
	return ret;
}

#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt

static int bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
	if (CHECK_ATTR(BPF_PROG_QUERY))
		return -EINVAL;
	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
		return -EINVAL;

	switch (attr->query.attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_SOCK_OPS:
	case BPF_CGROUP_DEVICE:
		break;
	default:
		return -EINVAL;
	}
	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);
	ret = cgroup_bpf_query(cgrp, attr, uattr);
	cgroup_put(cgrp);
	return ret;
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id

static int bpf_obj_get_next_id(const union bpf_attr *attr,
			       union bpf_attr __user *uattr,
			       struct idr *idr,
			       spinlock_t *lock)
{
	u32 next_id = attr->start_id;
	int err = 0;

	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	next_id++;
	spin_lock_bh(lock);
	if (!idr_get_next(idr, &next_id))
		err = -ENOENT;
	spin_unlock_bh(lock);

	if (!err)
		err = put_user(next_id, &uattr->next_id);

	return err;
}

#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id

static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	u32 id = attr->prog_id;
	int fd;

	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	spin_lock_bh(&prog_idr_lock);
	prog = idr_find(&prog_idr, id);
	if (prog)
		prog = bpf_prog_inc_not_zero(prog);
	else
		prog = ERR_PTR(-ENOENT);
	spin_unlock_bh(&prog_idr_lock);

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fd = bpf_prog_new_fd(prog);
	if (fd < 0)
		bpf_prog_put(prog);

	return fd;
}

#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags

static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_map *map;
	u32 id = attr->map_id;
	int f_flags;
	int fd;

	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
	    attr->open_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	f_flags = bpf_get_file_flag(attr->open_flags);
	if (f_flags < 0)
		return f_flags;

	spin_lock_bh(&map_idr_lock);
	map = idr_find(&map_idr, id);
	if (map)
		map = bpf_map_inc_not_zero(map, true);
	else
		map = ERR_PTR(-ENOENT);
	spin_unlock_bh(&map_idr_lock);

	if (IS_ERR(map))
		return PTR_ERR(map);

	fd = bpf_map_new_fd(map, f_flags);
	if (fd < 0)
		bpf_map_put(map);

	return fd;
}

static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_prog_info info = {};
	u32 info_len = attr->info.info_len;
	char __user *uinsns;
	u32 ulen;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = prog->type;
	info.id = prog->aux->id;
	info.load_time = prog->aux->load_time;
	info.created_by_uid = from_kuid_munged(current_user_ns(),
					       prog->aux->user->uid);

	memcpy(info.tag, prog->tag, sizeof(prog->tag));
	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));

	ulen = info.nr_map_ids;
	info.nr_map_ids = prog->aux->used_map_cnt;
	ulen = min_t(u32, info.nr_map_ids, ulen);
	if (ulen) {
		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
		u32 i;

		for (i = 0; i < ulen; i++)
			if (put_user(prog->aux->used_maps[i]->id,
				     &user_map_ids[i]))
				return -EFAULT;
	}

	if (!capable(CAP_SYS_ADMIN)) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		goto done;
	}

	ulen = info.jited_prog_len;
	info.jited_prog_len = prog->jited_len;
	if (info.jited_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.jited_prog_insns);
		ulen = min_t(u32, info.jited_prog_len, ulen);
		if (copy_to_user(uinsns, prog->bpf_func, ulen))
			return -EFAULT;
	}

	ulen = info.xlated_prog_len;
	info.xlated_prog_len = bpf_prog_insn_size(prog);
	if (info.xlated_prog_len && ulen) {
		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
		ulen = min_t(u32, info.xlated_prog_len, ulen);
		if (copy_to_user(uinsns, prog->insnsi, ulen))
			return -EFAULT;
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

static int bpf_map_get_info_by_fd(struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_map_info info = {};
	u32 info_len = attr->info.info_len;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	info.type = map->map_type;
	info.id = map->id;
	info.key_size = map->key_size;
	info.value_size = map->value_size;
	info.max_entries = map->max_entries;
	info.map_flags = map->map_flags;
	memcpy(info.name, map->name, sizeof(map->name));

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	int ufd = attr->info.bpf_fd;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
		return -EINVAL;

	f = fdget(ufd);
	if (!f.file)
		return -EBADFD;

	if (f.file->f_op == &bpf_prog_fops)
		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
					      uattr);
	else if (f.file->f_op == &bpf_map_fops)
		err = bpf_map_get_info_by_fd(f.file->private_data, attr,
					     uattr);
	else
		err = -EINVAL;

	fdput(f);
	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr);
		break;
#endif
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}