1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2020 Facebook */ 3 4 #include <linux/fs.h> 5 #include <linux/anon_inodes.h> 6 #include <linux/filter.h> 7 #include <linux/bpf.h> 8 #include <linux/rcupdate_trace.h> 9 10 struct bpf_iter_target_info { 11 struct list_head list; 12 const struct bpf_iter_reg *reg_info; 13 u32 btf_id; /* cached value */ 14 }; 15 16 struct bpf_iter_link { 17 struct bpf_link link; 18 struct bpf_iter_aux_info aux; 19 struct bpf_iter_target_info *tinfo; 20 }; 21 22 struct bpf_iter_priv_data { 23 struct bpf_iter_target_info *tinfo; 24 const struct bpf_iter_seq_info *seq_info; 25 struct bpf_prog *prog; 26 u64 session_id; 27 u64 seq_num; 28 bool done_stop; 29 u8 target_private[] __aligned(8); 30 }; 31 32 static struct list_head targets = LIST_HEAD_INIT(targets); 33 static DEFINE_MUTEX(targets_mutex); 34 35 /* protect bpf_iter_link changes */ 36 static DEFINE_MUTEX(link_mutex); 37 38 /* incremented on every opened seq_file */ 39 static atomic64_t session_id; 40 41 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, 42 const struct bpf_iter_seq_info *seq_info); 43 44 static void bpf_iter_inc_seq_num(struct seq_file *seq) 45 { 46 struct bpf_iter_priv_data *iter_priv; 47 48 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 49 target_private); 50 iter_priv->seq_num++; 51 } 52 53 static void bpf_iter_dec_seq_num(struct seq_file *seq) 54 { 55 struct bpf_iter_priv_data *iter_priv; 56 57 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 58 target_private); 59 iter_priv->seq_num--; 60 } 61 62 static void bpf_iter_done_stop(struct seq_file *seq) 63 { 64 struct bpf_iter_priv_data *iter_priv; 65 66 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 67 target_private); 68 iter_priv->done_stop = true; 69 } 70 71 static inline bool bpf_iter_target_support_resched(const struct bpf_iter_target_info *tinfo) 72 { 73 return tinfo->reg_info->feature & BPF_ITER_RESCHED; 74 } 75 76 static bool bpf_iter_support_resched(struct seq_file *seq) 77 { 78 struct bpf_iter_priv_data *iter_priv; 79 80 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 81 target_private); 82 return bpf_iter_target_support_resched(iter_priv->tinfo); 83 } 84 85 /* maximum visited objects before bailing out */ 86 #define MAX_ITER_OBJECTS 1000000 87 88 /* bpf_seq_read, a customized and simpler version for bpf iterator. 89 * The following are differences from seq_read(): 90 * . fixed buffer size (PAGE_SIZE) 91 * . assuming NULL ->llseek() 92 * . stop() may call bpf program, handling potential overflow there 93 */ 94 static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, 95 loff_t *ppos) 96 { 97 struct seq_file *seq = file->private_data; 98 size_t n, offs, copied = 0; 99 int err = 0, num_objs = 0; 100 bool can_resched; 101 void *p; 102 103 mutex_lock(&seq->lock); 104 105 if (!seq->buf) { 106 seq->size = PAGE_SIZE << 3; 107 seq->buf = kvmalloc(seq->size, GFP_KERNEL); 108 if (!seq->buf) { 109 err = -ENOMEM; 110 goto done; 111 } 112 } 113 114 if (seq->count) { 115 n = min(seq->count, size); 116 err = copy_to_user(buf, seq->buf + seq->from, n); 117 if (err) { 118 err = -EFAULT; 119 goto done; 120 } 121 seq->count -= n; 122 seq->from += n; 123 copied = n; 124 goto done; 125 } 126 127 seq->from = 0; 128 p = seq->op->start(seq, &seq->index); 129 if (!p) 130 goto stop; 131 if (IS_ERR(p)) { 132 err = PTR_ERR(p); 133 seq->op->stop(seq, p); 134 seq->count = 0; 135 goto done; 136 } 137 138 err = seq->op->show(seq, p); 139 if (err > 0) { 140 /* object is skipped, decrease seq_num, so next 141 * valid object can reuse the same seq_num. 142 */ 143 bpf_iter_dec_seq_num(seq); 144 seq->count = 0; 145 } else if (err < 0 || seq_has_overflowed(seq)) { 146 if (!err) 147 err = -E2BIG; 148 seq->op->stop(seq, p); 149 seq->count = 0; 150 goto done; 151 } 152 153 can_resched = bpf_iter_support_resched(seq); 154 while (1) { 155 loff_t pos = seq->index; 156 157 num_objs++; 158 offs = seq->count; 159 p = seq->op->next(seq, p, &seq->index); 160 if (pos == seq->index) { 161 pr_info_ratelimited("buggy seq_file .next function %ps " 162 "did not updated position index\n", 163 seq->op->next); 164 seq->index++; 165 } 166 167 if (IS_ERR_OR_NULL(p)) 168 break; 169 170 /* got a valid next object, increase seq_num */ 171 bpf_iter_inc_seq_num(seq); 172 173 if (seq->count >= size) 174 break; 175 176 if (num_objs >= MAX_ITER_OBJECTS) { 177 if (offs == 0) { 178 err = -EAGAIN; 179 seq->op->stop(seq, p); 180 goto done; 181 } 182 break; 183 } 184 185 err = seq->op->show(seq, p); 186 if (err > 0) { 187 bpf_iter_dec_seq_num(seq); 188 seq->count = offs; 189 } else if (err < 0 || seq_has_overflowed(seq)) { 190 seq->count = offs; 191 if (offs == 0) { 192 if (!err) 193 err = -E2BIG; 194 seq->op->stop(seq, p); 195 goto done; 196 } 197 break; 198 } 199 200 if (can_resched) 201 cond_resched(); 202 } 203 stop: 204 offs = seq->count; 205 /* bpf program called if !p */ 206 seq->op->stop(seq, p); 207 if (!p) { 208 if (!seq_has_overflowed(seq)) { 209 bpf_iter_done_stop(seq); 210 } else { 211 seq->count = offs; 212 if (offs == 0) { 213 err = -E2BIG; 214 goto done; 215 } 216 } 217 } 218 219 n = min(seq->count, size); 220 err = copy_to_user(buf, seq->buf, n); 221 if (err) { 222 err = -EFAULT; 223 goto done; 224 } 225 copied = n; 226 seq->count -= n; 227 seq->from = n; 228 done: 229 if (!copied) 230 copied = err; 231 else 232 *ppos += copied; 233 mutex_unlock(&seq->lock); 234 return copied; 235 } 236 237 static const struct bpf_iter_seq_info * 238 __get_seq_info(struct bpf_iter_link *link) 239 { 240 const struct bpf_iter_seq_info *seq_info; 241 242 if (link->aux.map) { 243 seq_info = link->aux.map->ops->iter_seq_info; 244 if (seq_info) 245 return seq_info; 246 } 247 248 return link->tinfo->reg_info->seq_info; 249 } 250 251 static int iter_open(struct inode *inode, struct file *file) 252 { 253 struct bpf_iter_link *link = inode->i_private; 254 255 return prepare_seq_file(file, link, __get_seq_info(link)); 256 } 257 258 static int iter_release(struct inode *inode, struct file *file) 259 { 260 struct bpf_iter_priv_data *iter_priv; 261 struct seq_file *seq; 262 263 seq = file->private_data; 264 if (!seq) 265 return 0; 266 267 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 268 target_private); 269 270 if (iter_priv->seq_info->fini_seq_private) 271 iter_priv->seq_info->fini_seq_private(seq->private); 272 273 bpf_prog_put(iter_priv->prog); 274 seq->private = iter_priv; 275 276 return seq_release_private(inode, file); 277 } 278 279 const struct file_operations bpf_iter_fops = { 280 .open = iter_open, 281 .llseek = no_llseek, 282 .read = bpf_seq_read, 283 .release = iter_release, 284 }; 285 286 /* The argument reg_info will be cached in bpf_iter_target_info. 287 * The common practice is to declare target reg_info as 288 * a const static variable and passed as an argument to 289 * bpf_iter_reg_target(). 290 */ 291 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info) 292 { 293 struct bpf_iter_target_info *tinfo; 294 295 tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL); 296 if (!tinfo) 297 return -ENOMEM; 298 299 tinfo->reg_info = reg_info; 300 INIT_LIST_HEAD(&tinfo->list); 301 302 mutex_lock(&targets_mutex); 303 list_add(&tinfo->list, &targets); 304 mutex_unlock(&targets_mutex); 305 306 return 0; 307 } 308 309 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info) 310 { 311 struct bpf_iter_target_info *tinfo; 312 bool found = false; 313 314 mutex_lock(&targets_mutex); 315 list_for_each_entry(tinfo, &targets, list) { 316 if (reg_info == tinfo->reg_info) { 317 list_del(&tinfo->list); 318 kfree(tinfo); 319 found = true; 320 break; 321 } 322 } 323 mutex_unlock(&targets_mutex); 324 325 WARN_ON(found == false); 326 } 327 328 static void cache_btf_id(struct bpf_iter_target_info *tinfo, 329 struct bpf_prog *prog) 330 { 331 tinfo->btf_id = prog->aux->attach_btf_id; 332 } 333 334 bool bpf_iter_prog_supported(struct bpf_prog *prog) 335 { 336 const char *attach_fname = prog->aux->attach_func_name; 337 struct bpf_iter_target_info *tinfo = NULL, *iter; 338 u32 prog_btf_id = prog->aux->attach_btf_id; 339 const char *prefix = BPF_ITER_FUNC_PREFIX; 340 int prefix_len = strlen(prefix); 341 342 if (strncmp(attach_fname, prefix, prefix_len)) 343 return false; 344 345 mutex_lock(&targets_mutex); 346 list_for_each_entry(iter, &targets, list) { 347 if (iter->btf_id && iter->btf_id == prog_btf_id) { 348 tinfo = iter; 349 break; 350 } 351 if (!strcmp(attach_fname + prefix_len, iter->reg_info->target)) { 352 cache_btf_id(iter, prog); 353 tinfo = iter; 354 break; 355 } 356 } 357 mutex_unlock(&targets_mutex); 358 359 if (tinfo) { 360 prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size; 361 prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info; 362 } 363 364 return tinfo != NULL; 365 } 366 367 const struct bpf_func_proto * 368 bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 369 { 370 const struct bpf_iter_target_info *tinfo; 371 const struct bpf_func_proto *fn = NULL; 372 373 mutex_lock(&targets_mutex); 374 list_for_each_entry(tinfo, &targets, list) { 375 if (tinfo->btf_id == prog->aux->attach_btf_id) { 376 const struct bpf_iter_reg *reg_info; 377 378 reg_info = tinfo->reg_info; 379 if (reg_info->get_func_proto) 380 fn = reg_info->get_func_proto(func_id, prog); 381 break; 382 } 383 } 384 mutex_unlock(&targets_mutex); 385 386 return fn; 387 } 388 389 static void bpf_iter_link_release(struct bpf_link *link) 390 { 391 struct bpf_iter_link *iter_link = 392 container_of(link, struct bpf_iter_link, link); 393 394 if (iter_link->tinfo->reg_info->detach_target) 395 iter_link->tinfo->reg_info->detach_target(&iter_link->aux); 396 } 397 398 static void bpf_iter_link_dealloc(struct bpf_link *link) 399 { 400 struct bpf_iter_link *iter_link = 401 container_of(link, struct bpf_iter_link, link); 402 403 kfree(iter_link); 404 } 405 406 static int bpf_iter_link_replace(struct bpf_link *link, 407 struct bpf_prog *new_prog, 408 struct bpf_prog *old_prog) 409 { 410 int ret = 0; 411 412 mutex_lock(&link_mutex); 413 if (old_prog && link->prog != old_prog) { 414 ret = -EPERM; 415 goto out_unlock; 416 } 417 418 if (link->prog->type != new_prog->type || 419 link->prog->expected_attach_type != new_prog->expected_attach_type || 420 link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) { 421 ret = -EINVAL; 422 goto out_unlock; 423 } 424 425 old_prog = xchg(&link->prog, new_prog); 426 bpf_prog_put(old_prog); 427 428 out_unlock: 429 mutex_unlock(&link_mutex); 430 return ret; 431 } 432 433 static void bpf_iter_link_show_fdinfo(const struct bpf_link *link, 434 struct seq_file *seq) 435 { 436 struct bpf_iter_link *iter_link = 437 container_of(link, struct bpf_iter_link, link); 438 bpf_iter_show_fdinfo_t show_fdinfo; 439 440 seq_printf(seq, 441 "target_name:\t%s\n", 442 iter_link->tinfo->reg_info->target); 443 444 show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo; 445 if (show_fdinfo) 446 show_fdinfo(&iter_link->aux, seq); 447 } 448 449 static int bpf_iter_link_fill_link_info(const struct bpf_link *link, 450 struct bpf_link_info *info) 451 { 452 struct bpf_iter_link *iter_link = 453 container_of(link, struct bpf_iter_link, link); 454 char __user *ubuf = u64_to_user_ptr(info->iter.target_name); 455 bpf_iter_fill_link_info_t fill_link_info; 456 u32 ulen = info->iter.target_name_len; 457 const char *target_name; 458 u32 target_len; 459 460 if (!ulen ^ !ubuf) 461 return -EINVAL; 462 463 target_name = iter_link->tinfo->reg_info->target; 464 target_len = strlen(target_name); 465 info->iter.target_name_len = target_len + 1; 466 467 if (ubuf) { 468 if (ulen >= target_len + 1) { 469 if (copy_to_user(ubuf, target_name, target_len + 1)) 470 return -EFAULT; 471 } else { 472 char zero = '\0'; 473 474 if (copy_to_user(ubuf, target_name, ulen - 1)) 475 return -EFAULT; 476 if (put_user(zero, ubuf + ulen - 1)) 477 return -EFAULT; 478 return -ENOSPC; 479 } 480 } 481 482 fill_link_info = iter_link->tinfo->reg_info->fill_link_info; 483 if (fill_link_info) 484 return fill_link_info(&iter_link->aux, info); 485 486 return 0; 487 } 488 489 static const struct bpf_link_ops bpf_iter_link_lops = { 490 .release = bpf_iter_link_release, 491 .dealloc = bpf_iter_link_dealloc, 492 .update_prog = bpf_iter_link_replace, 493 .show_fdinfo = bpf_iter_link_show_fdinfo, 494 .fill_link_info = bpf_iter_link_fill_link_info, 495 }; 496 497 bool bpf_link_is_iter(struct bpf_link *link) 498 { 499 return link->ops == &bpf_iter_link_lops; 500 } 501 502 int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, 503 struct bpf_prog *prog) 504 { 505 struct bpf_iter_target_info *tinfo = NULL, *iter; 506 struct bpf_link_primer link_primer; 507 union bpf_iter_link_info linfo; 508 struct bpf_iter_link *link; 509 u32 prog_btf_id, linfo_len; 510 bpfptr_t ulinfo; 511 int err; 512 513 if (attr->link_create.target_fd || attr->link_create.flags) 514 return -EINVAL; 515 516 memset(&linfo, 0, sizeof(union bpf_iter_link_info)); 517 518 ulinfo = make_bpfptr(attr->link_create.iter_info, uattr.is_kernel); 519 linfo_len = attr->link_create.iter_info_len; 520 if (bpfptr_is_null(ulinfo) ^ !linfo_len) 521 return -EINVAL; 522 523 if (!bpfptr_is_null(ulinfo)) { 524 err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo), 525 linfo_len); 526 if (err) 527 return err; 528 linfo_len = min_t(u32, linfo_len, sizeof(linfo)); 529 if (copy_from_bpfptr(&linfo, ulinfo, linfo_len)) 530 return -EFAULT; 531 } 532 533 prog_btf_id = prog->aux->attach_btf_id; 534 mutex_lock(&targets_mutex); 535 list_for_each_entry(iter, &targets, list) { 536 if (iter->btf_id == prog_btf_id) { 537 tinfo = iter; 538 break; 539 } 540 } 541 mutex_unlock(&targets_mutex); 542 if (!tinfo) 543 return -ENOENT; 544 545 /* Only allow sleepable program for resched-able iterator */ 546 if (prog->aux->sleepable && !bpf_iter_target_support_resched(tinfo)) 547 return -EINVAL; 548 549 link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); 550 if (!link) 551 return -ENOMEM; 552 553 bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); 554 link->tinfo = tinfo; 555 556 err = bpf_link_prime(&link->link, &link_primer); 557 if (err) { 558 kfree(link); 559 return err; 560 } 561 562 if (tinfo->reg_info->attach_target) { 563 err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux); 564 if (err) { 565 bpf_link_cleanup(&link_primer); 566 return err; 567 } 568 } 569 570 return bpf_link_settle(&link_primer); 571 } 572 573 static void init_seq_meta(struct bpf_iter_priv_data *priv_data, 574 struct bpf_iter_target_info *tinfo, 575 const struct bpf_iter_seq_info *seq_info, 576 struct bpf_prog *prog) 577 { 578 priv_data->tinfo = tinfo; 579 priv_data->seq_info = seq_info; 580 priv_data->prog = prog; 581 priv_data->session_id = atomic64_inc_return(&session_id); 582 priv_data->seq_num = 0; 583 priv_data->done_stop = false; 584 } 585 586 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, 587 const struct bpf_iter_seq_info *seq_info) 588 { 589 struct bpf_iter_priv_data *priv_data; 590 struct bpf_iter_target_info *tinfo; 591 struct bpf_prog *prog; 592 u32 total_priv_dsize; 593 struct seq_file *seq; 594 int err = 0; 595 596 mutex_lock(&link_mutex); 597 prog = link->link.prog; 598 bpf_prog_inc(prog); 599 mutex_unlock(&link_mutex); 600 601 tinfo = link->tinfo; 602 total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + 603 seq_info->seq_priv_size; 604 priv_data = __seq_open_private(file, seq_info->seq_ops, 605 total_priv_dsize); 606 if (!priv_data) { 607 err = -ENOMEM; 608 goto release_prog; 609 } 610 611 if (seq_info->init_seq_private) { 612 err = seq_info->init_seq_private(priv_data->target_private, &link->aux); 613 if (err) 614 goto release_seq_file; 615 } 616 617 init_seq_meta(priv_data, tinfo, seq_info, prog); 618 seq = file->private_data; 619 seq->private = priv_data->target_private; 620 621 return 0; 622 623 release_seq_file: 624 seq_release_private(file->f_inode, file); 625 file->private_data = NULL; 626 release_prog: 627 bpf_prog_put(prog); 628 return err; 629 } 630 631 int bpf_iter_new_fd(struct bpf_link *link) 632 { 633 struct bpf_iter_link *iter_link; 634 struct file *file; 635 unsigned int flags; 636 int err, fd; 637 638 if (link->ops != &bpf_iter_link_lops) 639 return -EINVAL; 640 641 flags = O_RDONLY | O_CLOEXEC; 642 fd = get_unused_fd_flags(flags); 643 if (fd < 0) 644 return fd; 645 646 file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags); 647 if (IS_ERR(file)) { 648 err = PTR_ERR(file); 649 goto free_fd; 650 } 651 652 iter_link = container_of(link, struct bpf_iter_link, link); 653 err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link)); 654 if (err) 655 goto free_file; 656 657 fd_install(fd, file); 658 return fd; 659 660 free_file: 661 fput(file); 662 free_fd: 663 put_unused_fd(fd); 664 return err; 665 } 666 667 struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop) 668 { 669 struct bpf_iter_priv_data *iter_priv; 670 struct seq_file *seq; 671 void *seq_priv; 672 673 seq = meta->seq; 674 if (seq->file->f_op != &bpf_iter_fops) 675 return NULL; 676 677 seq_priv = seq->private; 678 iter_priv = container_of(seq_priv, struct bpf_iter_priv_data, 679 target_private); 680 681 if (in_stop && iter_priv->done_stop) 682 return NULL; 683 684 meta->session_id = iter_priv->session_id; 685 meta->seq_num = iter_priv->seq_num; 686 687 return iter_priv->prog; 688 } 689 690 int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) 691 { 692 int ret; 693 694 if (prog->aux->sleepable) { 695 rcu_read_lock_trace(); 696 migrate_disable(); 697 might_fault(); 698 ret = bpf_prog_run(prog, ctx); 699 migrate_enable(); 700 rcu_read_unlock_trace(); 701 } else { 702 rcu_read_lock(); 703 migrate_disable(); 704 ret = bpf_prog_run(prog, ctx); 705 migrate_enable(); 706 rcu_read_unlock(); 707 } 708 709 /* bpf program can only return 0 or 1: 710 * 0 : okay 711 * 1 : retry the same object 712 * The bpf_iter_run_prog() return value 713 * will be seq_ops->show() return value. 714 */ 715 return ret == 0 ? 0 : -EAGAIN; 716 } 717 718 BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn, 719 void *, callback_ctx, u64, flags) 720 { 721 return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags); 722 } 723 724 const struct bpf_func_proto bpf_for_each_map_elem_proto = { 725 .func = bpf_for_each_map_elem, 726 .gpl_only = false, 727 .ret_type = RET_INTEGER, 728 .arg1_type = ARG_CONST_MAP_PTR, 729 .arg2_type = ARG_PTR_TO_FUNC, 730 .arg3_type = ARG_PTR_TO_STACK_OR_NULL, 731 .arg4_type = ARG_ANYTHING, 732 }; 733 734 BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, 735 u64, flags) 736 { 737 bpf_callback_t callback = (bpf_callback_t)callback_fn; 738 u64 ret; 739 u32 i; 740 741 /* Note: these safety checks are also verified when bpf_loop 742 * is inlined, be careful to modify this code in sync. See 743 * function verifier.c:inline_bpf_loop. 744 */ 745 if (flags) 746 return -EINVAL; 747 if (nr_loops > BPF_MAX_LOOPS) 748 return -E2BIG; 749 750 for (i = 0; i < nr_loops; i++) { 751 ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0); 752 /* return value: 0 - continue, 1 - stop and return */ 753 if (ret) 754 return i + 1; 755 } 756 757 return i; 758 } 759 760 const struct bpf_func_proto bpf_loop_proto = { 761 .func = bpf_loop, 762 .gpl_only = false, 763 .ret_type = RET_INTEGER, 764 .arg1_type = ARG_ANYTHING, 765 .arg2_type = ARG_PTR_TO_FUNC, 766 .arg3_type = ARG_PTR_TO_STACK_OR_NULL, 767 .arg4_type = ARG_ANYTHING, 768 }; 769