// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */

#include <linux/init.h>
#include <linux/namei.h>
#include <linux/pid_namespace.h>
#include <linux/fs.h>
#include <linux/fdtable.h>
#include <linux/filter.h>
#include <linux/btf_ids.h>
#include "mmap_unlock_work.h"

static const char * const iter_task_type_names[] = {
	"ALL",
	"TID",
	"PID",
};

struct bpf_iter_seq_task_common {
	struct pid_namespace *ns;
	enum bpf_iter_task_type type;
	u32 pid;
	u32 pid_visiting;
};

struct bpf_iter_seq_task_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by the {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	u32 tid;
};

static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
						   u32 *tid,
						   bool skip_if_dup_files)
{
	struct task_struct *task;
	struct pid *pid;
	u32 next_tid;

	if (!*tid) {
		/* The first time, the iterator calls this function. */
		pid = find_pid_ns(common->pid, common->ns);
		task = get_pid_task(pid, PIDTYPE_TGID);
		if (!task)
			return NULL;

		*tid = common->pid;
		common->pid_visiting = common->pid;

		return task;
	}

	/* If control returns to user space and comes back to the
	 * kernel again, *tid and common->pid_visiting should be the
	 * same for task_seq_start() to pick up the correct task.
	 */
	if (*tid == common->pid_visiting) {
		pid = find_pid_ns(common->pid_visiting, common->ns);
		task = get_pid_task(pid, PIDTYPE_PID);

		return task;
	}

	task = find_task_by_pid_ns(common->pid_visiting, common->ns);
	if (!task)
		return NULL;

retry:
	task = next_thread(task);

	next_tid = __task_pid_nr_ns(task, PIDTYPE_PID, common->ns);
	if (!next_tid || next_tid == common->pid) {
		/* Ran out of tasks of a process. The tasks of a
		 * thread_group are linked as a circular list.
		 */
		return NULL;
	}

	if (skip_if_dup_files && task->files == task->group_leader->files)
		goto retry;

	*tid = common->pid_visiting = next_tid;
	get_task_struct(task);
	return task;
}

static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
					     u32 *tid,
					     bool skip_if_dup_files)
{
	struct task_struct *task = NULL;
	struct pid *pid;

	if (common->type == BPF_TASK_ITER_TID) {
		if (*tid && *tid != common->pid)
			return NULL;
		rcu_read_lock();
		pid = find_pid_ns(common->pid, common->ns);
		if (pid) {
			task = get_pid_task(pid, PIDTYPE_TGID);
			*tid = common->pid;
		}
		rcu_read_unlock();

		return task;
	}

	if (common->type == BPF_TASK_ITER_TGID) {
		rcu_read_lock();
		task = task_group_seq_get_next(common, tid, skip_if_dup_files);
		rcu_read_unlock();

		return task;
	}

	rcu_read_lock();
retry:
	pid = find_ge_pid(*tid, common->ns);
	if (pid) {
		*tid = pid_nr_ns(pid, common->ns);
		task = get_pid_task(pid, PIDTYPE_PID);
		if (!task) {
			++*tid;
			goto retry;
		} else if (skip_if_dup_files && !thread_group_leader(task) &&
			   task->files == task->group_leader->files) {
			put_task_struct(task);
			task = NULL;
			++*tid;
			goto retry;
		}
	}
	rcu_read_unlock();

	return task;
}

static void *task_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_info *info = seq->private;
	struct task_struct *task;

	task = task_seq_get_next(&info->common, &info->tid, false);
	if (!task)
		return NULL;

	if (*pos == 0)
		++*pos;
	return task;
}

static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_info *info = seq->private;
	struct task_struct *task;

	++*pos;
	++info->tid;
	put_task_struct((struct task_struct *)v);
	task = task_seq_get_next(&info->common, &info->tid, false);
	if (!task)
		return NULL;

	return task;
}

struct bpf_iter__task {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
};

DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)

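/* A BPF program attached to the "task" target above receives one
 * struct bpf_iter__task context per visited task. As an illustration
 * only (not part of this file), a minimal iterator program built with
 * libbpf might look roughly like the sketch below; the include paths
 * and the BPF_SEQ_PRINTF() macro follow the usual libbpf/selftests
 * conventions and are assumptions here, not something this file defines:
 *
 *	// SPDX-License-Identifier: GPL-2.0
 *	#include "vmlinux.h"
 *	#include <bpf/bpf_helpers.h>
 *	#include <bpf/bpf_tracing.h>
 *
 *	char LICENSE[] SEC("license") = "GPL";
 *
 *	SEC("iter/task")
 *	int dump_task(struct bpf_iter__task *ctx)
 *	{
 *		struct seq_file *seq = ctx->meta->seq;
 *		struct task_struct *task = ctx->task;
 *
 *		// task is NULL on the final (post-iteration) call
 *		if (!task)
 *			return 0;
 *
 *		BPF_SEQ_PRINTF(seq, "%8d %8d %s\n",
 *			       task->tgid, task->pid, task->comm);
 *		return 0;
 *	}
 *
 * User space would attach such a program with bpf_program__attach_iter(),
 * optionally passing a union bpf_iter_link_info with task.tid, task.pid
 * or task.pid_fd set (see bpf_iter_attach_task() below), create an
 * iterator fd with bpf_iter_create() and read() the formatted output.
 */
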
static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
			   bool in_stop)
{
	struct bpf_iter_meta meta;
	struct bpf_iter__task ctx;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = task;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_seq_show(struct seq_file *seq, void *v)
{
	return __task_seq_show(seq, v, false);
}

static void task_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__task_seq_show(seq, v, true);
	else
		put_task_struct((struct task_struct *)v);
}

static int bpf_iter_attach_task(struct bpf_prog *prog,
				union bpf_iter_link_info *linfo,
				struct bpf_iter_aux_info *aux)
{
	unsigned int flags;
	struct pid *pid;
	pid_t tgid;

	if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1)
		return -EINVAL;

	aux->task.type = BPF_TASK_ITER_ALL;
	if (linfo->task.tid != 0) {
		aux->task.type = BPF_TASK_ITER_TID;
		aux->task.pid = linfo->task.tid;
	}
	if (linfo->task.pid != 0) {
		aux->task.type = BPF_TASK_ITER_TGID;
		aux->task.pid = linfo->task.pid;
	}
	if (linfo->task.pid_fd != 0) {
		aux->task.type = BPF_TASK_ITER_TGID;

		pid = pidfd_get_pid(linfo->task.pid_fd, &flags);
		if (IS_ERR(pid))
			return PTR_ERR(pid);

		tgid = pid_nr_ns(pid, task_active_pid_ns(current));
		aux->task.pid = tgid;
		put_pid(pid);
	}

	return 0;
}

static const struct seq_operations task_seq_ops = {
	.start = task_seq_start,
	.next = task_seq_next,
	.stop = task_seq_stop,
	.show = task_seq_show,
};

struct bpf_iter_seq_task_file_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by the {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	struct task_struct *task;
	u32 tid;
	u32 fd;
};

static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
{
	u32 saved_tid = info->tid;
	struct task_struct *curr_task;
	unsigned int curr_fd = info->fd;

	/* If this function returns a non-NULL file object,
	 * it holds a reference to the task/file.
	 * Otherwise, it does not hold any reference.
	 */
again:
	if (info->task) {
		curr_task = info->task;
		curr_fd = info->fd;
	} else {
		curr_task = task_seq_get_next(&info->common, &info->tid, true);
		if (!curr_task) {
			info->task = NULL;
			return NULL;
		}

		/* set info->task */
		info->task = curr_task;
		if (saved_tid == info->tid)
			curr_fd = info->fd;
		else
			curr_fd = 0;
	}

	rcu_read_lock();
	for (;; curr_fd++) {
		struct file *f;
		f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
		if (!f)
			break;
		if (!get_file_rcu(f))
			continue;

		/* set info->fd */
		info->fd = curr_fd;
		rcu_read_unlock();
		return f;
	}

	/* the current task is done, go to the next task */
	rcu_read_unlock();
	put_task_struct(curr_task);

	if (info->common.type == BPF_TASK_ITER_TID) {
		info->task = NULL;
		return NULL;
	}

	info->task = NULL;
	info->fd = 0;
	saved_tid = ++(info->tid);
	goto again;
}

static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;
	struct file *file;

	info->task = NULL;
	file = task_file_seq_get_next(info);
	if (file && *pos == 0)
		++*pos;

	return file;
}

static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;

	++*pos;
	++info->fd;
	fput((struct file *)v);
	return task_file_seq_get_next(info);
}

struct bpf_iter__task_file {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
	u32 fd __aligned(8);
	__bpf_md_ptr(struct file *, file);
};

DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
		     struct task_struct *task, u32 fd,
		     struct file *file)

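/* As with the plain "task" target, the context layout above is what a BPF
 * program attached to "task_file" sees: one (task, fd, file) tuple per
 * visited descriptor. The fragment below is only an illustrative sketch in
 * the style of the libbpf selftests (the includes and the BPF_SEQ_PRINTF()
 * macro are assumed from there, not defined here):
 *
 *	SEC("iter/task_file")
 *	int dump_task_file(struct bpf_iter__task_file *ctx)
 *	{
 *		struct seq_file *seq = ctx->meta->seq;
 *		struct task_struct *task = ctx->task;
 *		struct file *file = ctx->file;
 *
 *		// both task and file are NULL on the final call
 *		if (!task || !file)
 *			return 0;
 *
 *		BPF_SEQ_PRINTF(seq, "%8d %8d %8u %lx\n", task->tgid,
 *			       task->pid, ctx->fd, (long)file->f_op);
 *		return 0;
 *	}
 */
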
static int __task_file_seq_show(struct seq_file *seq, struct file *file,
				bool in_stop)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;
	struct bpf_iter__task_file ctx;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = info->task;
	ctx.fd = info->fd;
	ctx.file = file;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_file_seq_show(struct seq_file *seq, void *v)
{
	return __task_file_seq_show(seq, v, false);
}

static void task_file_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;

	if (!v) {
		(void)__task_file_seq_show(seq, v, true);
	} else {
		fput((struct file *)v);
		put_task_struct(info->task);
		info->task = NULL;
	}
}

static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_task_common *common = priv_data;

	common->ns = get_pid_ns(task_active_pid_ns(current));
	common->type = aux->task.type;
	common->pid = aux->task.pid;

	return 0;
}

static void fini_seq_pidns(void *priv_data)
{
	struct bpf_iter_seq_task_common *common = priv_data;

	put_pid_ns(common->ns);
}

static const struct seq_operations task_file_seq_ops = {
	.start = task_file_seq_start,
	.next = task_file_seq_next,
	.stop = task_file_seq_stop,
	.show = task_file_seq_show,
};

struct bpf_iter_seq_task_vma_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by the {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	struct task_struct *task;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	u32 tid;
	unsigned long prev_vm_start;
	unsigned long prev_vm_end;
};

enum bpf_task_vma_iter_find_op {
	task_vma_iter_first_vma,   /* use find_vma() with addr 0 */
	task_vma_iter_next_vma,    /* use vma_next() with curr_vma */
	task_vma_iter_find_vma,    /* use find_vma() to find next vma */
};

static struct vm_area_struct *
task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
{
	enum bpf_task_vma_iter_find_op op;
	struct vm_area_struct *curr_vma;
	struct task_struct *curr_task;
	struct mm_struct *curr_mm;
	u32 saved_tid = info->tid;

	/* If this function returns a non-NULL vma, it holds a reference to
	 * the task_struct, holds a refcount on mm->mm_users, and holds
	 * read lock on vma->mm->mmap_lock.
	 * If this function returns NULL, it does not hold any reference or
	 * lock.
	 */
	if (info->task) {
		curr_task = info->task;
		curr_vma = info->vma;
		curr_mm = info->mm;
		/* In case of lock contention, drop mmap_lock to unblock
		 * the writer.
		 *
		 * After relock, call find_vma(mm, prev_vm_end - 1) to find
		 * the new vma to process.
		 *
		 * +------+------+-----------+
		 * | VMA1 | VMA2 | VMA3      |
		 * +------+------+-----------+
		 * |      |      |           |
		 * 4k     8k     16k         400k
		 *
		 * For example, curr_vma == VMA2. Before unlock, we set
		 *
		 *    prev_vm_start = 8k
		 *    prev_vm_end   = 16k
		 *
		 * There are a few cases:
		 *
		 * 1) VMA2 is freed, but VMA3 exists.
		 *
		 *    find_vma() will return VMA3, just process VMA3.
		 *
		 * 2) VMA2 still exists.
		 *
		 *    find_vma() will return VMA2, process VMA2->next.
		 *
		 * 3) no more vma in this mm.
		 *
		 *    Process the next task.
		 *
		 * 4) find_vma() returns a different vma, VMA2'.
		 *
		 *    4.1) If VMA2 covers the same range as VMA2', skip VMA2',
		 *         because we already covered the range;
		 *    4.2) VMA2 and VMA2' cover different ranges, process
		 *         VMA2'.
		 */
		if (mmap_lock_is_contended(curr_mm)) {
			info->prev_vm_start = curr_vma->vm_start;
			info->prev_vm_end = curr_vma->vm_end;
			op = task_vma_iter_find_vma;
			mmap_read_unlock(curr_mm);
			if (mmap_read_lock_killable(curr_mm)) {
				mmput(curr_mm);
				goto finish;
			}
		} else {
			op = task_vma_iter_next_vma;
		}
	} else {
again:
		curr_task = task_seq_get_next(&info->common, &info->tid, true);
		if (!curr_task) {
			info->tid++;
			goto finish;
		}

		if (saved_tid != info->tid) {
			/* new task, process the first vma */
			op = task_vma_iter_first_vma;
		} else {
			/* Found the same tid, which means the user space
			 * finished data in the previous buffer and read more.
			 * We dropped mmap_lock before returning to user
			 * space, so it is necessary to use find_vma() to
			 * find the next vma to process.
			 */
			op = task_vma_iter_find_vma;
		}

		curr_mm = get_task_mm(curr_task);
		if (!curr_mm)
			goto next_task;

		if (mmap_read_lock_killable(curr_mm)) {
			mmput(curr_mm);
			goto finish;
		}
	}

	switch (op) {
	case task_vma_iter_first_vma:
		curr_vma = find_vma(curr_mm, 0);
		break;
	case task_vma_iter_next_vma:
		curr_vma = find_vma(curr_mm, curr_vma->vm_end);
		break;
	case task_vma_iter_find_vma:
		/* We dropped mmap_lock so it is necessary to use find_vma
		 * to find the next vma. This is similar to the mechanism
		 * in show_smaps_rollup().
		 */
		curr_vma = find_vma(curr_mm, info->prev_vm_end - 1);
		/* case 1) and 4.2) above just use curr_vma */

		/* check for case 2) or case 4.1) above */
		if (curr_vma &&
		    curr_vma->vm_start == info->prev_vm_start &&
		    curr_vma->vm_end == info->prev_vm_end)
			curr_vma = find_vma(curr_mm, curr_vma->vm_end);
		break;
	}
	if (!curr_vma) {
		/* case 3) above, or case 2) 4.1) with vma->next == NULL */
		mmap_read_unlock(curr_mm);
		mmput(curr_mm);
		goto next_task;
	}
	info->task = curr_task;
	info->vma = curr_vma;
	info->mm = curr_mm;
	return curr_vma;

next_task:
	if (info->common.type == BPF_TASK_ITER_TID)
		goto finish;

	put_task_struct(curr_task);
	info->task = NULL;
	info->mm = NULL;
	info->tid++;
	goto again;

finish:
	if (curr_task)
		put_task_struct(curr_task);
	info->task = NULL;
	info->vma = NULL;
	info->mm = NULL;
	return NULL;
}

static void *task_vma_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;
	struct vm_area_struct *vma;

	vma = task_vma_seq_get_next(info);
	if (vma && *pos == 0)
		++*pos;

	return vma;
}

static void *task_vma_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;

	++*pos;
	return task_vma_seq_get_next(info);
}

struct bpf_iter__task_vma {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
	__bpf_md_ptr(struct vm_area_struct *, vma);
};

DEFINE_BPF_ITER_FUNC(task_vma, struct bpf_iter_meta *meta,
		     struct task_struct *task, struct vm_area_struct *vma)

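/* For reference, a BPF program attached to the "task_vma" target sees one
 * struct bpf_iter__task_vma context per vma while mmap_lock is read-held.
 * A minimal sketch in the style of the libbpf selftests (includes and the
 * BPF_SEQ_PRINTF() macro assumed from there, not from this file):
 *
 *	SEC("iter/task_vma")
 *	int dump_task_vma(struct bpf_iter__task_vma *ctx)
 *	{
 *		struct seq_file *seq = ctx->meta->seq;
 *		struct task_struct *task = ctx->task;
 *		struct vm_area_struct *vma = ctx->vma;
 *
 *		if (!task || !vma)
 *			return 0;
 *
 *		BPF_SEQ_PRINTF(seq, "%8d %lx-%lx\n",
 *			       task->tgid, vma->vm_start, vma->vm_end);
 *		return 0;
 *	}
 *
 * Because the iterator holds mmap_lock while the program runs, such a
 * program should do a bounded amount of work per vma.
 */
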
static int __task_vma_seq_show(struct seq_file *seq, bool in_stop)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;
	struct bpf_iter__task_vma ctx;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = info->task;
	ctx.vma = info->vma;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_vma_seq_show(struct seq_file *seq, void *v)
{
	return __task_vma_seq_show(seq, false);
}

static void task_vma_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;

	if (!v) {
		(void)__task_vma_seq_show(seq, true);
	} else {
		/* info->vma has not been seen by the BPF program. If the
		 * user space reads more, task_vma_seq_get_next should
		 * return this vma again. Set prev_vm_start to ~0UL,
		 * so that we don't skip the vma returned by the next
		 * find_vma() (case task_vma_iter_find_vma in
		 * task_vma_seq_get_next()).
		 */
		info->prev_vm_start = ~0UL;
		info->prev_vm_end = info->vma->vm_end;
		mmap_read_unlock(info->mm);
		mmput(info->mm);
		info->mm = NULL;
		put_task_struct(info->task);
		info->task = NULL;
	}
}

static const struct seq_operations task_vma_seq_ops = {
	.start = task_vma_seq_start,
	.next = task_vma_seq_next,
	.stop = task_vma_seq_stop,
	.show = task_vma_seq_show,
};

static const struct bpf_iter_seq_info task_seq_info = {
	.seq_ops = &task_seq_ops,
	.init_seq_private = init_seq_pidns,
	.fini_seq_private = fini_seq_pidns,
	.seq_priv_size = sizeof(struct bpf_iter_seq_task_info),
};

static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info)
{
	switch (aux->task.type) {
	case BPF_TASK_ITER_TID:
		info->iter.task.tid = aux->task.pid;
		break;
	case BPF_TASK_ITER_TGID:
		info->iter.task.pid = aux->task.pid;
		break;
	default:
		break;
	}
	return 0;
}

static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq)
{
	seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]);
	if (aux->task.type == BPF_TASK_ITER_TID)
		seq_printf(seq, "tid:\t%u\n", aux->task.pid);
	else if (aux->task.type == BPF_TASK_ITER_TGID)
		seq_printf(seq, "pid:\t%u\n", aux->task.pid);
}

static struct bpf_iter_reg task_reg_info = {
	.target = "task",
	.attach_target = bpf_iter_attach_task,
	.feature = BPF_ITER_RESCHED,
	.ctx_arg_info_size = 1,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__task, task),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info = &task_seq_info,
	.fill_link_info = bpf_iter_fill_link_info,
	.show_fdinfo = bpf_iter_task_show_fdinfo,
};

static const struct bpf_iter_seq_info task_file_seq_info = {
	.seq_ops = &task_file_seq_ops,
	.init_seq_private = init_seq_pidns,
	.fini_seq_private = fini_seq_pidns,
	.seq_priv_size = sizeof(struct bpf_iter_seq_task_file_info),
};

static struct bpf_iter_reg task_file_reg_info = {
	.target = "task_file",
	.attach_target = bpf_iter_attach_task,
	.feature = BPF_ITER_RESCHED,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__task_file, task),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__task_file, file),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info = &task_file_seq_info,
	.fill_link_info = bpf_iter_fill_link_info,
	.show_fdinfo = bpf_iter_task_show_fdinfo,
};

static const struct bpf_iter_seq_info task_vma_seq_info = {
	.seq_ops = &task_vma_seq_ops,
	.init_seq_private = init_seq_pidns,
	.fini_seq_private = fini_seq_pidns,
	.seq_priv_size = sizeof(struct bpf_iter_seq_task_vma_info),
};

static struct bpf_iter_reg task_vma_reg_info = {
	.target = "task_vma",
	.attach_target = bpf_iter_attach_task,
	.feature = BPF_ITER_RESCHED,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__task_vma, task),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__task_vma, vma),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info = &task_vma_seq_info,
	.fill_link_info = bpf_iter_fill_link_info,
	.show_fdinfo = bpf_iter_task_show_fdinfo,
};

BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
	   bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags)
{
	struct mmap_unlock_irq_work *work = NULL;
	struct vm_area_struct *vma;
	bool irq_work_busy = false;
	struct mm_struct *mm;
	int ret = -ENOENT;

	if (flags)
		return -EINVAL;

	if (!task)
		return -ENOENT;

	mm = task->mm;
	if (!mm)
		return -ENOENT;

	irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);

	if (irq_work_busy || !mmap_read_trylock(mm))
		return -EBUSY;

	vma = find_vma(mm, start);

	if (vma && vma->vm_start <= start && vma->vm_end > start) {
		callback_fn((u64)(long)task, (u64)(long)vma,
			    (u64)(long)callback_ctx, 0, 0);
		ret = 0;
	}
	bpf_mmap_unlock_mm(work, mm);
	return ret;
}

const struct bpf_func_proto bpf_find_vma_proto = {
	.func = bpf_find_vma,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_BTF_ID,
	.arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg2_type = ARG_ANYTHING,
	.arg3_type = ARG_PTR_TO_FUNC,
	.arg4_type = ARG_PTR_TO_STACK_OR_NULL,
	.arg5_type = ARG_ANYTHING,
};

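/* From BPF program code, the helper registered above is exposed as
 * bpf_find_vma(task, addr, callback_fn, callback_ctx, flags): it looks up
 * the vma containing addr in task->mm and invokes the callback on it while
 * mmap_lock is held. A rough usage sketch follows; the cb_ctx struct, the
 * find_vma_cb callback and the addr variable are illustrative assumptions,
 * not part of this file:
 *
 *	struct cb_ctx { __u64 vm_start; __u64 vm_end; };
 *
 *	// callback invoked by bpf_find_vma() while mmap_lock is held
 *	static long find_vma_cb(struct task_struct *task,
 *				struct vm_area_struct *vma, void *data)
 *	{
 *		struct cb_ctx *out = data;
 *
 *		out->vm_start = vma->vm_start;
 *		out->vm_end = vma->vm_end;
 *		return 0;
 *	}
 *
 *	// ... in a tracing or iterator program:
 *	struct task_struct *task = bpf_get_current_task_btf();
 *	struct cb_ctx out = {};
 *	__u64 addr = 0;
 *
 *	bpf_find_vma(task, addr, find_vma_cb, &out, 0);
 */
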
DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);

static void do_mmap_read_unlock(struct irq_work *entry)
{
	struct mmap_unlock_irq_work *work;

	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
		return;

	work = container_of(entry, struct mmap_unlock_irq_work, irq_work);
	mmap_read_unlock_non_owner(work->mm);
}

static int __init task_iter_init(void)
{
	struct mmap_unlock_irq_work *work;
	int ret, cpu;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&mmap_unlock_work, cpu);
		init_irq_work(&work->irq_work, do_mmap_read_unlock);
	}

	task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	ret = bpf_iter_reg_target(&task_reg_info);
	if (ret)
		return ret;

	task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE];
	ret = bpf_iter_reg_target(&task_file_reg_info);
	if (ret)
		return ret;

	task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
	return bpf_iter_reg_target(&task_vma_reg_info);
}
late_initcall(task_iter_init);
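
/* User-space side, for reference only (libbpf API names assumed, not part of
 * this file): parameterized attachment goes through the task.tid, task.pid
 * or task.pid_fd fields of union bpf_iter_link_info, which
 * bpf_iter_attach_task() above translates into BPF_TASK_ITER_TID/TGID.
 * A rough sketch, reusing the hypothetical dump_task program from the
 * earlier comment:
 *
 *	union bpf_iter_link_info linfo = {};
 *	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 *	struct bpf_link *link;
 *	int iter_fd;
 *	char buf[4096];
 *
 *	linfo.task.pid = target_pid;       // or .tid / .pid_fd, at most one
 *	opts.link_info = &linfo;
 *	opts.link_info_len = sizeof(linfo);
 *
 *	link = bpf_program__attach_iter(skel->progs.dump_task, &opts);
 *	iter_fd = bpf_iter_create(bpf_link__fd(link));
 *	while (read(iter_fd, buf, sizeof(buf)) > 0)
 *		;                          // consume the formatted output
 */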