// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>
#include <linux/static_call.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_lsm.h>
#include <linux/delay.h>

/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

/*
 * Every trampoline is hashed twice:
 *   trampoline_key_table - bucket chosen by hash_64(key), chained via hlist_key
 *   trampoline_ip_table  - bucket chosen by hash_64(tr->ip), chained via hlist_ip
 */
static struct hlist_head trampoline_key_table[TRAMPOLINE_TABLE_SIZE];
static struct hlist_head trampoline_ip_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline tables */
static DEFINE_MUTEX(trampoline_mutex);

/*
 * Keep 32 trampoline locks (5 bits) in the pool so trampoline_lock_all()
 * stays below MAX_LOCK_DEPTH. Each pool slot has a distinct lockdep
 * class because trampoline_lock_all() takes all pool mutexes at once;
 * otherwise lockdep would report recursive locking on same-class mutexes.
 */
#define TRAMPOLINE_LOCKS_BITS 5
#define TRAMPOLINE_LOCKS_TABLE_SIZE (1 << TRAMPOLINE_LOCKS_BITS)

/* Pool of mutexes shared by all trampolines; one lock_class_key per slot. */
static struct {
	struct mutex mutex;
	struct lock_class_key key;
} trampoline_locks[TRAMPOLINE_LOCKS_TABLE_SIZE];

/*
 * Map a trampoline to its pool mutex. The slot is picked by hashing the
 * trampoline pointer, so unrelated trampolines may end up sharing a mutex.
 */
static struct mutex *select_trampoline_lock(struct bpf_trampoline *tr)
{
	return &trampoline_locks[hash_ptr(tr, TRAMPOLINE_LOCKS_BITS)].mutex;
}

/* Acquire the pool mutex selected for @tr. */
static void trampoline_lock(struct bpf_trampoline *tr)
{
	mutex_lock(select_trampoline_lock(tr));
}

/* Release the pool mutex selected for @tr. */
static void trampoline_unlock(struct bpf_trampoline *tr)
{
	mutex_unlock(select_trampoline_lock(tr));
}

/*
 * Attachment back-end used by bpf_trampoline_update():
 *   register_fentry   - install @im on a trampoline with no current image
 *   unregister_fentry - detach the current image (no programs left)
 *   modify_fentry     - switch from the current image to @im
 * @data is an opaque cookie passed through unchanged from the caller.
 */
struct bpf_trampoline_ops {
	int (*register_fentry)(struct bpf_trampoline *tr, struct bpf_tramp_image *im, void *data);
	int (*unregister_fentry)(struct bpf_trampoline *tr, u32 orig_flags, void *data);
	int (*modify_fentry)(struct bpf_trampoline *tr, u32 orig_flags, struct bpf_tramp_image *im,
			     bool lock_direct_mutex, void *data);
};

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex,
				 const struct bpf_trampoline_ops *ops, void *data);
static const struct bpf_trampoline_ops trampoline_ops;

#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
/*
 * Find the trampoline whose ->ip equals @ip by walking the matching bucket
 * of trampoline_ip_table under trampoline_mutex. @ops is unused here.
 * Returns NULL when nothing matches. No reference is taken on the result.
 */
static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
{
	struct hlist_head *head_ip;
	struct bpf_trampoline *tr;

	mutex_lock(&trampoline_mutex);
	head_ip = &trampoline_ip_table[hash_64(ip, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head_ip, hlist_ip) {
		if (tr->ip == ip)
			goto out;
	}
	/* loop ran to completion: no trampoline for this ip */
	tr = NULL;
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}
#else
/* With per-trampoline ftrace_ops the owner is stored in ops->private. */
static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
{
	return ops->private;
}
#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */

/*
 * ftrace_ops::ops_func callback for trampoline direct calls.
 *
 * Handles the three SHARE_IPMODIFY commands for the trampoline found for
 * (@ops, @ip). Returns -EINVAL when no trampoline is found or the command
 * is unknown, -EAGAIN when the caller has to retry, -EBUSY on an unexpected
 * flag state, otherwise 0 or the result of bpf_trampoline_update().
 */
static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
				     enum ftrace_ops_cmd cmd)
{
	struct bpf_trampoline *tr;
	int ret = 0;

	tr = direct_ops_ip_lookup(ops, ip);
	if (!tr)
		return -EINVAL;

	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
		/* This is called inside register_ftrace_direct_multi(), so
		 * trampoline's mutex is already locked.
		 */
		lockdep_assert_held_once(select_trampoline_lock(tr));

		/* Instead of updating the trampoline here, we propagate
		 * -EAGAIN to register_ftrace_direct(). Then we can
		 * retry register_ftrace_direct() after updating the
		 * trampoline.
		 */
		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
				return -EBUSY;

			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
			return -EAGAIN;
		}

		return 0;
	}

	/* The normal locking order is
	 *    select_trampoline_lock(tr) => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
	 *
	 * The following two commands are called from
	 *
	 *   prepare_direct_functions_for_ipmodify
	 *   cleanup_direct_functions_after_ipmodify
	 *
	 * In both cases, direct_mutex is already locked. Use
	 * mutex_trylock(select_trampoline_lock(tr)) to avoid deadlock in race condition
	 * (something else holds the same pool lock).
	 */
	if (!mutex_trylock(select_trampoline_lock(tr))) {
		/* sleep 1 ms to make sure whatever holding select_trampoline_lock(tr)
		 * makes some progress.
		 */
		msleep(1);
		return -EAGAIN;
	}

	switch (cmd) {
	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;

		/* regenerate only if the image calls the original without ORIG_STACK */
		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */,
						    &trampoline_ops, NULL);
		break;
	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;

		/* regenerate only if the current image was built with ORIG_STACK */
		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */,
						    &trampoline_ops, NULL);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	trampoline_unlock(tr);
	return ret;
}
#endif

/*
 * Return true when @prog's type and expected attach type are one of the
 * tracing (fentry/fexit/fmod_ret/fsession, single or multi) or LSM_MAC
 * combinations listed below.
 */
bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
{
	enum bpf_attach_type eatype = prog->expected_attach_type;
	enum bpf_prog_type ptype = prog->type;

	switch (ptype) {
	case BPF_PROG_TYPE_TRACING:
		if (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
		    eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION ||
		    eatype == BPF_TRACE_FENTRY_MULTI || eatype == BPF_TRACE_FEXIT_MULTI ||
		    eatype == BPF_TRACE_FSESSION_MULTI)
			return true;
		return false;
	case BPF_PROG_TYPE_LSM:
		return eatype == BPF_LSM_MAC;
	default:
		return false;
	}
}

/* Record the address range [@data, @data + @size) in @ksym. */
void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym)
{
	ksym->start = (unsigned long) data;
	ksym->end = ksym->start + size;
}

/*
 * Register @ksym and emit the matching perf ksymbol event.
 * Note that the event always reports PAGE_SIZE as the length, not
 * ksym->end - ksym->start.
 */
void bpf_image_ksym_add(struct bpf_ksym *ksym)
{
	bpf_ksym_add(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, false, ksym->name);
}

/* Counterpart of bpf_image_ksym_add(): unregister @ksym and report it to perf. */
void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
	bpf_ksym_del(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, true, ksym->name);
}

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
/*
 * We have only single direct_ops which contains all the direct call
 * sites and is the only global ftrace_ops for all trampolines.
 *
 * We use 'update_ftrace_direct_*' api for attachment.
 */
struct ftrace_ops direct_ops = {
	.ops_func = bpf_tramp_ftrace_ops_func,
};

/* Every trampoline points at the shared direct_ops; nothing to allocate. */
static int direct_ops_alloc(struct bpf_trampoline *tr)
{
	tr->fops = &direct_ops;
	return 0;
}

/* Nothing to release: tr->fops is the static direct_ops. */
static void direct_ops_free(struct bpf_trampoline *tr) { }

/*
 * Build a one-entry ftrace hash mapping ftrace_location(tr->ip) to the
 * trampoline image address @ptr (marked with ftrace_jmp_set() when the
 * trampoline uses a jump).
 *
 * Returns NULL both when tr->ip has no ftrace location and when an
 * allocation fails; callers report either case as -ENOMEM.
 * The caller owns the returned hash and frees it with free_ftrace_hash().
 */
static struct ftrace_hash *hash_from_ip(struct bpf_trampoline *tr, void *ptr)
{
	unsigned long ip, addr = (unsigned long) ptr;
	struct ftrace_hash *hash;

	ip = ftrace_location(tr->ip);
	if (!ip)
		return NULL;
	hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
	if (!hash)
		return NULL;
	if (bpf_trampoline_use_jmp(tr->flags))
		addr = ftrace_jmp_set(addr);
	if (!add_ftrace_hash_entry_direct(hash, ip, addr)) {
		free_ftrace_hash(hash);
		return NULL;
	}
	return hash;
}

/* Attach image @addr at tr->ip through the shared direct_ops. */
static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
{
	struct ftrace_hash *hash = hash_from_ip(tr, addr);
	int err;

	if (!hash)
		return -ENOMEM;
	err = update_ftrace_direct_add(tr->fops, hash);
	/* the temporary hash is freed whether or not the update succeeded */
	free_ftrace_hash(hash);
	return err;
}

/* Detach image @addr from tr->ip through the shared direct_ops. */
static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
{
	struct ftrace_hash *hash = hash_from_ip(tr, addr);
	int err;

	if (!hash)
		return -ENOMEM;
	err = update_ftrace_direct_del(tr->fops, hash);
	free_ftrace_hash(hash);
	return err;
}

/*
 * Redirect tr->ip to the new image @addr. @lock_direct_mutex is forwarded
 * to update_ftrace_direct_mod().
 */
static int direct_ops_mod(struct bpf_trampoline *tr, void *addr, bool lock_direct_mutex)
{
	struct ftrace_hash *hash = hash_from_ip(tr, addr);
	int err;

	if (!hash)
		return -ENOMEM;
	err = update_ftrace_direct_mod(tr->fops, hash, lock_direct_mutex);
	free_ftrace_hash(hash);
	return err;
}
#else
/*
 * We
allocate ftrace_ops object for each trampoline and it contains
 * call site specific for that trampoline.
 *
 * We use *_ftrace_direct api for attachment.
 */
/*
 * Allocate the per-trampoline ftrace_ops and wire it back to @tr.
 * Returns -ENOMEM when the allocation fails.
 */
static int direct_ops_alloc(struct bpf_trampoline *tr)
{
	tr->fops = kzalloc_obj(struct ftrace_ops);
	if (!tr->fops)
		return -ENOMEM;
	tr->fops->private = tr;
	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
	return 0;
}

/* Drop the ftrace filter and free the per-trampoline ftrace_ops, if any. */
static void direct_ops_free(struct bpf_trampoline *tr)
{
	if (!tr->fops)
		return;
	ftrace_free_filter(tr->fops);
	kfree(tr->fops);
}

/*
 * Attach image @ptr at tr->ip: set the filter to tr->ip first, then
 * register the direct call.
 */
static int direct_ops_add(struct bpf_trampoline *tr, void *ptr)
{
	unsigned long addr = (unsigned long) ptr;
	struct ftrace_ops *ops = tr->fops;
	int ret;

	if (bpf_trampoline_use_jmp(tr->flags))
		addr = ftrace_jmp_set(addr);

	ret = ftrace_set_filter_ip(ops, tr->ip, 0, 1);
	if (ret)
		return ret;
	return register_ftrace_direct(ops, addr);
}

/* Unregister the direct call for image @addr. */
static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
{
	return unregister_ftrace_direct(tr->fops, (long)addr, false);
}

/*
 * Point the direct call at the new image @ptr. The _nolock variant is
 * used when @lock_direct_mutex is false.
 */
static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
{
	unsigned long addr = (unsigned long) ptr;
	struct ftrace_ops *ops = tr->fops;

	if (bpf_trampoline_use_jmp(tr->flags))
		addr = ftrace_jmp_set(addr);
	if (lock_direct_mutex)
		return modify_ftrace_direct(ops, addr);
	return modify_ftrace_direct_nolock(ops, addr);
}
#endif /* CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
#else
/* Without direct-call support: nothing to allocate, attach ops return -ENODEV. */
static void direct_ops_free(struct bpf_trampoline *tr) { }

static int direct_ops_alloc(struct bpf_trampoline *tr)
{
	return 0;
}

static int direct_ops_add(struct bpf_trampoline *tr, void *addr)
{
	return -ENODEV;
}

static int direct_ops_del(struct bpf_trampoline *tr, void *addr)
{
	return -ENODEV;
}

static int direct_ops_mod(struct bpf_trampoline *tr, void *ptr, bool lock_direct_mutex)
{
	return -ENODEV;
}
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */

/*
 * Find the trampoline registered under @key, or create one.
 *
 * An existing trampoline gets its refcount incremented; a new one is
 * allocated, hashed into both the key table and the ip table (using
 * ftrace_location(@ip)) and starts with a refcount of 1.
 * Runs entirely under trampoline_mutex.
 *
 * Returns NULL when an allocation fails.
 */
static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_key_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist_key) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc_obj(*tr);
	if (!tr)
		goto out;
	if (direct_ops_alloc(tr)) {
		kfree(tr);
		tr = NULL;
		goto out;
	}

	tr->key = key;
	tr->ip = ftrace_location(ip);
	INIT_HLIST_NODE(&tr->hlist_key);
	INIT_HLIST_NODE(&tr->hlist_ip);
	hlist_add_head(&tr->hlist_key, head);
	/* 'head' is reused: from here on it is the ip-table bucket */
	head = &trampoline_ip_table[hash_64(tr->ip, TRAMPOLINE_HASH_BITS)];
	hlist_add_head(&tr->hlist_ip, head);
	refcount_set(&tr->refcnt, 1);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}

/*
 * Text-poke tr->func.addr from @old_addr to @new_addr.
 *
 * The poke type on each side is NOP when the address is NULL, JUMP when
 * the corresponding flags (@orig_flags for the old side, tr->flags for the
 * new side) select a jump, and CALL otherwise.
 */
static int bpf_trampoline_update_fentry(struct bpf_trampoline *tr, u32 orig_flags,
					void *old_addr, void *new_addr)
{
	enum bpf_text_poke_type new_t = BPF_MOD_CALL, old_t = BPF_MOD_CALL;
	void *ip = tr->func.addr;

	if (!new_addr)
		new_t = BPF_MOD_NOP;
	else if (bpf_trampoline_use_jmp(tr->flags))
		new_t = BPF_MOD_JUMP;

	if (!old_addr)
		old_t = BPF_MOD_NOP;
	else if (bpf_trampoline_use_jmp(orig_flags))
		old_t = BPF_MOD_JUMP;

	return bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
}

static void bpf_tramp_image_put(struct bpf_tramp_image *im);

/*
 * Detach the current image from the target function.
 * Dereferences tr->cur_image unconditionally, so it must be non-NULL.
 * On success the image is handed to bpf_tramp_image_put() and
 * tr->cur_image is cleared; on failure nothing is changed.
 */
static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags, void *data __maybe_unused)
{
	void *old_addr = tr->cur_image->image;
	int ret;

	if (tr->func.ftrace_managed)
		ret = direct_ops_del(tr, old_addr);
	else
		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);

	if (ret)
		return ret;

	bpf_tramp_image_put(tr->cur_image);
	tr->cur_image = NULL;
	return 0;
}

/*
 * Switch the target function from the current image to @im.
 * Dereferences tr->cur_image unconditionally, so it must be non-NULL.
 * On success the old image is put and @im becomes tr->cur_image; on
 * failure tr->cur_image is left untouched and @im stays with the caller.
 */
static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags, struct bpf_tramp_image *im,
			 bool lock_direct_mutex, void *data __maybe_unused)
{
	void *old_addr = tr->cur_image->image;
	void *new_addr = im->image;
	int ret;

	if (tr->func.ftrace_managed) {
		ret = direct_ops_mod(tr, new_addr, lock_direct_mutex);
	} else {
		ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
						   new_addr);
	}

	if (ret)
		return ret;

	bpf_tramp_image_put(tr->cur_image);
	tr->cur_image = im;
	return 0;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, struct bpf_tramp_image *im,
			   void *data __maybe_unused)
{
	void *new_addr = im->image;
	void *ip = tr->func.addr;
	unsigned long faddr;
	int ret;

	/* a non-zero ftrace location means the site is attached via direct_ops_*() */
	faddr = ftrace_location((unsigned long)ip);
	if (faddr) {
		if (!tr->fops)
			return -ENOTSUPP;
		tr->func.ftrace_managed = true;
	}

	if (tr->func.ftrace_managed) {
		ret = direct_ops_add(tr, new_addr);
	} else {
		ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
	}

	if (ret)
		return ret;

	tr->cur_image = im;
	return 0;
}

/* Default back-end, used for regular (non-multi) trampoline attachment. */
static const struct bpf_trampoline_ops trampoline_ops = {
	.register_fentry	= register_fentry,
	.unregister_fentry	= unregister_fentry,
	.modify_fentry		= modify_fentry,
};

/*
 * Snapshot the programs attached to @tr into a freshly allocated array of
 * BPF_TRAMP_MAX struct bpf_tramp_nodes, one element per program kind.
 *
 * @total receives the number of programs over all kinds.
 * @ip_arg is OR-ed with call_get_func_ip of every program; it is never
 *         cleared here, so the caller must initialise it.
 *
 * Returns the array (the caller releases it with kfree()) or
 * ERR_PTR(-ENOMEM).
 */
static struct bpf_tramp_nodes *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
	struct bpf_tramp_node *node, **nodes;
	struct bpf_tramp_nodes *tnodes;
	int kind;

	*total = 0;
	tnodes = kzalloc_objs(*tnodes, BPF_TRAMP_MAX);
	if (!tnodes)
		return ERR_PTR(-ENOMEM);

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		tnodes[kind].nr_nodes = tr->progs_cnt[kind];
		*total += tr->progs_cnt[kind];
		nodes = tnodes[kind].nodes;

		hlist_for_each_entry(node, &tr->progs_hlist[kind], tramp_hlist) {
			*ip_arg |= node->link->prog->call_get_func_ip;
			*nodes++ = node;
		}
	}
	return tnodes;
}

/*
 * Release everything owned by @im: its ksym, the executable image, the
 * modmem charge and the percpu_ref. The structure itself is freed through
 * kfree_rcu().
 */
static void bpf_tramp_image_free(struct bpf_tramp_image *im)
{
	bpf_image_ksym_del(&im->ksym);
	arch_free_bpf_trampoline(im->image, im->size);
	bpf_jit_uncharge_modmem(im->size);
	percpu_ref_exit(&im->pcref);
	kfree_rcu(im, rcu);
}

/* Work item scheduled by __bpf_tramp_image_put_rcu(); frees the image. */
static void __bpf_tramp_image_put_deferred(struct work_struct *work)
{
	struct bpf_tramp_image *im;

	im = container_of(work, struct bpf_tramp_image, work);
	bpf_tramp_image_free(im);
}

/* callback, fexit step 3 or fentry step 2 */
static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
{
	struct bpf_tramp_image *im;

	im = container_of(rcu, struct bpf_tramp_image, rcu);
	/* the actual free is deferred to a workqueue */
	INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
	schedule_work(&im->work);
}

/* callback, fexit step 2. Called after percpu_ref_kill confirms.
*/ 560 static void __bpf_tramp_image_release(struct percpu_ref *pcref) 561 { 562 struct bpf_tramp_image *im; 563 564 im = container_of(pcref, struct bpf_tramp_image, pcref); 565 call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu); 566 } 567 568 /* callback, fexit or fentry step 1 */ 569 static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu) 570 { 571 struct bpf_tramp_image *im; 572 573 im = container_of(rcu, struct bpf_tramp_image, rcu); 574 if (im->ip_after_call) 575 /* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */ 576 percpu_ref_kill(&im->pcref); 577 else 578 /* the case of fentry trampoline */ 579 call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu); 580 } 581 582 static void bpf_tramp_image_put(struct bpf_tramp_image *im) 583 { 584 /* The trampoline image that calls original function is using: 585 * rcu_read_lock_trace to protect sleepable bpf progs 586 * rcu_read_lock to protect normal bpf progs 587 * percpu_ref to protect trampoline itself 588 * rcu tasks to protect trampoline asm not covered by percpu_ref 589 * (which are few asm insns before __bpf_tramp_enter and 590 * after __bpf_tramp_exit) 591 * 592 * The trampoline is unreachable before bpf_tramp_image_put(). 593 * 594 * First, patch the trampoline to avoid calling into fexit progs. 595 * The progs will be freed even if the original function is still 596 * executing or sleeping. 597 * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on 598 * first few asm instructions to execute and call into 599 * __bpf_tramp_enter->percpu_ref_get. 600 * Then use percpu_ref_kill to wait for the trampoline and the original 601 * function to finish. 602 * Then use call_rcu_tasks() to make sure few asm insns in 603 * the trampoline epilogue are done as well. 604 * 605 * In !PREEMPT case the task that got interrupted in the first asm 606 * insns won't go through an RCU quiescent state which the 607 * percpu_ref_kill will be waiting for. 
Hence the first 608 * call_rcu_tasks() is not necessary. 609 */ 610 if (im->ip_after_call) { 611 int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_NOP, 612 BPF_MOD_JUMP, NULL, 613 im->ip_epilogue); 614 WARN_ON(err); 615 if (IS_ENABLED(CONFIG_TASKS_RCU)) 616 call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks); 617 else 618 percpu_ref_kill(&im->pcref); 619 return; 620 } 621 622 /* The trampoline without fexit and fmod_ret progs doesn't call original 623 * function and doesn't use percpu_ref. 624 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish. 625 * Then use call_rcu_tasks() to wait for the rest of trampoline asm 626 * and normal progs. 627 */ 628 call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks); 629 } 630 631 static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size) 632 { 633 struct bpf_tramp_image *im; 634 struct bpf_ksym *ksym; 635 void *image; 636 int err = -ENOMEM; 637 638 im = kzalloc_obj(*im); 639 if (!im) 640 goto out; 641 642 err = bpf_jit_charge_modmem(size); 643 if (err) 644 goto out_free_im; 645 im->size = size; 646 647 err = -ENOMEM; 648 im->image = image = arch_alloc_bpf_trampoline(size); 649 if (!image) 650 goto out_uncharge; 651 652 err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL); 653 if (err) 654 goto out_free_image; 655 656 ksym = &im->ksym; 657 INIT_LIST_HEAD_RCU(&ksym->lnode); 658 snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key); 659 bpf_image_ksym_init(image, size, ksym); 660 bpf_image_ksym_add(ksym); 661 return im; 662 663 out_free_image: 664 arch_free_bpf_trampoline(im->image, im->size); 665 out_uncharge: 666 bpf_jit_uncharge_modmem(size); 667 out_free_im: 668 kfree(im); 669 out: 670 return ERR_PTR(err); 671 } 672 673 static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex, 674 const struct bpf_trampoline_ops *ops, void *data) 675 { 676 struct bpf_tramp_image *im; 677 struct bpf_tramp_nodes *tnodes; 678 u32 
orig_flags = tr->flags; 679 bool ip_arg = false; 680 int err, total, size; 681 682 tnodes = bpf_trampoline_get_progs(tr, &total, &ip_arg); 683 if (IS_ERR(tnodes)) 684 return PTR_ERR(tnodes); 685 686 if (total == 0) { 687 err = ops->unregister_fentry(tr, orig_flags, data); 688 goto out; 689 } 690 691 /* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */ 692 tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX); 693 694 if (tnodes[BPF_TRAMP_FEXIT].nr_nodes || 695 tnodes[BPF_TRAMP_MODIFY_RETURN].nr_nodes) { 696 /* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME 697 * should not be set together. 698 */ 699 tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; 700 } else { 701 tr->flags |= BPF_TRAMP_F_RESTORE_REGS; 702 } 703 704 if (ip_arg) 705 tr->flags |= BPF_TRAMP_F_IP_ARG; 706 707 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS 708 again: 709 if (tr->flags & BPF_TRAMP_F_CALL_ORIG) { 710 if (tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) { 711 /* The BPF_TRAMP_F_SKIP_FRAME can be cleared in the 712 * first try, reset it in the second try. 713 */ 714 tr->flags |= BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SKIP_FRAME; 715 } else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_JMP)) { 716 /* Use "jmp" instead of "call" for the trampoline 717 * in the origin call case, and we don't need to 718 * skip the frame. 
719 */ 720 tr->flags &= ~BPF_TRAMP_F_SKIP_FRAME; 721 } 722 } 723 #endif 724 725 size = arch_bpf_trampoline_size(&tr->func.model, tr->flags, 726 tnodes, tr->func.addr); 727 if (size < 0) { 728 err = size; 729 goto out; 730 } 731 732 if (size > PAGE_SIZE) { 733 err = -E2BIG; 734 goto out; 735 } 736 737 im = bpf_tramp_image_alloc(tr->key, size); 738 if (IS_ERR(im)) { 739 err = PTR_ERR(im); 740 goto out; 741 } 742 743 err = arch_prepare_bpf_trampoline(im, im->image, im->image + size, 744 &tr->func.model, tr->flags, tnodes, 745 tr->func.addr); 746 if (err < 0) 747 goto out_free; 748 749 err = arch_protect_bpf_trampoline(im->image, im->size); 750 if (err) 751 goto out_free; 752 753 if (tr->cur_image) 754 /* progs already running at this address */ 755 err = ops->modify_fentry(tr, orig_flags, im, lock_direct_mutex, data); 756 else 757 /* first time registering */ 758 err = ops->register_fentry(tr, im, data); 759 760 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS 761 if (err == -EAGAIN) { 762 /* -EAGAIN from bpf_tramp_ftrace_ops_func. Now 763 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the 764 * trampoline again, and retry register. 
765 */ 766 bpf_tramp_image_free(im); 767 goto again; 768 } 769 #endif 770 771 out_free: 772 if (err) 773 bpf_tramp_image_free(im); 774 out: 775 /* If any error happens, restore previous flags */ 776 if (err) 777 tr->flags = orig_flags; 778 kfree(tnodes); 779 return err; 780 } 781 782 static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) 783 { 784 switch (prog->expected_attach_type) { 785 case BPF_TRACE_FENTRY: 786 case BPF_TRACE_FENTRY_MULTI: 787 return BPF_TRAMP_FENTRY; 788 case BPF_MODIFY_RETURN: 789 return BPF_TRAMP_MODIFY_RETURN; 790 case BPF_TRACE_FEXIT: 791 case BPF_TRACE_FEXIT_MULTI: 792 return BPF_TRAMP_FEXIT; 793 case BPF_TRACE_FSESSION: 794 case BPF_TRACE_FSESSION_MULTI: 795 return BPF_TRAMP_FSESSION; 796 case BPF_LSM_MAC: 797 if (!prog->aux->attach_func_proto->type) 798 /* The function returns void, we cannot modify its 799 * return value. 800 */ 801 return BPF_TRAMP_FEXIT; 802 else 803 return BPF_TRAMP_MODIFY_RETURN; 804 default: 805 return BPF_TRAMP_REPLACE; 806 } 807 } 808 809 static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog) 810 { 811 struct bpf_prog_aux *aux = tgt_prog->aux; 812 813 guard(mutex)(&aux->ext_mutex); 814 if (aux->prog_array_member_cnt) 815 /* Program extensions can not extend target prog when the target 816 * prog has been updated to any prog_array map as tail callee. 817 * It's to prevent a potential infinite loop like: 818 * tgt prog entry -> tgt prog subprog -> freplace prog entry 819 * --tailcall-> tgt prog entry. 
820 */ 821 return -EBUSY; 822 823 aux->is_extended = true; 824 return 0; 825 } 826 827 static struct bpf_tramp_node *fsession_exit(struct bpf_tramp_node *node) 828 { 829 if (node->link->type == BPF_LINK_TYPE_TRACING) { 830 struct bpf_tracing_link *link; 831 832 link = container_of(node->link, struct bpf_tracing_link, link.link); 833 return &link->fexit; 834 } else if (node->link->type == BPF_LINK_TYPE_TRACING_MULTI) { 835 struct bpf_tracing_multi_link *link; 836 struct bpf_tracing_multi_node *mnode; 837 838 link = container_of(node->link, struct bpf_tracing_multi_link, link); 839 mnode = container_of(node, struct bpf_tracing_multi_node, node); 840 return &link->fexits[mnode - link->nodes]; 841 } 842 return NULL; 843 } 844 845 static int bpf_trampoline_add_prog(struct bpf_trampoline *tr, 846 struct bpf_tramp_node *node, 847 int cnt) 848 { 849 enum bpf_tramp_prog_type kind; 850 struct bpf_tramp_node *node_existing, *fexit; 851 struct hlist_head *prog_list; 852 853 kind = bpf_attach_type_to_tramp(node->link->prog); 854 if (kind == BPF_TRAMP_FSESSION) { 855 prog_list = &tr->progs_hlist[BPF_TRAMP_FENTRY]; 856 cnt++; 857 } else { 858 prog_list = &tr->progs_hlist[kind]; 859 } 860 if (cnt >= BPF_MAX_TRAMP_LINKS) 861 return -E2BIG; 862 if (!hlist_unhashed(&node->tramp_hlist)) 863 /* prog already linked */ 864 return -EBUSY; 865 hlist_for_each_entry(node_existing, prog_list, tramp_hlist) { 866 if (node_existing->link->prog != node->link->prog) 867 continue; 868 /* prog already linked */ 869 return -EBUSY; 870 } 871 872 hlist_add_head(&node->tramp_hlist, prog_list); 873 if (kind == BPF_TRAMP_FSESSION) { 874 tr->progs_cnt[BPF_TRAMP_FENTRY]++; 875 fexit = fsession_exit(node); 876 if (WARN_ON_ONCE(!fexit)) 877 return -EINVAL; 878 hlist_add_head(&fexit->tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]); 879 tr->progs_cnt[BPF_TRAMP_FEXIT]++; 880 } else { 881 tr->progs_cnt[kind]++; 882 } 883 return 0; 884 } 885 886 static void bpf_trampoline_remove_prog(struct bpf_trampoline *tr, 887 
struct bpf_tramp_node *node) 888 { 889 enum bpf_tramp_prog_type kind; 890 struct bpf_tramp_node *fexit; 891 892 kind = bpf_attach_type_to_tramp(node->link->prog); 893 if (kind == BPF_TRAMP_FSESSION) { 894 fexit = fsession_exit(node); 895 if (WARN_ON_ONCE(!fexit)) 896 return; 897 hlist_del_init(&fexit->tramp_hlist); 898 tr->progs_cnt[BPF_TRAMP_FEXIT]--; 899 kind = BPF_TRAMP_FENTRY; 900 } 901 hlist_del_init(&node->tramp_hlist); 902 tr->progs_cnt[kind]--; 903 } 904 905 static int __bpf_trampoline_link_prog(struct bpf_tramp_node *node, 906 struct bpf_trampoline *tr, 907 struct bpf_prog *tgt_prog, 908 const struct bpf_trampoline_ops *ops, 909 void *data) 910 { 911 enum bpf_tramp_prog_type kind; 912 int err = 0; 913 int cnt = 0, i; 914 915 kind = bpf_attach_type_to_tramp(node->link->prog); 916 if (tr->extension_prog) 917 /* cannot attach fentry/fexit if extension prog is attached. 918 * cannot overwrite extension prog either. 919 */ 920 return -EBUSY; 921 922 for (i = 0; i < BPF_TRAMP_MAX; i++) 923 cnt += tr->progs_cnt[i]; 924 925 if (kind == BPF_TRAMP_REPLACE) { 926 /* Cannot attach extension if fentry/fexit are in use. 
*/ 927 if (cnt) 928 return -EBUSY; 929 err = bpf_freplace_check_tgt_prog(tgt_prog); 930 if (err) 931 return err; 932 tr->extension_prog = node->link->prog; 933 return bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP, 934 BPF_MOD_JUMP, NULL, 935 node->link->prog->bpf_func); 936 } 937 err = bpf_trampoline_add_prog(tr, node, cnt); 938 if (err) 939 return err; 940 err = bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data); 941 if (err) 942 bpf_trampoline_remove_prog(tr, node); 943 return err; 944 } 945 946 int bpf_trampoline_link_prog(struct bpf_tramp_node *node, 947 struct bpf_trampoline *tr, 948 struct bpf_prog *tgt_prog) 949 { 950 int err; 951 952 trampoline_lock(tr); 953 err = __bpf_trampoline_link_prog(node, tr, tgt_prog, &trampoline_ops, NULL); 954 trampoline_unlock(tr); 955 return err; 956 } 957 958 static int __bpf_trampoline_unlink_prog(struct bpf_tramp_node *node, 959 struct bpf_trampoline *tr, 960 struct bpf_prog *tgt_prog, 961 const struct bpf_trampoline_ops *ops, 962 void *data) 963 { 964 enum bpf_tramp_prog_type kind; 965 int err; 966 967 kind = bpf_attach_type_to_tramp(node->link->prog); 968 if (kind == BPF_TRAMP_REPLACE) { 969 WARN_ON_ONCE(!tr->extension_prog); 970 err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, 971 BPF_MOD_NOP, 972 tr->extension_prog->bpf_func, NULL); 973 tr->extension_prog = NULL; 974 guard(mutex)(&tgt_prog->aux->ext_mutex); 975 tgt_prog->aux->is_extended = false; 976 return err; 977 } 978 bpf_trampoline_remove_prog(tr, node); 979 return bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data); 980 } 981 982 /* bpf_trampoline_unlink_prog() should never fail. 
 */
int bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
			       struct bpf_trampoline *tr,
			       struct bpf_prog *tgt_prog)
{
	int err;

	trampoline_lock(tr);
	err = __bpf_trampoline_unlink_prog(node, tr, tgt_prog, &trampoline_ops, NULL);
	trampoline_unlock(tr);
	return err;
}

#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
/*
 * bpf_link_ops::release for a cgroup shim link: detach the shim program
 * from its trampoline and drop the trampoline reference held by the link.
 */
static void bpf_shim_tramp_link_release(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	/* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
	if (!shim_link->trampoline)
		return;

	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link.node, shim_link->trampoline, NULL));
	bpf_trampoline_put(shim_link->trampoline);
}

/* bpf_link_ops::dealloc for a cgroup shim link: free the link structure. */
static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	kfree(shim_link);
}

static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
	.release = bpf_shim_tramp_link_release,
	.dealloc = bpf_shim_tramp_link_dealloc,
};

/*
 * Allocate a shim link together with a minimal, non-JITed LSM_MAC program
 * whose bpf_func is @bpf_func. The attach BTF information is copied from
 * @prog and a reference on the attach BTF is taken.
 *
 * Returns NULL when an allocation fails.
 */
static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
						     bpf_func_t bpf_func,
						     int cgroup_atype,
						     enum bpf_attach_type attach_type)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_prog *p;

	shim_link = kzalloc_obj(*shim_link, GFP_USER);
	if (!shim_link)
		return NULL;

	p = bpf_prog_alloc(1, 0);
	if (!p) {
		kfree(shim_link);
		return NULL;
	}

	p->jited = false;
	p->bpf_func = bpf_func;

	p->aux->cgroup_atype = cgroup_atype;
	p->aux->attach_func_proto = prog->aux->attach_func_proto;
	p->aux->attach_btf_id = prog->aux->attach_btf_id;
	p->aux->attach_btf = prog->aux->attach_btf;
	btf_get(p->aux->attach_btf);
	p->type = BPF_PROG_TYPE_LSM;
	p->expected_attach_type = BPF_LSM_MAC;
	bpf_prog_inc(p);
	bpf_tramp_link_init(&shim_link->link, BPF_LINK_TYPE_UNSPEC,
			    &bpf_shim_tramp_link_lops, p, attach_type, 0);
	bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);

	return shim_link;
}

/*
 * Search every program list of @tr for a program whose bpf_func equals
 * @bpf_func and return the shim link embedding that node, or NULL.
 * No reference is taken on the returned link.
 */
static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
						    bpf_func_t bpf_func)
{
	struct bpf_tramp_node *node;
	int kind;

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		hlist_for_each_entry(node, &tr->progs_hlist[kind], tramp_hlist) {
			struct bpf_prog *p = node->link->prog;

			if (p->bpf_func == bpf_func)
				return container_of(node, struct bpf_shim_tramp_link, link.node);
		}
	}

	return NULL;
}

/*
 * Make sure a cgroup shim program is attached to the trampoline of
 * @prog's attach target.
 *
 * An existing shim is reused by taking another reference on its link;
 * otherwise a new shim is allocated and linked. The trampoline reference
 * obtained here is kept by a newly installed shim and dropped in every
 * other case.
 */
int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
				    int cgroup_atype,
				    enum bpf_attach_type attach_type)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_attach_target_info tgt_info = {};
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;
	int err;

	err = bpf_check_attach_target(NULL, prog, NULL,
				      prog->aux->attach_btf_id,
				      &tgt_info);
	if (err)
		return err;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_get(key, &tgt_info);
	if (!tr)
		return -ENOMEM;

	trampoline_lock(tr);

	shim_link = cgroup_shim_find(tr, bpf_func);
	if (shim_link && !IS_ERR(bpf_link_inc_not_zero(&shim_link->link.link))) {
		/* Reusing existing shim attached by the other program. */
		trampoline_unlock(tr);
		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
		return 0;
	}

	/* Allocate and install new shim. */

	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype, attach_type);
	if (!shim_link) {
		err = -ENOMEM;
		goto err;
	}

	err = __bpf_trampoline_link_prog(&shim_link->link.node, tr, NULL, &trampoline_ops, NULL);
	if (err)
		goto err;

	shim_link->trampoline = tr;
	/* note, we're still holding tr refcnt from above */

	trampoline_unlock(tr);

	return 0;
err:
	trampoline_unlock(tr);

	/* shim_link->trampoline is still unset here, so the release callback
	 * will not try to unlink the shim or put the trampoline.
	 */
	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	/* have to release tr while _not_ holding pool mutex for trampoline */
	bpf_trampoline_put(tr); /* bpf_trampoline_get above */

	return err;
}

/*
 * Drop one reference on the cgroup shim link attached for @prog's attach
 * target, if such a shim exists on the trampoline.
 */
void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_lookup(key, 0);
	if (WARN_ON_ONCE(!tr))
		return;

	trampoline_lock(tr);
	shim_link = cgroup_shim_find(tr, bpf_func);
	trampoline_unlock(tr);

	/* the link is put after the pool lock has been dropped */
	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
}
#endif

/*
 * Look up (or create) the trampoline for @key and return it with a
 * reference held. The target address and function model from @tgt_info
 * are recorded only when tr->func.addr is not set yet.
 *
 * Returns NULL when the lookup fails to allocate.
 */
struct bpf_trampoline *bpf_trampoline_get(u64 key,
					  struct bpf_attach_target_info *tgt_info)
{
	struct bpf_trampoline *tr;

	tr = bpf_trampoline_lookup(key, tgt_info->tgt_addr);
	if (!tr)
		return NULL;

	trampoline_lock(tr);
	if (tr->func.addr)
		goto out;

	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
	tr->func.addr = (void *)tgt_info->tgt_addr;
out:
	trampoline_unlock(tr);
	return tr;
}

void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	int i;

if (!tr) 1192 return; 1193 mutex_lock(&trampoline_mutex); 1194 if (!refcount_dec_and_test(&tr->refcnt)) 1195 goto out; 1196 1197 for (i = 0; i < BPF_TRAMP_MAX; i++) 1198 if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i]))) 1199 goto out; 1200 1201 /* This code will be executed even when the last bpf_tramp_image 1202 * is alive. All progs are detached from the trampoline and the 1203 * trampoline image is patched with jmp into epilogue to skip 1204 * fexit progs. The fentry-only trampoline will be freed via 1205 * multiple rcu callbacks. 1206 */ 1207 hlist_del(&tr->hlist_key); 1208 hlist_del(&tr->hlist_ip); 1209 direct_ops_free(tr); 1210 kfree(tr); 1211 out: 1212 mutex_unlock(&trampoline_mutex); 1213 } 1214 1215 #define NO_START_TIME 1 1216 static __always_inline u64 notrace bpf_prog_start_time(void) 1217 { 1218 u64 start = NO_START_TIME; 1219 1220 if (static_branch_unlikely(&bpf_stats_enabled_key)) { 1221 start = sched_clock(); 1222 if (unlikely(!start)) 1223 start = NO_START_TIME; 1224 } 1225 return start; 1226 } 1227 1228 /* The logic is similar to bpf_prog_run(), but with an explicit 1229 * rcu_read_lock() and migrate_disable() which are required 1230 * for the trampoline. The macro is split into 1231 * call __bpf_prog_enter 1232 * call prog->bpf_func 1233 * call __bpf_prog_exit 1234 * 1235 * __bpf_prog_enter returns: 1236 * 0 - skip execution of the bpf prog 1237 * 1 - execute bpf prog 1238 * [2..MAX_U64] - execute bpf prog and record execution time. 1239 * This is start time. 
1240 */ 1241 static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) 1242 __acquires(RCU) 1243 { 1244 rcu_read_lock_dont_migrate(); 1245 1246 run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); 1247 1248 if (unlikely(!bpf_prog_get_recursion_context(prog))) { 1249 bpf_prog_inc_misses_counter(prog); 1250 if (prog->aux->recursion_detected) 1251 prog->aux->recursion_detected(prog); 1252 return 0; 1253 } 1254 return bpf_prog_start_time(); 1255 } 1256 1257 static void notrace __update_prog_stats(struct bpf_prog *prog, u64 start) 1258 { 1259 struct bpf_prog_stats *stats; 1260 unsigned long flags; 1261 u64 duration; 1262 1263 /* 1264 * static_key could be enabled in __bpf_prog_enter* and disabled in 1265 * __bpf_prog_exit*. And vice versa. Check that 'start' is valid. 1266 */ 1267 if (start <= NO_START_TIME) 1268 return; 1269 1270 duration = sched_clock() - start; 1271 stats = this_cpu_ptr(prog->stats); 1272 flags = u64_stats_update_begin_irqsave(&stats->syncp); 1273 u64_stats_inc(&stats->cnt); 1274 u64_stats_add(&stats->nsecs, duration); 1275 u64_stats_update_end_irqrestore(&stats->syncp, flags); 1276 } 1277 1278 static __always_inline void notrace update_prog_stats(struct bpf_prog *prog, 1279 u64 start) 1280 { 1281 if (static_branch_unlikely(&bpf_stats_enabled_key)) 1282 __update_prog_stats(prog, start); 1283 } 1284 1285 static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start, 1286 struct bpf_tramp_run_ctx *run_ctx) 1287 __releases(RCU) 1288 { 1289 bpf_reset_run_ctx(run_ctx->saved_run_ctx); 1290 1291 update_prog_stats(prog, start); 1292 bpf_prog_put_recursion_context(prog); 1293 rcu_read_unlock_migrate(); 1294 } 1295 1296 static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, 1297 struct bpf_tramp_run_ctx *run_ctx) 1298 __acquires(RCU) 1299 { 1300 /* Runtime stats are exported via actual BPF_LSM_CGROUP 1301 * programs, not the shims. 
1302 */ 1303 rcu_read_lock_dont_migrate(); 1304 1305 run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); 1306 1307 return NO_START_TIME; 1308 } 1309 1310 static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, 1311 struct bpf_tramp_run_ctx *run_ctx) 1312 __releases(RCU) 1313 { 1314 bpf_reset_run_ctx(run_ctx->saved_run_ctx); 1315 1316 rcu_read_unlock_migrate(); 1317 } 1318 1319 u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, 1320 struct bpf_tramp_run_ctx *run_ctx) 1321 { 1322 rcu_read_lock_trace(); 1323 migrate_disable(); 1324 might_fault(); 1325 1326 run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); 1327 1328 if (unlikely(!bpf_prog_get_recursion_context(prog))) { 1329 bpf_prog_inc_misses_counter(prog); 1330 if (prog->aux->recursion_detected) 1331 prog->aux->recursion_detected(prog); 1332 return 0; 1333 } 1334 return bpf_prog_start_time(); 1335 } 1336 1337 void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, 1338 struct bpf_tramp_run_ctx *run_ctx) 1339 { 1340 bpf_reset_run_ctx(run_ctx->saved_run_ctx); 1341 1342 update_prog_stats(prog, start); 1343 bpf_prog_put_recursion_context(prog); 1344 migrate_enable(); 1345 rcu_read_unlock_trace(); 1346 } 1347 1348 static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, 1349 struct bpf_tramp_run_ctx *run_ctx) 1350 { 1351 rcu_read_lock_trace(); 1352 migrate_disable(); 1353 might_fault(); 1354 1355 run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); 1356 1357 return bpf_prog_start_time(); 1358 } 1359 1360 static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, 1361 struct bpf_tramp_run_ctx *run_ctx) 1362 { 1363 bpf_reset_run_ctx(run_ctx->saved_run_ctx); 1364 1365 update_prog_stats(prog, start); 1366 migrate_enable(); 1367 rcu_read_unlock_trace(); 1368 } 1369 1370 static u64 notrace __bpf_prog_enter(struct bpf_prog *prog, 1371 struct bpf_tramp_run_ctx *run_ctx) 1372 __acquires(RCU) 1373 { 1374 
rcu_read_lock_dont_migrate(); 1375 1376 run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); 1377 1378 return bpf_prog_start_time(); 1379 } 1380 1381 static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, 1382 struct bpf_tramp_run_ctx *run_ctx) 1383 __releases(RCU) 1384 { 1385 bpf_reset_run_ctx(run_ctx->saved_run_ctx); 1386 1387 update_prog_stats(prog, start); 1388 rcu_read_unlock_migrate(); 1389 } 1390 1391 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr) 1392 { 1393 percpu_ref_get(&tr->pcref); 1394 } 1395 1396 void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr) 1397 { 1398 percpu_ref_put(&tr->pcref); 1399 } 1400 1401 bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog) 1402 { 1403 bool sleepable = prog->sleepable; 1404 1405 if (bpf_prog_check_recur(prog)) 1406 return sleepable ? __bpf_prog_enter_sleepable_recur : 1407 __bpf_prog_enter_recur; 1408 1409 if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM && 1410 prog->expected_attach_type == BPF_LSM_CGROUP) 1411 return __bpf_prog_enter_lsm_cgroup; 1412 1413 return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter; 1414 } 1415 1416 bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog) 1417 { 1418 bool sleepable = prog->sleepable; 1419 1420 if (bpf_prog_check_recur(prog)) 1421 return sleepable ? __bpf_prog_exit_sleepable_recur : 1422 __bpf_prog_exit_recur; 1423 1424 if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM && 1425 prog->expected_attach_type == BPF_LSM_CGROUP) 1426 return __bpf_prog_exit_lsm_cgroup; 1427 1428 return sleepable ? 
__bpf_prog_exit_sleepable : __bpf_prog_exit; 1429 } 1430 1431 int __weak 1432 arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, 1433 const struct btf_func_model *m, u32 flags, 1434 struct bpf_tramp_nodes *tnodes, 1435 void *func_addr) 1436 { 1437 return -ENOTSUPP; 1438 } 1439 1440 void * __weak arch_alloc_bpf_trampoline(unsigned int size) 1441 { 1442 void *image; 1443 1444 if (WARN_ON_ONCE(size > PAGE_SIZE)) 1445 return NULL; 1446 image = bpf_jit_alloc_exec(PAGE_SIZE); 1447 if (image) 1448 set_vm_flush_reset_perms(image); 1449 return image; 1450 } 1451 1452 void __weak arch_free_bpf_trampoline(void *image, unsigned int size) 1453 { 1454 WARN_ON_ONCE(size > PAGE_SIZE); 1455 /* bpf_jit_free_exec doesn't need "size", but 1456 * bpf_prog_pack_free() needs it. 1457 */ 1458 bpf_jit_free_exec(image); 1459 } 1460 1461 int __weak arch_protect_bpf_trampoline(void *image, unsigned int size) 1462 { 1463 WARN_ON_ONCE(size > PAGE_SIZE); 1464 return set_memory_rox((long)image, 1); 1465 } 1466 1467 int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, 1468 struct bpf_tramp_nodes *tnodes, void *func_addr) 1469 { 1470 return -ENOTSUPP; 1471 } 1472 1473 #if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \ 1474 defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS) && \ 1475 defined(CONFIG_BPF_SYSCALL) 1476 1477 static void trampoline_lock_all(void) 1478 { 1479 int i; 1480 1481 for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++) 1482 mutex_lock(&trampoline_locks[i].mutex); 1483 } 1484 1485 static void trampoline_unlock_all(void) 1486 { 1487 int i; 1488 1489 for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++) 1490 mutex_unlock(&trampoline_locks[i].mutex); 1491 } 1492 1493 static void remove_tracing_multi_data(struct bpf_tracing_multi_data *data) 1494 { 1495 ftrace_hash_remove(data->reg); 1496 ftrace_hash_remove(data->unreg); 1497 ftrace_hash_remove(data->modify); 1498 } 1499 1500 static void clear_tracing_multi_data(struct 
bpf_tracing_multi_data *data) 1501 { 1502 remove_tracing_multi_data(data); 1503 1504 free_ftrace_hash(data->reg); 1505 free_ftrace_hash(data->unreg); 1506 free_ftrace_hash(data->modify); 1507 } 1508 1509 static int init_tracing_multi_data(struct bpf_tracing_multi_data *data) 1510 { 1511 data->reg = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); 1512 data->unreg = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); 1513 data->modify = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); 1514 1515 if (!data->reg || !data->unreg || !data->modify) { 1516 clear_tracing_multi_data(data); 1517 return -ENOMEM; 1518 } 1519 return 0; 1520 } 1521 1522 static void ftrace_hash_add(struct ftrace_hash *hash, struct ftrace_func_entry *entry, 1523 unsigned long ip, unsigned long direct) 1524 { 1525 entry->ip = ip; 1526 entry->direct = direct; 1527 add_ftrace_hash_entry(hash, entry); 1528 } 1529 1530 static int register_fentry_multi(struct bpf_trampoline *tr, struct bpf_tramp_image *im, void *ptr) 1531 { 1532 unsigned long addr = (unsigned long) im->image; 1533 unsigned long ip = ftrace_location(tr->ip); 1534 struct bpf_tracing_multi_data *data = ptr; 1535 1536 if (bpf_trampoline_use_jmp(tr->flags)) 1537 addr = ftrace_jmp_set(addr); 1538 1539 ftrace_hash_add(data->reg, data->entry, ip, addr); 1540 tr->cur_image = im; 1541 return 0; 1542 } 1543 1544 static int unregister_fentry_multi(struct bpf_trampoline *tr, u32 orig_flags, void *ptr) 1545 { 1546 unsigned long addr = (unsigned long) tr->cur_image->image; 1547 unsigned long ip = ftrace_location(tr->ip); 1548 struct bpf_tracing_multi_data *data = ptr; 1549 1550 if (bpf_trampoline_use_jmp(tr->flags)) 1551 addr = ftrace_jmp_set(addr); 1552 1553 ftrace_hash_add(data->unreg, data->entry, ip, addr); 1554 tr->cur_image = NULL; 1555 return 0; 1556 } 1557 1558 static int modify_fentry_multi(struct bpf_trampoline *tr, u32 orig_flags, struct bpf_tramp_image *im, 1559 bool lock_direct_mutex, void *ptr) 1560 { 1561 unsigned long addr = (unsigned long) im->image; 
1562 unsigned long ip = ftrace_location(tr->ip); 1563 struct bpf_tracing_multi_data *data = ptr; 1564 1565 if (bpf_trampoline_use_jmp(tr->flags)) 1566 addr = ftrace_jmp_set(addr); 1567 1568 ftrace_hash_add(data->modify, data->entry, ip, addr); 1569 tr->cur_image = im; 1570 return 0; 1571 } 1572 1573 static const struct bpf_trampoline_ops trampoline_multi_ops = { 1574 .register_fentry = register_fentry_multi, 1575 .unregister_fentry = unregister_fentry_multi, 1576 .modify_fentry = modify_fentry_multi, 1577 }; 1578 1579 static void bpf_trampoline_multi_attach_init(struct bpf_trampoline *tr) 1580 { 1581 tr->multi_attach.old_image = tr->cur_image; 1582 tr->multi_attach.old_flags = tr->flags; 1583 } 1584 1585 static void bpf_trampoline_multi_attach_free(struct bpf_trampoline *tr) 1586 { 1587 if (tr->multi_attach.old_image) 1588 bpf_tramp_image_put(tr->multi_attach.old_image); 1589 1590 tr->multi_attach.old_image = NULL; 1591 tr->multi_attach.old_flags = 0; 1592 } 1593 1594 static void bpf_trampoline_multi_attach_rollback(struct bpf_trampoline *tr) 1595 { 1596 if (tr->cur_image) 1597 bpf_tramp_image_put(tr->cur_image); 1598 tr->cur_image = tr->multi_attach.old_image; 1599 tr->flags = tr->multi_attach.old_flags; 1600 1601 tr->multi_attach.old_image = NULL; 1602 tr->multi_attach.old_flags = 0; 1603 } 1604 1605 #define for_each_mnode_cnt(mnode, link, cnt) \ 1606 for (i = 0, mnode = &link->nodes[i]; i < cnt; i++, mnode = &link->nodes[i]) 1607 1608 #define for_each_mnode(mnode, link) \ 1609 for_each_mnode_cnt(mnode, link, link->nodes_cnt) 1610 1611 int bpf_trampoline_multi_attach(struct bpf_prog *prog, u32 *ids, 1612 struct bpf_tracing_multi_link *link) 1613 { 1614 struct bpf_tracing_multi_data *data = &link->data; 1615 struct bpf_attach_target_info tgt_info = {}; 1616 struct btf *btf = prog->aux->attach_btf; 1617 struct bpf_tracing_multi_node *mnode; 1618 struct bpf_trampoline *tr; 1619 int i, err, rollback_cnt; 1620 u64 key; 1621 1622 for_each_mnode(mnode, link) { 1623 
rollback_cnt = i; 1624 1625 err = bpf_check_attach_btf_id_multi(btf, prog, ids[i], &tgt_info); 1626 if (err) 1627 goto rollback_put; 1628 1629 key = bpf_trampoline_compute_key(NULL, btf, ids[i]); 1630 1631 tr = bpf_trampoline_get(key, &tgt_info); 1632 if (!tr) { 1633 err = -ENOMEM; 1634 goto rollback_put; 1635 } 1636 1637 mnode->trampoline = tr; 1638 mnode->node.link = &link->link; 1639 mnode->node.cookie = link->cookies ? link->cookies[i] : 0; 1640 1641 if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) { 1642 link->fexits[i].link = &link->link; 1643 link->fexits[i].cookie = link->cookies ? link->cookies[i] : 0; 1644 } 1645 1646 cond_resched(); 1647 } 1648 1649 err = init_tracing_multi_data(data); 1650 if (err) { 1651 rollback_cnt = link->nodes_cnt; 1652 goto rollback_put; 1653 } 1654 1655 trampoline_lock_all(); 1656 1657 for_each_mnode(mnode, link) { 1658 bpf_trampoline_multi_attach_init(mnode->trampoline); 1659 1660 data->entry = &mnode->entry; 1661 err = __bpf_trampoline_link_prog(&mnode->node, mnode->trampoline, NULL, 1662 &trampoline_multi_ops, data); 1663 if (err) { 1664 rollback_cnt = i; 1665 goto rollback_unlink; 1666 } 1667 } 1668 1669 rollback_cnt = link->nodes_cnt; 1670 if (ftrace_hash_count(data->reg)) { 1671 err = update_ftrace_direct_add(&direct_ops, data->reg); 1672 if (err) 1673 goto rollback_unlink; 1674 } 1675 1676 if (ftrace_hash_count(data->modify)) { 1677 err = update_ftrace_direct_mod(&direct_ops, data->modify, true); 1678 if (err) { 1679 if (ftrace_hash_count(data->reg)) 1680 WARN_ON_ONCE(update_ftrace_direct_del(&direct_ops, data->reg)); 1681 goto rollback_unlink; 1682 } 1683 } 1684 1685 for_each_mnode(mnode, link) 1686 bpf_trampoline_multi_attach_free(mnode->trampoline); 1687 1688 trampoline_unlock_all(); 1689 1690 remove_tracing_multi_data(data); 1691 return 0; 1692 1693 rollback_unlink: 1694 for_each_mnode_cnt(mnode, link, rollback_cnt) { 1695 bpf_trampoline_remove_prog(mnode->trampoline, &mnode->node); 1696 
bpf_trampoline_multi_attach_rollback(mnode->trampoline); 1697 } 1698 1699 trampoline_unlock_all(); 1700 1701 clear_tracing_multi_data(data); 1702 rollback_cnt = link->nodes_cnt; 1703 1704 rollback_put: 1705 for_each_mnode_cnt(mnode, link, rollback_cnt) 1706 bpf_trampoline_put(mnode->trampoline); 1707 1708 return err; 1709 } 1710 1711 int bpf_trampoline_multi_detach(struct bpf_prog *prog, struct bpf_tracing_multi_link *link) 1712 { 1713 struct bpf_tracing_multi_data *data = &link->data; 1714 struct bpf_tracing_multi_node *mnode; 1715 int i; 1716 1717 trampoline_lock_all(); 1718 1719 for_each_mnode(mnode, link) { 1720 data->entry = &mnode->entry; 1721 bpf_trampoline_multi_attach_init(mnode->trampoline); 1722 WARN_ON_ONCE(__bpf_trampoline_unlink_prog(&mnode->node, mnode->trampoline, 1723 NULL, &trampoline_multi_ops, data)); 1724 } 1725 1726 if (ftrace_hash_count(data->unreg)) 1727 WARN_ON_ONCE(update_ftrace_direct_del(&direct_ops, data->unreg)); 1728 if (ftrace_hash_count(data->modify)) 1729 WARN_ON_ONCE(update_ftrace_direct_mod(&direct_ops, data->modify, true)); 1730 1731 for_each_mnode(mnode, link) 1732 bpf_trampoline_multi_attach_free(mnode->trampoline); 1733 1734 trampoline_unlock_all(); 1735 1736 for_each_mnode(mnode, link) 1737 bpf_trampoline_put(mnode->trampoline); 1738 1739 clear_tracing_multi_data(data); 1740 return 0; 1741 } 1742 1743 #undef for_each_mnode_cnt 1744 #undef for_each_mnode 1745 1746 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS && 1747 CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS && 1748 CONFIG_BPF_SYSCALL */ 1749 1750 static int __init init_trampolines(void) 1751 { 1752 int i; 1753 1754 for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++) 1755 INIT_HLIST_HEAD(&trampoline_key_table[i]); 1756 for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++) 1757 INIT_HLIST_HEAD(&trampoline_ip_table[i]); 1758 for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++) 1759 __mutex_init(&trampoline_locks[i].mutex, "trampoline_lock", &trampoline_locks[i].key); 1760 return 0; 1761 } 1762 
/* Register init_trampolines() as a late initcall. */
late_initcall(init_trampolines);