// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>
#include <linux/static_call.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_lsm.h>
#include <linux/delay.h>

/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);

static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
{
	struct bpf_trampoline *tr = ops->private;
	int ret = 0;

	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
		/* This is called inside register_ftrace_direct_multi(), so
		 * tr->mutex is already locked.
		 */
		lockdep_assert_held_once(&tr->mutex);

		/* Instead of updating the trampoline here, we propagate
		 * -EAGAIN to register_ftrace_direct(). Then we can
		 * retry register_ftrace_direct() after updating the
		 * trampoline.
		 */
		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
			if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
				return -EBUSY;

			tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
			return -EAGAIN;
		}

		return 0;
	}

	/* The normal locking order is
	 *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
	 *
	 * The following two commands are called from
	 *
	 *   prepare_direct_functions_for_ipmodify
	 *   cleanup_direct_functions_after_ipmodify
	 *
	 * In both cases, direct_mutex is already locked. Use
	 * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
	 * (something else is making changes to this same trampoline).
	 */
	if (!mutex_trylock(&tr->mutex)) {
		/* sleep 1 ms to make sure whatever holding tr->mutex makes
		 * some progress.
		 */
		msleep(1);
		return -EAGAIN;
	}

	switch (cmd) {
	case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
		tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;

		if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
		    !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
		break;
	case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
		tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;

		if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
			ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	mutex_unlock(&tr->mutex);
	return ret;
}
#endif
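
/* Informal summary of the IPMODIFY-sharing handshake handled above (a
 * restatement of the code, not a new mechanism): when ftrace reports that
 * another IPMODIFY user (e.g. a live patch) wants the same function,
 *
 *   ENABLE_SHARE_IPMODIFY_SELF  - set BPF_TRAMP_F_SHARE_IPMODIFY and return
 *                                 -EAGAIN, so bpf_trampoline_update() can
 *                                 rebuild the trampoline and retry
 *                                 register_ftrace_direct()
 *   ENABLE_SHARE_IPMODIFY_PEER  - set the flag and update the trampoline
 *                                 in place (without taking direct_mutex)
 *   DISABLE_SHARE_IPMODIFY_PEER - clear the flag and update in place
 *
 * bpf_trampoline_update() turns SHARE_IPMODIFY + CALL_ORIG into
 * BPF_TRAMP_F_ORIG_STACK; see the "again:" retry below.
 */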

bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
{
	enum bpf_attach_type eatype = prog->expected_attach_type;
	enum bpf_prog_type ptype = prog->type;

	return (ptype == BPF_PROG_TYPE_TRACING &&
		(eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
		 eatype == BPF_MODIFY_RETURN)) ||
		(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}

void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym)
{
	ksym->start = (unsigned long) data;
	ksym->end = ksym->start + size;
}

void bpf_image_ksym_add(struct bpf_ksym *ksym)
{
	bpf_ksym_add(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
	bpf_ksym_del(ksym);
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
			   PAGE_SIZE, true, ksym->name);
}

static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
	if (!tr->fops) {
		kfree(tr);
		tr = NULL;
		goto out;
	}
	tr->fops->private = tr;
	tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
#endif

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}
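
/* Note on the lookup key (a sketch; the actual packing lives in
 * bpf_trampoline_compute_key() in bpf_verifier.h): the 64-bit key is
 * roughly
 *
 *	key = ((u64)btf_obj_id_or_tgt_prog_id << 32) | btf_id;
 *
 * so each attachable kernel (or freplace target) function resolves to a
 * single trampoline shared by all progs attached to it.
 */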

static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);

	return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
			 bool lock_direct_mutex)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed) {
		if (lock_direct_mutex)
			ret = modify_ftrace_direct(tr->fops, (long)new_addr);
		else
			ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr);
	} else {
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
	}
	return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
	void *ip = tr->func.addr;
	unsigned long faddr;
	int ret;

	faddr = ftrace_location((unsigned long)ip);
	if (faddr) {
		if (!tr->fops)
			return -ENOTSUPP;
		tr->func.ftrace_managed = true;
	}

	if (tr->func.ftrace_managed) {
		ret = ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
		if (ret)
			return ret;
		ret = register_ftrace_direct(tr->fops, (long)new_addr);
	} else {
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
	}

	return ret;
}

static struct bpf_tramp_links *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
	struct bpf_tramp_link *link;
	struct bpf_tramp_links *tlinks;
	struct bpf_tramp_link **links;
	int kind;

	*total = 0;
	tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
	if (!tlinks)
		return ERR_PTR(-ENOMEM);

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		tlinks[kind].nr_links = tr->progs_cnt[kind];
		*total += tr->progs_cnt[kind];
		links = tlinks[kind].links;

		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
			*ip_arg |= link->link.prog->call_get_func_ip;
			*links++ = link;
		}
	}
	return tlinks;
}

static void bpf_tramp_image_free(struct bpf_tramp_image *im)
{
	bpf_image_ksym_del(&im->ksym);
	arch_free_bpf_trampoline(im->image, im->size);
	bpf_jit_uncharge_modmem(im->size);
	percpu_ref_exit(&im->pcref);
	kfree_rcu(im, rcu);
}

static void __bpf_tramp_image_put_deferred(struct work_struct *work)
{
	struct bpf_tramp_image *im;

	im = container_of(work, struct bpf_tramp_image, work);
	bpf_tramp_image_free(im);
}

/* callback, fexit step 3 or fentry step 2 */
static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
{
	struct bpf_tramp_image *im;

	im = container_of(rcu, struct bpf_tramp_image, rcu);
	INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
	schedule_work(&im->work);
}

/* callback, fexit step 2. Called after percpu_ref_kill confirms. */
static void __bpf_tramp_image_release(struct percpu_ref *pcref)
{
	struct bpf_tramp_image *im;

	im = container_of(pcref, struct bpf_tramp_image, pcref);
	call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}

/* callback, fexit or fentry step 1 */
static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu)
{
	struct bpf_tramp_image *im;

	im = container_of(rcu, struct bpf_tramp_image, rcu);
	if (im->ip_after_call)
		/* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */
		percpu_ref_kill(&im->pcref);
	else
		/* the case of fentry trampoline */
		call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
}

static void bpf_tramp_image_put(struct bpf_tramp_image *im)
{
	/* The trampoline image that calls original function is using:
	 * rcu_read_lock_trace to protect sleepable bpf progs
	 * rcu_read_lock to protect normal bpf progs
	 * percpu_ref to protect trampoline itself
	 * rcu tasks to protect trampoline asm not covered by percpu_ref
	 * (which are few asm insns before __bpf_tramp_enter and
	 * after __bpf_tramp_exit)
	 *
	 * The trampoline is unreachable before bpf_tramp_image_put().
	 *
	 * First, patch the trampoline to avoid calling into fexit progs.
	 * The progs will be freed even if the original function is still
	 * executing or sleeping.
	 * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on
	 * first few asm instructions to execute and call into
	 * __bpf_tramp_enter->percpu_ref_get.
	 * Then use percpu_ref_kill to wait for the trampoline and the original
	 * function to finish.
	 * Then use call_rcu_tasks() to make sure few asm insns in
	 * the trampoline epilogue are done as well.
	 *
	 * In !PREEMPT case the task that got interrupted in the first asm
	 * insns won't go through an RCU quiescent state which the
	 * percpu_ref_kill will be waiting for. Hence the first
	 * call_rcu_tasks() is not necessary.
	 */
	if (im->ip_after_call) {
		int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
					     NULL, im->ip_epilogue);
		WARN_ON(err);
		if (IS_ENABLED(CONFIG_TASKS_RCU))
			call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
		else
			percpu_ref_kill(&im->pcref);
		return;
	}

	/* The trampoline without fexit and fmod_ret progs doesn't call original
	 * function and doesn't use percpu_ref.
	 * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
	 * Then use call_rcu_tasks() to wait for the rest of trampoline asm
	 * and normal progs.
	 */
	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
}
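
/* Informal recap of the teardown callbacks above, tying the "fexit step N"
 * and "fentry step N" comments together:
 *
 *   fexit/fmod_ret image:  poke a jmp over the fexit calls, then
 *	call_rcu_tasks (step 1, only with CONFIG_TASKS_RCU) ->
 *	percpu_ref_kill, wait until the ref drains (step 2) ->
 *	call_rcu_tasks (step 3) -> worker frees the image
 *
 *   fentry-only image:
 *	call_rcu_tasks_trace (step 1) -> call_rcu_tasks (step 2) ->
 *	worker frees the image
 */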

static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
{
	struct bpf_tramp_image *im;
	struct bpf_ksym *ksym;
	void *image;
	int err = -ENOMEM;

	im = kzalloc(sizeof(*im), GFP_KERNEL);
	if (!im)
		goto out;

	err = bpf_jit_charge_modmem(size);
	if (err)
		goto out_free_im;
	im->size = size;

	err = -ENOMEM;
	im->image = image = arch_alloc_bpf_trampoline(size);
	if (!image)
		goto out_uncharge;

	err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
	if (err)
		goto out_free_image;

	ksym = &im->ksym;
	INIT_LIST_HEAD_RCU(&ksym->lnode);
	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
	bpf_image_ksym_init(image, size, ksym);
	bpf_image_ksym_add(ksym);
	return im;

out_free_image:
	arch_free_bpf_trampoline(im->image, im->size);
out_uncharge:
	bpf_jit_uncharge_modmem(size);
out_free_im:
	kfree(im);
out:
	return ERR_PTR(err);
}

static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
{
	struct bpf_tramp_image *im;
	struct bpf_tramp_links *tlinks;
	u32 orig_flags = tr->flags;
	bool ip_arg = false;
	int err, total, size;

	tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
	if (IS_ERR(tlinks))
		return PTR_ERR(tlinks);

	if (total == 0) {
		err = unregister_fentry(tr, tr->cur_image->image);
		bpf_tramp_image_put(tr->cur_image);
		tr->cur_image = NULL;
		goto out;
	}

	/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);

	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
		/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
		 * should not be set together.
		 */
		tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
	} else {
		tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
	}

	if (ip_arg)
		tr->flags |= BPF_TRAMP_F_IP_ARG;

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
again:
	if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
	    (tr->flags & BPF_TRAMP_F_CALL_ORIG))
		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
#endif

	size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
					tlinks, tr->func.addr);
	if (size < 0) {
		err = size;
		goto out;
	}

	if (size > PAGE_SIZE) {
		err = -E2BIG;
		goto out;
	}

	im = bpf_tramp_image_alloc(tr->key, size);
	if (IS_ERR(im)) {
		err = PTR_ERR(im);
		goto out;
	}

	err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
					  &tr->func.model, tr->flags, tlinks,
					  tr->func.addr);
	if (err < 0)
		goto out_free;

	err = arch_protect_bpf_trampoline(im->image, im->size);
	if (err)
		goto out_free;

	WARN_ON(tr->cur_image && total == 0);
	if (tr->cur_image)
		/* progs already running at this address */
		err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
	else
		/* first time registering */
		err = register_fentry(tr, im->image);

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	if (err == -EAGAIN) {
		/* -EAGAIN from bpf_tramp_ftrace_ops_func. Now
		 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
		 * trampoline again, and retry register.
		 */
		bpf_tramp_image_free(im);
		goto again;
	}
#endif
	if (err)
		goto out_free;

	if (tr->cur_image)
		bpf_tramp_image_put(tr->cur_image);
	tr->cur_image = im;
out:
	/* If any error happens, restore previous flags */
	if (err)
		tr->flags = orig_flags;
	kfree(tlinks);
	return err;

out_free:
	bpf_tramp_image_free(im);
	goto out;
}
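
/* Rough summary of how bpf_trampoline_update() above picks the trampoline
 * flags (restating the code, not adding to it):
 *
 *   fexit or fmod_ret prog attached  -> CALL_ORIG | SKIP_FRAME
 *   fentry-only                      -> RESTORE_REGS
 *   any prog calls get_func_ip()     -> IP_ARG
 *   SHARE_IPMODIFY and CALL_ORIG     -> ORIG_STACK (ftrace direct case)
 */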

static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
	switch (prog->expected_attach_type) {
	case BPF_TRACE_FENTRY:
		return BPF_TRAMP_FENTRY;
	case BPF_MODIFY_RETURN:
		return BPF_TRAMP_MODIFY_RETURN;
	case BPF_TRACE_FEXIT:
		return BPF_TRAMP_FEXIT;
	case BPF_LSM_MAC:
		if (!prog->aux->attach_func_proto->type)
			/* The function returns void, we cannot modify its
			 * return value.
			 */
			return BPF_TRAMP_FEXIT;
		else
			return BPF_TRAMP_MODIFY_RETURN;
	default:
		return BPF_TRAMP_REPLACE;
	}
}

static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
{
	struct bpf_prog_aux *aux = tgt_prog->aux;

	guard(mutex)(&aux->ext_mutex);
	if (aux->prog_array_member_cnt)
		/* Program extensions can not extend target prog when the target
		 * prog has been updated to any prog_array map as tail callee.
		 * It's to prevent a potential infinite loop like:
		 * tgt prog entry -> tgt prog subprog -> freplace prog entry
		 * --tailcall-> tgt prog entry.
		 */
		return -EBUSY;

	aux->is_extended = true;
	return 0;
}

static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
				      struct bpf_trampoline *tr,
				      struct bpf_prog *tgt_prog)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_tramp_link *link_exiting;
	int err = 0;
	int cnt = 0, i;

	kind = bpf_attach_type_to_tramp(link->link.prog);
	if (tr->extension_prog)
		/* cannot attach fentry/fexit if extension prog is attached.
		 * cannot overwrite extension prog either.
		 */
		return -EBUSY;

	for (i = 0; i < BPF_TRAMP_MAX; i++)
		cnt += tr->progs_cnt[i];

	if (kind == BPF_TRAMP_REPLACE) {
		/* Cannot attach extension if fentry/fexit are in use. */
		if (cnt)
			return -EBUSY;
		err = bpf_freplace_check_tgt_prog(tgt_prog);
		if (err)
			return err;
		tr->extension_prog = link->link.prog;
		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
					  link->link.prog->bpf_func);
	}
	if (cnt >= BPF_MAX_TRAMP_LINKS)
		return -E2BIG;
	if (!hlist_unhashed(&link->tramp_hlist))
		/* prog already linked */
		return -EBUSY;
	hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
		if (link_exiting->link.prog != link->link.prog)
			continue;
		/* prog already linked */
		return -EBUSY;
	}

	hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
	tr->progs_cnt[kind]++;
	err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
	if (err) {
		hlist_del_init(&link->tramp_hlist);
		tr->progs_cnt[kind]--;
	}
	return err;
}

int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
			     struct bpf_trampoline *tr,
			     struct bpf_prog *tgt_prog)
{
	int err;

	mutex_lock(&tr->mutex);
	err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
	mutex_unlock(&tr->mutex);
	return err;
}

static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
					struct bpf_trampoline *tr,
					struct bpf_prog *tgt_prog)
{
	enum bpf_tramp_prog_type kind;
	int err;

	kind = bpf_attach_type_to_tramp(link->link.prog);
	if (kind == BPF_TRAMP_REPLACE) {
		WARN_ON_ONCE(!tr->extension_prog);
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
					 tr->extension_prog->bpf_func, NULL);
		tr->extension_prog = NULL;
		guard(mutex)(&tgt_prog->aux->ext_mutex);
		tgt_prog->aux->is_extended = false;
		return err;
	}
	hlist_del_init(&link->tramp_hlist);
	tr->progs_cnt[kind]--;
	return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
			       struct bpf_trampoline *tr,
			       struct bpf_prog *tgt_prog)
{
	int err;

	mutex_lock(&tr->mutex);
	err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog);
	mutex_unlock(&tr->mutex);
	return err;
}

#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
static void bpf_shim_tramp_link_release(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	/* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
	if (!shim_link->trampoline)
		return;

	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL));
	bpf_trampoline_put(shim_link->trampoline);
}

static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
{
	struct bpf_shim_tramp_link *shim_link =
		container_of(link, struct bpf_shim_tramp_link, link.link);

	kfree(shim_link);
}

static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
	.release = bpf_shim_tramp_link_release,
	.dealloc = bpf_shim_tramp_link_dealloc,
};

static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
						     bpf_func_t bpf_func,
						     int cgroup_atype,
						     enum bpf_attach_type attach_type)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_prog *p;

	shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
	if (!shim_link)
		return NULL;

	p = bpf_prog_alloc(1, 0);
	if (!p) {
		kfree(shim_link);
		return NULL;
	}

	p->jited = false;
	p->bpf_func = bpf_func;

	p->aux->cgroup_atype = cgroup_atype;
	p->aux->attach_func_proto = prog->aux->attach_func_proto;
	p->aux->attach_btf_id = prog->aux->attach_btf_id;
	p->aux->attach_btf = prog->aux->attach_btf;
	btf_get(p->aux->attach_btf);
	p->type = BPF_PROG_TYPE_LSM;
	p->expected_attach_type = BPF_LSM_MAC;
	bpf_prog_inc(p);
	bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
		      &bpf_shim_tramp_link_lops, p, attach_type);
	bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);

	return shim_link;
}

static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
						    bpf_func_t bpf_func)
{
	struct bpf_tramp_link *link;
	int kind;

	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
			struct bpf_prog *p = link->link.prog;

			if (p->bpf_func == bpf_func)
				return container_of(link, struct bpf_shim_tramp_link, link);
		}
	}

	return NULL;
}

int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
				    int cgroup_atype,
				    enum bpf_attach_type attach_type)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_attach_target_info tgt_info = {};
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;
	int err;

	err = bpf_check_attach_target(NULL, prog, NULL,
				      prog->aux->attach_btf_id,
				      &tgt_info);
	if (err)
		return err;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_get(key, &tgt_info);
	if (!tr)
		return -ENOMEM;

	mutex_lock(&tr->mutex);

	shim_link = cgroup_shim_find(tr, bpf_func);
	if (shim_link) {
		/* Reusing existing shim attached by the other program. */
		bpf_link_inc(&shim_link->link.link);

		mutex_unlock(&tr->mutex);
		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
		return 0;
	}

	/* Allocate and install new shim. */

	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype, attach_type);
	if (!shim_link) {
		err = -ENOMEM;
		goto err;
	}

	err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL);
	if (err)
		goto err;

	shim_link->trampoline = tr;
	/* note, we're still holding tr refcnt from above */

	mutex_unlock(&tr->mutex);

	return 0;
err:
	mutex_unlock(&tr->mutex);

	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	/* have to release tr while _not_ holding its mutex */
	bpf_trampoline_put(tr); /* bpf_trampoline_get above */

	return err;
}

void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
{
	struct bpf_shim_tramp_link *shim_link = NULL;
	struct bpf_trampoline *tr;
	bpf_func_t bpf_func;
	u64 key;

	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
					 prog->aux->attach_btf_id);

	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
	tr = bpf_trampoline_lookup(key);
	if (WARN_ON_ONCE(!tr))
		return;

	mutex_lock(&tr->mutex);
	shim_link = cgroup_shim_find(tr, bpf_func);
	mutex_unlock(&tr->mutex);

	if (shim_link)
		bpf_link_put(&shim_link->link.link);

	bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
}
#endif
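
/* Shim lifecycle in brief (informal, derived from the code above): the shim
 * is a stub bpf_prog whose bpf_func is the BPF_LSM_CGROUP dispatcher chosen
 * by bpf_lsm_find_cgroup_shim(). It is linked into the LSM hook's trampoline
 * like any other tramp prog and shared via its link refcount; the final
 * bpf_link_put() ends up in bpf_shim_tramp_link_release(), which unlinks it
 * from the trampoline and drops the trampoline reference.
 */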

struct bpf_trampoline *bpf_trampoline_get(u64 key,
					  struct bpf_attach_target_info *tgt_info)
{
	struct bpf_trampoline *tr;

	tr = bpf_trampoline_lookup(key);
	if (!tr)
		return NULL;

	mutex_lock(&tr->mutex);
	if (tr->func.addr)
		goto out;

	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
	tr->func.addr = (void *)tgt_info->tgt_addr;
out:
	mutex_unlock(&tr->mutex);
	return tr;
}

void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	int i;

	if (!tr)
		return;
	mutex_lock(&trampoline_mutex);
	if (!refcount_dec_and_test(&tr->refcnt))
		goto out;
	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));

	for (i = 0; i < BPF_TRAMP_MAX; i++)
		if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
			goto out;

	/* This code will be executed even when the last bpf_tramp_image
	 * is alive. All progs are detached from the trampoline and the
	 * trampoline image is patched with jmp into epilogue to skip
	 * fexit progs. The fentry-only trampoline will be freed via
	 * multiple rcu callbacks.
	 */
	hlist_del(&tr->hlist);
	if (tr->fops) {
		ftrace_free_filter(tr->fops);
		kfree(tr->fops);
	}
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
}

#define NO_START_TIME 1
static __always_inline u64 notrace bpf_prog_start_time(void)
{
	u64 start = NO_START_TIME;

	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
		start = sched_clock();
		if (unlikely(!start))
			start = NO_START_TIME;
	}
	return start;
}

/* The logic is similar to bpf_prog_run(), but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 *
 * __bpf_prog_enter returns:
 * 0 - skip execution of the bpf prog
 * 1 - execute bpf prog
 * [2..MAX_U64] - execute bpf prog and record execution time.
 *     This is start time.
 */
static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	rcu_read_lock_dont_migrate();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		if (prog->aux->recursion_detected)
			prog->aux->recursion_detected(prog);
		return 0;
	}
	return bpf_prog_start_time();
}

static void notrace __update_prog_stats(struct bpf_prog *prog, u64 start)
{
	struct bpf_prog_stats *stats;
	unsigned long flags;
	u64 duration;

	/*
	 * static_key could be enabled in __bpf_prog_enter* and disabled in
	 * __bpf_prog_exit*. And vice versa. Check that 'start' is valid.
	 */
	if (start <= NO_START_TIME)
		return;

	duration = sched_clock() - start;
	stats = this_cpu_ptr(prog->stats);
	flags = u64_stats_update_begin_irqsave(&stats->syncp);
	u64_stats_inc(&stats->cnt);
	u64_stats_add(&stats->nsecs, duration);
	u64_stats_update_end_irqrestore(&stats->syncp, flags);
}

static __always_inline void notrace update_prog_stats(struct bpf_prog *prog,
						      u64 start)
{
	if (static_branch_unlikely(&bpf_stats_enabled_key))
		__update_prog_stats(prog, start);
}

static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
					  struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	this_cpu_dec(*(prog->active));
	rcu_read_unlock_migrate();
}
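
/* For reference, the call sequence the JITed trampoline emits around each
 * prog is roughly (pseudo code, per the comment above
 * __bpf_prog_enter_recur()):
 *
 *	start = __bpf_prog_enter_recur(prog, &run_ctx);
 *	if (start)
 *		prog->bpf_func(args, prog->insnsi);
 *	__bpf_prog_exit_recur(prog, start, &run_ctx);
 *
 * i.e. a zero return from the enter helper skips the prog itself, while the
 * exit helper still runs to unwind the recursion counter and run_ctx.
 */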

static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
					       struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	/* Runtime stats are exported via actual BPF_LSM_CGROUP
	 * programs, not the shims.
	 */
	rcu_read_lock_dont_migrate();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return NO_START_TIME;
}

static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
					       struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	rcu_read_unlock_migrate();
}

u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
					     struct bpf_tramp_run_ctx *run_ctx)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		if (prog->aux->recursion_detected)
			prog->aux->recursion_detected(prog);
		return 0;
	}
	return bpf_prog_start_time();
}

void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
					     struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	this_cpu_dec(*(prog->active));
	migrate_enable();
	rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	migrate_enable();
	rcu_read_unlock_trace();
}

static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
				    struct bpf_tramp_run_ctx *run_ctx)
	__acquires(RCU)
{
	rcu_read_lock_dont_migrate();

	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	return bpf_prog_start_time();
}

static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
				    struct bpf_tramp_run_ctx *run_ctx)
	__releases(RCU)
{
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);
	rcu_read_unlock_migrate();
}

void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
{
	percpu_ref_get(&tr->pcref);
}

void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
{
	percpu_ref_put(&tr->pcref);
}

bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
{
	bool sleepable = prog->sleepable;

	if (bpf_prog_check_recur(prog))
		return sleepable ? __bpf_prog_enter_sleepable_recur :
			__bpf_prog_enter_recur;

	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
	    prog->expected_attach_type == BPF_LSM_CGROUP)
		return __bpf_prog_enter_lsm_cgroup;

	return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter;
}
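
/* The enter/exit selectors must stay symmetric: whichever variant
 * bpf_trampoline_enter() picked for a prog (recursion-protected, sleepable,
 * lsm_cgroup or plain), bpf_trampoline_exit() below must hand back the
 * matching exit helper, since the JITed trampoline pairs the two around
 * every prog invocation.
 */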

bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
{
	bool sleepable = prog->sleepable;

	if (bpf_prog_check_recur(prog))
		return sleepable ? __bpf_prog_exit_sleepable_recur :
			__bpf_prog_exit_recur;

	if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
	    prog->expected_attach_type == BPF_LSM_CGROUP)
		return __bpf_prog_exit_lsm_cgroup;

	return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit;
}

int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_tramp_links *tlinks,
			    void *func_addr)
{
	return -ENOTSUPP;
}

void * __weak arch_alloc_bpf_trampoline(unsigned int size)
{
	void *image;

	if (WARN_ON_ONCE(size > PAGE_SIZE))
		return NULL;
	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (image)
		set_vm_flush_reset_perms(image);
	return image;
}

void __weak arch_free_bpf_trampoline(void *image, unsigned int size)
{
	WARN_ON_ONCE(size > PAGE_SIZE);
	/* bpf_jit_free_exec doesn't need "size", but
	 * bpf_prog_pack_free() needs it.
	 */
	bpf_jit_free_exec(image);
}

int __weak arch_protect_bpf_trampoline(void *image, unsigned int size)
{
	WARN_ON_ONCE(size > PAGE_SIZE);
	return set_memory_rox((long)image, 1);
}

int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
				    struct bpf_tramp_links *tlinks, void *func_addr)
{
	return -ENOTSUPP;
}

static int __init init_trampolines(void)
{
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
	return 0;
}
late_initcall(init_trampolines);