// SPDX-License-Identifier: GPL-2.0-only
/*
 * eBPF JIT compiler
 *
 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 *		  IBM Corporation
 *
 * Based on the powerpc classic BPF JIT compiler by Matt Evans
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <asm/asm-compat.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/kernel.h>
#include <linux/memory.h>
#include <linux/bpf.h>

#include <asm/kprobes.h>
#include <asm/text-patching.h>

#include "bpf_jit.h"

/* These offsets are from bpf prog end and stay the same across progs */
static int bpf_jit_ool_stub, bpf_jit_long_branch_stub;

static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}

void dummy_tramp(void);

asm (
"	.pushsection .text, \"ax\", @progbits	;"
"	.global dummy_tramp			;"
"	.type dummy_tramp, @function		;"
"dummy_tramp:					;"
#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
"	blr					;"
#else
/* LR is always in r11, so we don't need a 'mflr r11' here */
"	mtctr	11				;"
"	mtlr	0				;"
"	bctr					;"
#endif
"	.size dummy_tramp, .-dummy_tramp	;"
"	.popsection				;"
);

void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx)
{
	int ool_stub_idx, long_branch_stub_idx;

	/*
	 * Out-of-line stub:
	 *		mflr	r0
	 *		[b|bl]	tramp
	 *		mtlr	r0 // only with CONFIG_PPC_FTRACE_OUT_OF_LINE
	 *		b	bpf_func + 4
	 */
	ool_stub_idx = ctx->idx;
	EMIT(PPC_RAW_MFLR(_R0));
	EMIT(PPC_RAW_NOP());
	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
		EMIT(PPC_RAW_MTLR(_R0));
	WARN_ON_ONCE(!is_offset_in_branch_range(4 - (long)ctx->idx * 4));
	EMIT(PPC_RAW_BRANCH(4 - (long)ctx->idx * 4));

	/*
	 * Long branch stub:
	 *		.long	<dummy_tramp_addr>
	 *		mflr	r11
	 *		bcl	20,31,$+4
	 *		mflr	r12
	 *		ld	r12, -8-SZL(r12)
	 *		mtctr	r12
	 *		mtlr	r11 // needed to retain ftrace ABI
	 *		bctr
	 */
	if (image)
		*((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp;
	ctx->idx += SZL / 4;
	long_branch_stub_idx = ctx->idx;
	EMIT(PPC_RAW_MFLR(_R11));
	EMIT(PPC_RAW_BCL4());
	EMIT(PPC_RAW_MFLR(_R12));
	EMIT(PPC_RAW_LL(_R12, _R12, -8-SZL));
	EMIT(PPC_RAW_MTCTR(_R12));
	EMIT(PPC_RAW_MTLR(_R11));
	EMIT(PPC_RAW_BCTR());

	if (!bpf_jit_ool_stub) {
		bpf_jit_ool_stub = (ctx->idx - ool_stub_idx) * 4;
		bpf_jit_long_branch_stub = (ctx->idx - long_branch_stub_idx) * 4;
	}
}

int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr)
{
	if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) {
		PPC_JMP(exit_addr);
	} else if (ctx->alt_exit_addr) {
		if (WARN_ON(!is_offset_in_branch_range((long)ctx->alt_exit_addr - (ctx->idx * 4))))
			return -1;
		PPC_JMP(ctx->alt_exit_addr);
	} else {
		ctx->alt_exit_addr = ctx->idx * 4;
		bpf_jit_build_epilogue(image, ctx);
	}

	return 0;
}

struct powerpc_jit_data {
	/* address of rw header */
	struct bpf_binary_header *hdr;
	/* address of ro final header */
	struct bpf_binary_header *fhdr;
	u32 *addrs;
	u8 *fimage;
	u32 proglen;
	struct codegen_context ctx;
};

bool bpf_jit_needs_zext(void)
{
	return true;
}

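/*
 * Main JIT entry point: a scouting pass sizes the program, then two code
 * generation passes emit into a writable image that is moved to its
 * read-only final location using instruction patching. For subprogs, state
 * is stashed in powerpc_jit_data so that the extra pass can re-emit the
 * program with final addresses.
 */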
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
	u32 alloclen;
	u8 *image = NULL;
	u32 *code_base;
	u32 *addrs;
	struct powerpc_jit_data *jit_data;
	struct codegen_context cgctx;
	int pass;
	int flen;
	struct bpf_binary_header *fhdr = NULL;
	struct bpf_binary_header *hdr = NULL;
	struct bpf_prog *org_fp = fp;
	struct bpf_prog *tmp_fp;
	bool bpf_blinded = false;
	bool extra_pass = false;
	u8 *fimage = NULL;
	u32 *fcode_base;
	u32 extable_len;
	u32 fixup_len;

	if (!fp->jit_requested)
		return org_fp;

	tmp_fp = bpf_jit_blind_constants(org_fp);
	if (IS_ERR(tmp_fp))
		return org_fp;

	if (tmp_fp != org_fp) {
		bpf_blinded = true;
		fp = tmp_fp;
	}

	jit_data = fp->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			fp = org_fp;
			goto out;
		}
		fp->aux->jit_data = jit_data;
	}

	flen = fp->len;
	addrs = jit_data->addrs;
	if (addrs) {
		cgctx = jit_data->ctx;
		/*
		 * JIT compiled to a writable location (image/code_base) first.
		 * It is then moved to the readonly final location (fimage/fcode_base)
		 * using instruction patching.
		 */
		fimage = jit_data->fimage;
		fhdr = jit_data->fhdr;
		proglen = jit_data->proglen;
		hdr = jit_data->hdr;
		image = (void *)hdr + ((void *)fimage - (void *)fhdr);
		extra_pass = true;
		/* During extra pass, ensure index is reset before repopulating extable entries */
		cgctx.exentry_idx = 0;
		goto skip_init_ctx;
	}

	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL) {
		fp = org_fp;
		goto out_addrs;
	}

	memset(&cgctx, 0, sizeof(struct codegen_context));
	bpf_jit_init_reg_mapping(&cgctx);

	/* Make sure that the stack is quadword aligned. */
	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);

	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
		/* We hit something illegal or unsupported. */
		fp = org_fp;
		goto out_addrs;
	}

	/*
	 * If we have seen a tail call, we need a second pass.
	 * This is because bpf_jit_emit_common_epilogue() is called
	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
	 * We also need a second pass if we ended up with too large
	 * a program so as to ensure BPF_EXIT branches are in range.
	 */
	if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
		cgctx.idx = 0;
		if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
			fp = org_fp;
			goto out_addrs;
		}
	}

	bpf_jit_realloc_regs(&cgctx);
	/*
	 * Pretend to build prologue, given the features we've seen. This will
	 * update cgctx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(NULL, &cgctx);
	addrs[fp->len] = cgctx.idx * 4;
	bpf_jit_build_epilogue(NULL, &cgctx);
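
	/*
	 * cgctx.idx now covers the prologue, body and epilogue, so size the
	 * allocation: code, optional function descriptor, exception fixup
	 * stubs and the exception table itself.
	 */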
	fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
	extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;

	fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
					 bpf_jit_fill_ill_insns);
	if (!fhdr) {
		fp = org_fp;
		goto out_addrs;
	}

	if (extable_len)
		fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;

skip_init_ctx:
	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
	fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);

	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		cgctx.alt_exit_addr = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass,
				       extra_pass)) {
			bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
			bpf_jit_binary_pack_free(fhdr, hdr);
			fp = org_fp;
			goto out_addrs;
		}
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

	if (bpf_jit_enable > 1)
		/*
		 * Note that we output the base address of the code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

#ifdef CONFIG_PPC64_ELF_ABI_V1
	/* Function descriptor nastiness: Address + TOC */
	((u64 *)image)[0] = (u64)fcode_base;
	((u64 *)image)[1] = local_paca->kernel_toc;
#endif

	fp->bpf_func = (void *)fimage;
	fp->jited = 1;
	fp->jited_len = cgctx.idx * 4 + FUNCTION_DESCR_SIZE;

	if (!fp->is_func || extra_pass) {
		if (bpf_jit_binary_pack_finalize(fhdr, hdr)) {
			fp = org_fp;
			goto out_addrs;
		}
		bpf_prog_fill_jited_linfo(fp, addrs);
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		fp->aux->jit_data = NULL;
	} else {
		jit_data->addrs = addrs;
		jit_data->ctx = cgctx;
		jit_data->proglen = proglen;
		jit_data->fimage = fimage;
		jit_data->fhdr = fhdr;
		jit_data->hdr = hdr;
	}

out:
	if (bpf_blinded)
		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

	return fp;
}

/*
 * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
 * this function, as this only applies to BPF_PROBE_MEM, for now.
 */
int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
			  struct codegen_context *ctx, int insn_idx, int jmp_off,
			  int dst_reg)
{
	off_t offset;
	unsigned long pc;
	struct exception_table_entry *ex, *ex_entry;
	u32 *fixup;

	/* Populate extable entries only in the last pass */
	if (pass != 2)
		return 0;

	if (!fp->aux->extable ||
	    WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
		return -EINVAL;

	/*
	 * Program is first written to image before copying to the
	 * final location (fimage). Accordingly, update in the image first.
	 * As all offsets used are relative, copying as is to the
	 * final location should be alright.
	 */
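	/*
	 * The fixup stubs live between the program and the extable. Each stub
	 * clears dst_reg (and the upper half of the pair on 32-bit) and then
	 * branches back into the program at pc + jmp_off.
	 */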
	pc = (unsigned long)&image[insn_idx];
	ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;

	fixup = (void *)ex -
		(fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
		(ctx->exentry_idx * BPF_FIXUP_LEN * 4);

	fixup[0] = PPC_RAW_LI(dst_reg, 0);
	if (IS_ENABLED(CONFIG_PPC32))
		fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */

	fixup[BPF_FIXUP_LEN - 1] =
		PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);

	ex_entry = &ex[ctx->exentry_idx];

	offset = pc - (long)&ex_entry->insn;
	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
		return -ERANGE;
	ex_entry->insn = offset;

	offset = (long)fixup - (long)&ex_entry->fixup;
	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
		return -ERANGE;
	ex_entry->fixup = offset;

	ctx->exentry_idx++;
	return 0;
}

void *bpf_arch_text_copy(void *dst, void *src, size_t len)
{
	int err;

	if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
		return ERR_PTR(-EINVAL);

	mutex_lock(&text_mutex);
	err = patch_instructions(dst, src, len, false);
	mutex_unlock(&text_mutex);

	return err ? ERR_PTR(err) : dst;
}
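
/*
 * Fill a range of JITed code with trap instructions so it can no longer be
 * executed; serialized against other text patching via text_mutex.
 */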
int bpf_arch_text_invalidate(void *dst, size_t len)
{
	u32 insn = BREAKPOINT_INSTRUCTION;
	int ret;

	if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
		return -EINVAL;

	mutex_lock(&text_mutex);
	ret = patch_instructions(dst, &insn, len, true);
	mutex_unlock(&text_mutex);

	return ret;
}

void bpf_jit_free(struct bpf_prog *fp)
{
	if (fp->jited) {
		struct powerpc_jit_data *jit_data = fp->aux->jit_data;
		struct bpf_binary_header *hdr;

		/*
		 * If we fail the final pass of JIT (from jit_subprogs),
		 * the program may not be finalized yet. Call finalize here
		 * before freeing it.
		 */
		if (jit_data) {
			bpf_jit_binary_pack_finalize(jit_data->fhdr, jit_data->hdr);
			kvfree(jit_data->addrs);
			kfree(jit_data);
		}
		hdr = bpf_jit_binary_pack_hdr(fp);
		bpf_jit_binary_pack_free(hdr, NULL);
		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
	}

	bpf_prog_unlock_free(fp);
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}

bool bpf_jit_supports_far_kfunc_call(void)
{
	return IS_ENABLED(CONFIG_PPC64);
}

void *arch_alloc_bpf_trampoline(unsigned int size)
{
	return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns);
}

void arch_free_bpf_trampoline(void *image, unsigned int size)
{
	bpf_prog_pack_free(image, size);
}

int arch_protect_bpf_trampoline(void *image, unsigned int size)
{
	return 0;
}

static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ctx,
			   struct bpf_tramp_link *l, int regs_off, int retval_off,
			   int run_ctx_off, bool save_ret)
{
	struct bpf_prog *p = l->link.prog;
	ppc_inst_t branch_insn;
	u32 jmp_idx;
	int ret = 0;

	/* Save cookie */
	if (IS_ENABLED(CONFIG_PPC64)) {
		PPC_LI64(_R3, l->cookie);
		EMIT(PPC_RAW_STD(_R3, _R1, run_ctx_off + offsetof(struct bpf_tramp_run_ctx,
								  bpf_cookie)));
	} else {
		PPC_LI32(_R3, l->cookie >> 32);
		PPC_LI32(_R4, l->cookie);
		EMIT(PPC_RAW_STW(_R3, _R1,
				 run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie)));
		EMIT(PPC_RAW_STW(_R4, _R1,
				 run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie) + 4));
	}

	/* __bpf_prog_enter(p, &bpf_tramp_run_ctx) */
	PPC_LI_ADDR(_R3, p);
	EMIT(PPC_RAW_MR(_R25, _R3));
	EMIT(PPC_RAW_ADDI(_R4, _R1, run_ctx_off));
	ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
					 (unsigned long)bpf_trampoline_enter(p));
	if (ret)
		return ret;

	/* Remember prog start time returned by __bpf_prog_enter */
	EMIT(PPC_RAW_MR(_R26, _R3));

	/*
	 * if (__bpf_prog_enter(p) == 0)
	 *	goto skip_exec_of_prog;
	 *
	 * Emit a nop to be later patched with conditional branch, once offset is known
	 */
	EMIT(PPC_RAW_CMPLI(_R3, 0));
	jmp_idx = ctx->idx;
	EMIT(PPC_RAW_NOP());

	/* p->bpf_func(ctx) */
	EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off));
	if (!p->jited)
		PPC_LI_ADDR(_R4, (unsigned long)p->insnsi);
	if (!create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], (unsigned long)p->bpf_func,
			   BRANCH_SET_LINK)) {
		if (image)
			image[ctx->idx] = ppc_inst_val(branch_insn);
		ctx->idx++;
	} else {
		EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func)));
		EMIT(PPC_RAW_MTCTR(_R12));
		EMIT(PPC_RAW_BCTRL());
	}

	if (save_ret)
		EMIT(PPC_RAW_STL(_R3, _R1, retval_off));

	/* Fix up branch */
	if (image) {
		if (create_cond_branch(&branch_insn, &image[jmp_idx],
				       (unsigned long)&image[ctx->idx], COND_EQ << 16))
			return -EINVAL;
		image[jmp_idx] = ppc_inst_val(branch_insn);
	}

	/* __bpf_prog_exit(p, start_time, &bpf_tramp_run_ctx) */
	EMIT(PPC_RAW_MR(_R3, _R25));
	EMIT(PPC_RAW_MR(_R4, _R26));
	EMIT(PPC_RAW_ADDI(_R5, _R1, run_ctx_off));
	ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
					 (unsigned long)bpf_trampoline_exit(p));

	return ret;
}
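
/*
 * Invoke each fmod_ret program and, after each one, emit a nop whose index is
 * recorded in @branches so it can later be patched into a conditional branch
 * to do_fexit (see __arch_prepare_bpf_trampoline()).
 */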
static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context *ctx,
			      struct bpf_tramp_links *tl, int regs_off, int retval_off,
			      int run_ctx_off, u32 *branches)
{
	int i;

	/*
	 * The first fmod_ret program will receive a garbage return value.
	 * Set this to 0 to avoid confusing the program.
	 */
	EMIT(PPC_RAW_LI(_R3, 0));
	EMIT(PPC_RAW_STL(_R3, _R1, retval_off));
	for (i = 0; i < tl->nr_links; i++) {
		if (invoke_bpf_prog(image, ro_image, ctx, tl->links[i], regs_off, retval_off,
				    run_ctx_off, true))
			return -EINVAL;

		/*
		 * mod_ret prog stored return value after prog ctx. Emit:
		 * if (*(u64 *)(ret_val) != 0)
		 *	goto do_fexit;
		 */
		EMIT(PPC_RAW_LL(_R3, _R1, retval_off));
		EMIT(PPC_RAW_CMPLI(_R3, 0));

		/*
		 * Save the location of the branch and generate a nop, which is
		 * replaced with a conditional jump once do_fexit (i.e. the
		 * start of the fexit invocation) is finalized.
		 */
		branches[i] = ctx->idx;
		EMIT(PPC_RAW_NOP());
	}

	return 0;
}

static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx,
					       int func_frame_offset, int r4_off)
{
	if (IS_ENABLED(CONFIG_PPC64)) {
		/* See bpf_jit_stack_tailcallcnt() */
		int tailcallcnt_offset = 6 * 8;

		EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
		EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset));
	} else {
		/* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
		EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
	}
}

static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx,
						 int func_frame_offset, int r4_off)
{
	if (IS_ENABLED(CONFIG_PPC64)) {
		/* See bpf_jit_stack_tailcallcnt() */
		int tailcallcnt_offset = 6 * 8;

		EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
		EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
	} else {
		/* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
		EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
	}
}

static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset,
				     int nr_regs, int regs_off)
{
	int param_save_area_offset;

	param_save_area_offset = func_frame_offset; /* the two frames we allotted */
	param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */

	for (int i = 0; i < nr_regs; i++) {
		if (i < 8) {
			EMIT(PPC_RAW_STL(_R3 + i, _R1, regs_off + i * SZL));
		} else {
			EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL));
			EMIT(PPC_RAW_STL(_R3, _R1, regs_off + i * SZL));
		}
	}
}

/* Used when restoring just the register parameters when returning back */
static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context *ctx,
					     int nr_regs, int regs_off)
{
	for (int i = 0; i < nr_regs && i < 8; i++)
		EMIT(PPC_RAW_LL(_R3 + i, _R1, regs_off + i * SZL));
}
/* Used when we call into the traced function. Replicate parameter save area */
static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx,
					      int func_frame_offset, int nr_regs, int regs_off)
{
	int param_save_area_offset;

	param_save_area_offset = func_frame_offset; /* the two frames we allotted */
	param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */

	for (int i = 8; i < nr_regs; i++) {
		EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL));
		EMIT(PPC_RAW_STL(_R3, _R1, STACK_FRAME_MIN_SIZE + i * SZL));
	}
	bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off);
}

static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image,
					 void *rw_image_end, void *ro_image,
					 const struct btf_func_model *m, u32 flags,
					 struct bpf_tramp_links *tlinks,
					 void *func_addr)
{
	int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0;
	int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset;
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	struct codegen_context codegen_ctx, *ctx;
	u32 *image = (u32 *)rw_image;
	ppc_inst_t branch_insn;
	u32 *branches = NULL;
	bool save_ret;

	if (IS_ENABLED(CONFIG_PPC32))
		return -EOPNOTSUPP;

	nr_regs = m->nr_args;
	/* Extra registers for struct arguments */
	for (i = 0; i < m->nr_args; i++)
		if (m->arg_size[i] > SZL)
			nr_regs += round_up(m->arg_size[i], SZL) / SZL - 1;

	if (nr_regs > MAX_BPF_FUNC_ARGS)
		return -EOPNOTSUPP;

	ctx = &codegen_ctx;
	memset(ctx, 0, sizeof(*ctx));

	/*
	 * Generated stack layout:
	 *
	 * func prev back chain         [ back chain        ]
	 *                              [                    ]
	 * bpf prog redzone/tailcallcnt [ ...                ] 64 bytes (64-bit powerpc)
	 *                              [                    ] --
	 * LR save area                 [ r0 save (64-bit)   ]   | header
	 *                              [ r0 save (32-bit)   ]   |
	 * dummy frame for unwind       [ back chain 1       ] --
	 *                              [ padding            ] align stack frame
	 *       r4_off                 [ r4 (tailcallcnt)   ] optional - 32-bit powerpc
	 *       alt_lr_off             [ real lr (ool stub) ] optional - actual lr
	 *                              [   r26              ]
	 *       nvr_off                [   r25              ] nvr save area
	 *       retval_off             [ return value       ]
	 *                              [ reg argN           ]
	 *                              [ ...                ]
	 *       regs_off               [ reg_arg1           ] prog ctx context
	 *       nregs_off              [ args count         ]
	 *       ip_off                 [ traced function    ]
	 *                              [ ...                ]
	 *       run_ctx_off            [ bpf_tramp_run_ctx  ]
	 *                              [ reg argN           ]
	 *                              [ ...                ]
	 *       param_save_area        [ reg_arg1           ] min 8 doublewords, per ABI
	 *                              [ TOC save (64-bit)  ] --
	 *                              [ LR save (64-bit)   ]   | header
	 *                              [ LR save (32-bit)   ]   |
	 * bpf trampoline frame         [ back chain 2       ] --
	 *
	 */

	/* Minimum stack frame header */
	bpf_frame_size = STACK_FRAME_MIN_SIZE;

	/*
	 * Room for parameter save area.
	 *
	 * As per the ABI, this is required if we call into the traced
	 * function (BPF_TRAMP_F_CALL_ORIG):
	 * - if the function takes more than 8 arguments, for the rest to spill onto the stack
	 * - or, if the function has variadic arguments
	 * - or, if this function's prototype was not available to the caller
	 *
	 * Reserve space for at least 8 registers for now. This can be optimized later.
	 */
	bpf_frame_size += (nr_regs > 8 ? nr_regs : 8) * SZL;
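
	/*
	 * The *_off values computed below are offsets from the trampoline's
	 * stack pointer into its own frame, accumulated to match the layout
	 * pictured above.
	 */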
	/* Room for struct bpf_tramp_run_ctx */
	run_ctx_off = bpf_frame_size;
	bpf_frame_size += round_up(sizeof(struct bpf_tramp_run_ctx), SZL);

	/* Room for IP address argument */
	ip_off = bpf_frame_size;
	if (flags & BPF_TRAMP_F_IP_ARG)
		bpf_frame_size += SZL;

	/* Room for args count */
	nregs_off = bpf_frame_size;
	bpf_frame_size += SZL;

	/* Room for args */
	regs_off = bpf_frame_size;
	bpf_frame_size += nr_regs * SZL;

	/* Room for return value of func_addr or fentry prog */
	retval_off = bpf_frame_size;
	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
	if (save_ret)
		bpf_frame_size += SZL;

	/* Room for nvr save area */
	nvr_off = bpf_frame_size;
	bpf_frame_size += 2 * SZL;

	/* Optional save area for actual LR in case of ool ftrace */
	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
		alt_lr_off = bpf_frame_size;
		bpf_frame_size += SZL;
	}

	if (IS_ENABLED(CONFIG_PPC32)) {
		if (nr_regs < 2) {
			r4_off = bpf_frame_size;
			bpf_frame_size += SZL;
		} else {
			r4_off = regs_off + SZL;
		}
	}

	/* Padding to align stack frame, if any */
	bpf_frame_size = round_up(bpf_frame_size, SZL * 2);

	/* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */
	bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;

	/* Offset to the traced function's stack frame */
	func_frame_offset = bpf_dummy_frame_size + bpf_frame_size;

	/* Create dummy frame for unwind, store original return value */
	EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF));
	/* Protect red zone where tail call count goes */
	EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size));

	/* Create our stack frame */
	EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size));

	/* 64-bit: Save TOC and load kernel TOC */
	if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
		EMIT(PPC_RAW_STD(_R2, _R1, 24));
		PPC64_LOAD_PACA();
	}

	/* 32-bit: save tail call count in r4 */
	if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2)
		EMIT(PPC_RAW_STL(_R4, _R1, r4_off));

	bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off);

	/* Save our return address */
	EMIT(PPC_RAW_MFLR(_R3));
	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
		EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off));
	else
		EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));

	/*
	 * Save ip address of the traced function.
	 * We could recover this from LR, but we would need to adjust it for the
	 * OOL stub and the optional GEP area.
	 */
	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) || flags & BPF_TRAMP_F_IP_ARG) {
		EMIT(PPC_RAW_LWZ(_R4, _R3, 4));
		EMIT(PPC_RAW_SLWI(_R4, _R4, 6));
		EMIT(PPC_RAW_SRAWI(_R4, _R4, 6));
		EMIT(PPC_RAW_ADD(_R3, _R3, _R4));
		EMIT(PPC_RAW_ADDI(_R3, _R3, 4));
	}

	if (flags & BPF_TRAMP_F_IP_ARG)
		EMIT(PPC_RAW_STL(_R3, _R1, ip_off));

	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
		/* Fake our LR for unwind */
		EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));

	/* Save function arg count -- see bpf_get_func_arg_cnt() */
	EMIT(PPC_RAW_LI(_R3, nr_regs));
	EMIT(PPC_RAW_STL(_R3, _R1, nregs_off));

	/* Save nv regs */
	EMIT(PPC_RAW_STL(_R25, _R1, nvr_off));
	EMIT(PPC_RAW_STL(_R26, _R1, nvr_off + SZL));

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		PPC_LI_ADDR(_R3, (unsigned long)im);
		ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
						 (unsigned long)__bpf_tramp_enter);
		if (ret)
			return ret;
	}

	for (i = 0; i < fentry->nr_links; i++)
		if (invoke_bpf_prog(image, ro_image, ctx, fentry->links[i], regs_off, retval_off,
				    run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET))
			return -EINVAL;

	if (fmod_ret->nr_links) {
		branches = kcalloc(fmod_ret->nr_links, sizeof(u32), GFP_KERNEL);
		if (!branches)
			return -ENOMEM;

		if (invoke_bpf_mod_ret(image, ro_image, ctx, fmod_ret, regs_off, retval_off,
				       run_ctx_off, branches)) {
			ret = -EINVAL;
			goto cleanup;
		}
	}

	/* Call the traced function */
	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/*
		 * The address in LR save area points to the correct point in the original function
		 * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction
		 * sequence
		 */
		EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
		EMIT(PPC_RAW_MTCTR(_R3));

		/* Replicate tail_call_cnt before calling the original BPF prog */
		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
			bpf_trampoline_setup_tail_call_cnt(image, ctx, func_frame_offset, r4_off);

		/* Restore args */
		bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off);

		/* Restore TOC for 64-bit */
		if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
			EMIT(PPC_RAW_LD(_R2, _R1, 24));
		EMIT(PPC_RAW_BCTRL());
		if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
			PPC64_LOAD_PACA();

		/* Store return value for bpf prog to access */
		EMIT(PPC_RAW_STL(_R3, _R1, retval_off));

		/* Restore updated tail_call_cnt */
		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
			bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);

		/* Reserve space to patch branch instruction to skip fexit progs */
		im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
		EMIT(PPC_RAW_NOP());
	}

	/* Update branches saved in invoke_bpf_mod_ret with address of do_fexit */
	for (i = 0; i < fmod_ret->nr_links && image; i++) {
		if (create_cond_branch(&branch_insn, &image[branches[i]],
				       (unsigned long)&image[ctx->idx], COND_NE << 16)) {
			ret = -EINVAL;
			goto cleanup;
		}

		image[branches[i]] = ppc_inst_val(branch_insn);
	}
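
	/* do_fexit: the conditional branches patched above land here */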
	for (i = 0; i < fexit->nr_links; i++)
		if (invoke_bpf_prog(image, ro_image, ctx, fexit->links[i], regs_off, retval_off,
				    run_ctx_off, false)) {
			ret = -EINVAL;
			goto cleanup;
		}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
		PPC_LI_ADDR(_R3, im);
		ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
						 (unsigned long)__bpf_tramp_exit);
		if (ret)
			goto cleanup;
	}

	if (flags & BPF_TRAMP_F_RESTORE_REGS)
		bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off);

	/* Restore return value of func_addr or fentry prog */
	if (save_ret)
		EMIT(PPC_RAW_LL(_R3, _R1, retval_off));

	/* Restore nv regs */
	EMIT(PPC_RAW_LL(_R26, _R1, nvr_off + SZL));
	EMIT(PPC_RAW_LL(_R25, _R1, nvr_off));

	/* Epilogue */
	if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
		EMIT(PPC_RAW_LD(_R2, _R1, 24));
	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
		/* Skip the traced function and return to parent */
		EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
		EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
		EMIT(PPC_RAW_MTLR(_R0));
		EMIT(PPC_RAW_BLR());
	} else {
		if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
			EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off));
			EMIT(PPC_RAW_MTLR(_R0));
			EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
			EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
			EMIT(PPC_RAW_BLR());
		} else {
			EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + PPC_LR_STKOFF));
			EMIT(PPC_RAW_MTCTR(_R0));
			EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
			EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
			EMIT(PPC_RAW_MTLR(_R0));
			EMIT(PPC_RAW_BCTR());
		}
	}

	/* Make sure the trampoline generation logic doesn't overflow */
	if (image && WARN_ON_ONCE(&image[ctx->idx] > (u32 *)rw_image_end - BPF_INSN_SAFETY)) {
		ret = -EFAULT;
		goto cleanup;
	}
	ret = ctx->idx * 4 + BPF_INSN_SAFETY * 4;

cleanup:
	kfree(branches);
	return ret;
}

int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
			     struct bpf_tramp_links *tlinks, void *func_addr)
{
	struct bpf_tramp_image im;
	void *image;
	int ret;

	/*
	 * Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
	 * This will NOT cause fragmentation in direct map, as we do not
	 * call set_memory_*() on this buffer.
	 *
	 * We cannot use kvmalloc here, because we need image to be in
	 * module memory range.
	 */
	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!image)
		return -ENOMEM;

	ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
					    m, flags, tlinks, func_addr);
	bpf_jit_free_exec(image);

	return ret;
}
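
/*
 * Build the trampoline into a temporary writable buffer, then copy it into
 * the read-only image with bpf_arch_text_copy().
 */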
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
				const struct btf_func_model *m, u32 flags,
				struct bpf_tramp_links *tlinks,
				void *func_addr)
{
	u32 size = image_end - image;
	void *rw_image, *tmp;
	int ret;

	/*
	 * rw_image doesn't need to be in module memory range, so we can
	 * use kvmalloc.
	 */
	rw_image = kvmalloc(size, GFP_KERNEL);
	if (!rw_image)
		return -ENOMEM;

	ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m,
					    flags, tlinks, func_addr);
	if (ret < 0)
		goto out;

	if (bpf_jit_enable > 1)
		bpf_jit_dump(1, ret - BPF_INSN_SAFETY * 4, 1, rw_image);

	tmp = bpf_arch_text_copy(image, rw_image, size);
	if (IS_ERR(tmp))
		ret = PTR_ERR(tmp);

out:
	kvfree(rw_image);
	return ret;
}

static int bpf_modify_inst(void *ip, ppc_inst_t old_inst, ppc_inst_t new_inst)
{
	ppc_inst_t org_inst;

	if (copy_inst_from_kernel_nofault(&org_inst, ip)) {
		pr_err("0x%lx: fetching instruction failed\n", (unsigned long)ip);
		return -EFAULT;
	}

	if (!ppc_inst_equal(org_inst, old_inst)) {
		pr_err("0x%lx: expected (%08lx) != found (%08lx)\n",
		       (unsigned long)ip, ppc_inst_as_ulong(old_inst), ppc_inst_as_ulong(org_inst));
		return -EINVAL;
	}

	if (ppc_inst_equal(old_inst, new_inst))
		return 0;

	return patch_instruction(ip, new_inst);
}

static void do_isync(void *info __maybe_unused)
{
	isync();
}

/*
 * A 3-step process for bpf prog entry:
 * 1. At bpf prog entry, a single nop/b:
 * bpf_func:
 *	[nop|b]	ool_stub
 * 2. Out-of-line stub:
 * ool_stub:
 *	mflr	r0
 *	[b|bl]	<bpf_prog>/<long_branch_stub>
 *	mtlr	r0 // CONFIG_PPC_FTRACE_OUT_OF_LINE only
 *	b	bpf_func + 4
 * 3. Long branch stub:
 * long_branch_stub:
 *	.long	<branch_addr>/<dummy_tramp>
 *	mflr	r11
 *	bcl	20,31,$+4
 *	mflr	r12
 *	ld	r12, -16(r12)
 *	mtctr	r12
 *	mtlr	r11 // needed to retain ftrace ABI
 *	bctr
 *
 * dummy_tramp is used to reduce synchronization requirements.
 *
 * When attaching a bpf trampoline to a bpf prog, we do not need any
 * synchronization here since we always have a valid branch target regardless
 * of the order in which the above stores are seen. dummy_tramp ensures that
 * the long_branch stub goes to a valid destination on other cpus, even when
 * the branch to the long_branch stub is seen before the updated trampoline
 * address.
 *
 * However, when detaching a bpf trampoline from a bpf prog, or if changing
 * the bpf trampoline address, we need synchronization to ensure that other
 * cpus can no longer branch into the older trampoline so that it can be
 * safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus
 * make forward progress, but we still need to ensure that other cpus
 * execute isync (or some CSI) so that they don't go back into the
 * trampoline again.
 */
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
		       void *old_addr, void *new_addr)
{
	unsigned long bpf_func, bpf_func_end, size, offset;
	ppc_inst_t old_inst, new_inst;
	int ret = 0, branch_flags;
	char name[KSYM_NAME_LEN];

	if (IS_ENABLED(CONFIG_PPC32))
		return -EOPNOTSUPP;

	bpf_func = (unsigned long)ip;
	branch_flags = poke_type == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;

	/* We currently only support poking bpf programs */
	if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) {
		pr_err("%s (0x%lx): kernel/modules are not supported\n", __func__, bpf_func);
		return -EOPNOTSUPP;
	}

	/*
	 * If we are not poking at bpf prog entry, then we are simply patching in/out
	 * an unconditional branch instruction at im->ip_after_call
	 */
	if (offset) {
		if (poke_type != BPF_MOD_JUMP) {
			pr_err("%s (0x%lx): calls are not supported in bpf prog body\n", __func__,
			       bpf_func);
			return -EOPNOTSUPP;
		}
		old_inst = ppc_inst(PPC_RAW_NOP());
		if (old_addr)
			if (create_branch(&old_inst, ip, (unsigned long)old_addr, 0))
				return -ERANGE;
		new_inst = ppc_inst(PPC_RAW_NOP());
		if (new_addr)
			if (create_branch(&new_inst, ip, (unsigned long)new_addr, 0))
				return -ERANGE;
		mutex_lock(&text_mutex);
		ret = bpf_modify_inst(ip, old_inst, new_inst);
		mutex_unlock(&text_mutex);

		/* Make sure all cpus see the new instruction */
		smp_call_function(do_isync, NULL, 1);
		return ret;
	}

	bpf_func_end = bpf_func + size;

	/* Address of the jmp/call instruction in the out-of-line stub */
	ip = (void *)(bpf_func_end - bpf_jit_ool_stub + 4);

	if (!is_offset_in_branch_range((long)ip - 4 - bpf_func)) {
		pr_err("%s (0x%lx): bpf prog too large, ool stub out of branch range\n", __func__,
		       bpf_func);
		return -ERANGE;
	}

	old_inst = ppc_inst(PPC_RAW_NOP());
	if (old_addr) {
		if (is_offset_in_branch_range(ip - old_addr))
			create_branch(&old_inst, ip, (unsigned long)old_addr, branch_flags);
		else
			create_branch(&old_inst, ip, bpf_func_end - bpf_jit_long_branch_stub,
				      branch_flags);
	}
	new_inst = ppc_inst(PPC_RAW_NOP());
	if (new_addr) {
		if (is_offset_in_branch_range(ip - new_addr))
			create_branch(&new_inst, ip, (unsigned long)new_addr, branch_flags);
		else
			create_branch(&new_inst, ip, bpf_func_end - bpf_jit_long_branch_stub,
				      branch_flags);
	}

	mutex_lock(&text_mutex);

	/*
	 * 1. Update the address in the long branch stub:
	 * If new_addr is out of range, we will have to use the long branch stub, so patch new_addr
	 * here. Otherwise, revert to dummy_tramp, but only if we had patched old_addr here.
	 */
	if ((new_addr && !is_offset_in_branch_range(new_addr - ip)) ||
	    (old_addr && !is_offset_in_branch_range(old_addr - ip)))
		ret = patch_ulong((void *)(bpf_func_end - bpf_jit_long_branch_stub - SZL),
				  (new_addr && !is_offset_in_branch_range(new_addr - ip)) ?
				  (unsigned long)new_addr : (unsigned long)dummy_tramp);
	if (ret)
		goto out;

	/* 2. Update the branch/call in the out-of-line stub */
	ret = bpf_modify_inst(ip, old_inst, new_inst);
	if (ret)
		goto out;

	/* 3. Update instruction at bpf prog entry */
	ip = (void *)bpf_func;
	if (!old_addr || !new_addr) {
		if (!old_addr) {
			old_inst = ppc_inst(PPC_RAW_NOP());
			create_branch(&new_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0);
		} else {
			new_inst = ppc_inst(PPC_RAW_NOP());
			create_branch(&old_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0);
		}
		ret = bpf_modify_inst(ip, old_inst, new_inst);
	}

out:
	mutex_unlock(&text_mutex);

	/*
	 * Sync only if we are not attaching a trampoline to a bpf prog so the older
	 * trampoline can be freed safely.
	 */
	if (old_addr)
		smp_call_function(do_isync, NULL, 1);

	return ret;
}