// SPDX-License-Identifier: GPL-2.0-only
/*
 * eBPF JIT compiler
 *
 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 *		  IBM Corporation
 *
 * Based on the powerpc classic BPF JIT compiler by Matt Evans
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <asm/asm-compat.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/kernel.h>
#include <linux/memory.h>
#include <linux/bpf.h>

#include <asm/kprobes.h>
#include <asm/text-patching.h>

#include "bpf_jit.h"

/* These offsets are from bpf prog end and stay the same across progs */
static int bpf_jit_ool_stub, bpf_jit_long_branch_stub;

static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}

void dummy_tramp(void);

asm (
"	.pushsection .text, \"ax\", @progbits	;"
"	.global dummy_tramp			;"
"	.type dummy_tramp, @function		;"
"dummy_tramp:					;"
#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
"	blr					;"
#else
/* LR is always in r11, so we don't need a 'mflr r11' here */
"	mtctr	11				;"
"	mtlr	0				;"
"	bctr					;"
#endif
"	.size dummy_tramp, .-dummy_tramp	;"
"	.popsection				;"
);

void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx)
{
	int ool_stub_idx, long_branch_stub_idx;

	/*
	 * Out-of-line stub:
	 *	mflr	r0
	 *	[b|bl]	tramp
	 *	mtlr	r0 // only with CONFIG_PPC_FTRACE_OUT_OF_LINE
	 *	b	bpf_func + 4
	 */
	ool_stub_idx = ctx->idx;
	EMIT(PPC_RAW_MFLR(_R0));
	EMIT(PPC_RAW_NOP());
	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
		EMIT(PPC_RAW_MTLR(_R0));
	WARN_ON_ONCE(!is_offset_in_branch_range(4 - (long)ctx->idx * 4));
	EMIT(PPC_RAW_BRANCH(4 - (long)ctx->idx * 4));

	/*
	 * Long branch stub:
	 *	.long	<dummy_tramp_addr>
	 *	mflr	r11
	 *	bcl	20,31,$+4
	 *	mflr	r12
	 *	ld	r12, -8-SZL(r12)
	 *	mtctr	r12
	 *	mtlr	r11 // needed to retain ftrace ABI
	 *	bctr
	 */
	if (image)
		*((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp;
	ctx->idx += SZL / 4;
	long_branch_stub_idx = ctx->idx;
	EMIT(PPC_RAW_MFLR(_R11));
	EMIT(PPC_RAW_BCL4());
	EMIT(PPC_RAW_MFLR(_R12));
	EMIT(PPC_RAW_LL(_R12, _R12, -8-SZL));
	EMIT(PPC_RAW_MTCTR(_R12));
	EMIT(PPC_RAW_MTLR(_R11));
	EMIT(PPC_RAW_BCTR());

	if (!bpf_jit_ool_stub) {
		bpf_jit_ool_stub = (ctx->idx - ool_stub_idx) * 4;
		bpf_jit_long_branch_stub = (ctx->idx - long_branch_stub_idx) * 4;
	}
}

int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr)
{
	if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) {
		PPC_JMP(exit_addr);
	} else if (ctx->alt_exit_addr) {
		if (WARN_ON(!is_offset_in_branch_range((long)ctx->alt_exit_addr - (ctx->idx * 4))))
			return -1;
		PPC_JMP(ctx->alt_exit_addr);
	} else {
		ctx->alt_exit_addr = ctx->idx * 4;
		bpf_jit_build_epilogue(image, ctx);
	}

	return 0;
}

struct powerpc_jit_data {
	/* address of rw header */
	struct bpf_binary_header *hdr;
	/* address of ro final header */
	struct bpf_binary_header *fhdr;
	u32 *addrs;
	u8 *fimage;
	u32 proglen;
	struct codegen_context ctx;
};

bool bpf_jit_needs_zext(void)
{
	return true;
}

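/*
 * Write guard words at both ends of each CPU's private stack so that
 * priv_stack_check_guard() can detect overflow/underflow when the
 * program is freed.
 */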
static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
{
	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
	u64 *stack_ptr;

	for_each_possible_cpu(cpu) {
		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
		stack_ptr[0] = PRIV_STACK_GUARD_VAL;
		stack_ptr[1] = PRIV_STACK_GUARD_VAL;
		stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
		stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
	}
}

static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
				   struct bpf_prog *fp)
{
	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
	u64 *stack_ptr;

	for_each_possible_cpu(cpu) {
		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
		    stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
		    stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
			       bpf_jit_get_prog_name(fp));
			break;
		}
	}
}

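/*
 * Main JIT entry point: size the program with a scouting pass, allocate the
 * writable (image) and read-only final (fimage) buffers, then generate code
 * in passes 1-2. For programs with subprogs, an extra pass is run later to
 * fix up call addresses once all subprogs have been JITed.
 */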
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
	u32 alloclen;
	u8 *image = NULL;
	u32 *code_base = NULL;
	u32 *addrs = NULL;
	struct powerpc_jit_data *jit_data = NULL;
	struct codegen_context cgctx;
	int pass;
	int flen;
	int priv_stack_alloc_size;
	void __percpu *priv_stack_ptr = NULL;
	struct bpf_binary_header *fhdr = NULL;
	struct bpf_binary_header *hdr = NULL;
	struct bpf_prog *org_fp = fp;
	struct bpf_prog *tmp_fp = NULL;
	bool bpf_blinded = false;
	bool extra_pass = false;
	u8 *fimage = NULL;
	u32 *fcode_base = NULL;
	u32 extable_len;
	u32 fixup_len;

	if (!fp->jit_requested)
		return org_fp;

	tmp_fp = bpf_jit_blind_constants(org_fp);
	if (IS_ERR(tmp_fp))
		return org_fp;

	if (tmp_fp != org_fp) {
		bpf_blinded = true;
		fp = tmp_fp;
	}

	jit_data = fp->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc_obj(*jit_data);
		if (!jit_data) {
			fp = org_fp;
			goto out;
		}
		fp->aux->jit_data = jit_data;
	}

	priv_stack_ptr = fp->aux->priv_stack_ptr;
	if (!priv_stack_ptr && fp->aux->jits_use_priv_stack) {
		/*
		 * Allocate private stack of size equivalent to
		 * verifier-calculated stack size plus two memory
		 * guard regions to detect private stack overflow
		 * and underflow.
		 */
		priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
					2 * PRIV_STACK_GUARD_SZ;
		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size, 16, GFP_KERNEL);
		if (!priv_stack_ptr) {
			fp = org_fp;
			goto out_priv_stack;
		}

		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
		fp->aux->priv_stack_ptr = priv_stack_ptr;
	}

	flen = fp->len;
	addrs = jit_data->addrs;
	if (addrs) {
		cgctx = jit_data->ctx;
		/*
		 * JIT compiled to a writable location (image/code_base) first.
		 * It is then moved to the readonly final location (fimage/fcode_base)
		 * using instruction patching.
		 */
		fimage = jit_data->fimage;
		fhdr = jit_data->fhdr;
		proglen = jit_data->proglen;
		hdr = jit_data->hdr;
		image = (void *)hdr + ((void *)fimage - (void *)fhdr);
		extra_pass = true;
		/* During extra pass, ensure index is reset before repopulating extable entries */
		cgctx.exentry_idx = 0;
		goto skip_init_ctx;
	}

	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL) {
		fp = org_fp;
		goto out_addrs;
	}

	memset(&cgctx, 0, sizeof(struct codegen_context));
	bpf_jit_init_reg_mapping(&cgctx);

	/* Make sure that the stack is quadword aligned. */
	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
	cgctx.arena_vm_start = bpf_arena_get_kern_vm_start(fp->aux->arena);
	cgctx.user_vm_start = bpf_arena_get_user_vm_start(fp->aux->arena);
	cgctx.is_subprog = bpf_is_subprog(fp);
	cgctx.exception_boundary = fp->aux->exception_boundary;
	cgctx.exception_cb = fp->aux->exception_cb;
	cgctx.priv_sp = priv_stack_ptr;
	cgctx.priv_stack_size = 0;
	if (priv_stack_ptr) {
		/*
		 * priv_stack_size is required for setting the bpf FP inside
		 * the percpu allocation.
		 * stack_size is set to 0 to prevent allocation on the general
		 * stack and so that offset calculations in
		 * bpf_jit_stack_offsetof() & bpf_jit_stack_local() don't go
		 * for a toss.
		 */
		cgctx.priv_stack_size = cgctx.stack_size;
		cgctx.stack_size = 0;
	}

	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
		/* We hit something illegal or unsupported. */
		fp = org_fp;
		goto out_addrs;
	}

	/*
	 * If we have seen a tail call, we need a second pass.
	 * This is because bpf_jit_emit_common_epilogue() is called
	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
	 * We also need a second pass if we ended up with too large
	 * a program so as to ensure BPF_EXIT branches are in range.
	 */
	if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
		cgctx.idx = 0;
		if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
			fp = org_fp;
			goto out_addrs;
		}
	}

	bpf_jit_realloc_regs(&cgctx);
	/*
	 * Pretend to build prologue, given the features we've seen.  This will
	 * update cgctx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(NULL, &cgctx);
	addrs[fp->len] = cgctx.idx * 4;
	bpf_jit_build_epilogue(NULL, &cgctx);

	fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
	extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;

	fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
					 bpf_jit_fill_ill_insns);
	if (!fhdr) {
		fp = org_fp;
		goto out_addrs;
	}

	if (extable_len)
		fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;

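	/*
	 * The extra pass for programs with subprogs re-enters here with the
	 * image buffers and sizes already set up by the initial compile.
	 */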
skip_init_ctx:
	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
	fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);

	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		cgctx.alt_exit_addr = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass,
				       extra_pass)) {
			bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
			bpf_jit_binary_pack_free(fhdr, hdr);
			fp = org_fp;
			goto out_addrs;
		}
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

	if (bpf_jit_enable > 1)
		/*
		 * Note that we output the base address of the code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

#ifdef CONFIG_PPC64_ELF_ABI_V1
	/* Function descriptor nastiness: Address + TOC */
	((u64 *)image)[0] = (u64)fcode_base;
	((u64 *)image)[1] = local_paca->kernel_toc;
#endif

	fp->bpf_func = (void *)fimage;
	fp->jited = 1;
	fp->jited_len = cgctx.idx * 4 + FUNCTION_DESCR_SIZE;

	if (!fp->is_func || extra_pass) {
		if (bpf_jit_binary_pack_finalize(fhdr, hdr)) {
			fp = org_fp;
			goto out_addrs;
		}
		bpf_prog_fill_jited_linfo(fp, addrs);
		/*
		 * On ABI V1, executable code starts after the function
		 * descriptor, so adjust base accordingly.
		 */
		bpf_prog_update_insn_ptrs(fp, addrs,
					  (void *)fimage + FUNCTION_DESCR_SIZE);

out_addrs:
		if (!image && priv_stack_ptr) {
			fp->aux->priv_stack_ptr = NULL;
			free_percpu(priv_stack_ptr);
		}
out_priv_stack:
		kfree(addrs);
		kfree(jit_data);
		fp->aux->jit_data = NULL;
	} else {
		jit_data->addrs = addrs;
		jit_data->ctx = cgctx;
		jit_data->proglen = proglen;
		jit_data->fimage = fimage;
		jit_data->fhdr = fhdr;
		jit_data->hdr = hdr;
	}

out:
	if (bpf_blinded)
		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

	return fp;
}

/*
 * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
 * this function, as this only applies to BPF_PROBE_MEM, for now.
 */
int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
			  struct codegen_context *ctx, int insn_idx, int jmp_off,
			  int dst_reg, u32 code)
{
	off_t offset;
	unsigned long pc;
	struct exception_table_entry *ex, *ex_entry;
	u32 *fixup;

	/* Populate extable entries only in the last pass */
	if (pass != 2)
		return 0;

	if (!fp->aux->extable ||
	    WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
		return -EINVAL;

	/*
	 * Program is first written to image before copying to the
	 * final location (fimage). Accordingly, update in the image first.
	 * As all offsets used are relative, copying as is to the
	 * final location should be alright.
	 */
	pc = (unsigned long)&image[insn_idx];
	ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;

	fixup = (void *)ex -
		(fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
		(ctx->exentry_idx * BPF_FIXUP_LEN * 4);

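	/*
	 * Each fixup slot holds BPF_FIXUP_LEN instructions:
	 *	li	dst_reg, 0	(nop for BPF_ST/BPF_STX)
	 *	li	dst_reg - 1, 0	(ppc32 only: clear the higher 32-bit register)
	 *	b	<instruction following the faulting access>
	 */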
	fixup[0] = PPC_RAW_LI(dst_reg, 0);
	if (BPF_CLASS(code) == BPF_ST || BPF_CLASS(code) == BPF_STX)
		fixup[0] = PPC_RAW_NOP();

	if (IS_ENABLED(CONFIG_PPC32))
		fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */

	fixup[BPF_FIXUP_LEN - 1] =
		PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);

	ex_entry = &ex[ctx->exentry_idx];

	offset = pc - (long)&ex_entry->insn;
	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
		return -ERANGE;
	ex_entry->insn = offset;

	offset = (long)fixup - (long)&ex_entry->fixup;
	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
		return -ERANGE;
	ex_entry->fixup = offset;

	ctx->exentry_idx++;
	return 0;
}

void *bpf_arch_text_copy(void *dst, void *src, size_t len)
{
	int err;

	if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
		return ERR_PTR(-EINVAL);

	mutex_lock(&text_mutex);
	err = patch_instructions(dst, src, len, false);
	mutex_unlock(&text_mutex);

	return err ? ERR_PTR(err) : dst;
}

int bpf_arch_text_invalidate(void *dst, size_t len)
{
	u32 insn = BREAKPOINT_INSTRUCTION;
	int ret;

	if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
		return -EINVAL;

	mutex_lock(&text_mutex);
	ret = patch_instructions(dst, &insn, len, true);
	mutex_unlock(&text_mutex);

	return ret;
}

void bpf_jit_free(struct bpf_prog *fp)
{
	if (fp->jited) {
		struct powerpc_jit_data *jit_data = fp->aux->jit_data;
		struct bpf_binary_header *hdr;
		void __percpu *priv_stack_ptr;
		int priv_stack_alloc_size;

		/*
		 * If we fail the final pass of JIT (from jit_subprogs),
		 * the program may not be finalized yet. Call finalize here
		 * before freeing it.
		 */
		if (jit_data) {
			bpf_jit_binary_pack_finalize(jit_data->fhdr, jit_data->hdr);
			kvfree(jit_data->addrs);
			kfree(jit_data);
		}
		hdr = bpf_jit_binary_pack_hdr(fp);
		bpf_jit_binary_pack_free(hdr, NULL);
		priv_stack_ptr = fp->aux->priv_stack_ptr;
		if (priv_stack_ptr) {
			priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
						2 * PRIV_STACK_GUARD_SZ;
			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_size, fp);
			free_percpu(priv_stack_ptr);
		}
		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
	}

	bpf_prog_unlock_free(fp);
}

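/*
 * Capability hooks queried by the BPF core: most of the advanced features
 * below are currently supported only on 64-bit powerpc.
 */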
bool bpf_jit_supports_exceptions(void)
{
	return IS_ENABLED(CONFIG_PPC64);
}

bool bpf_jit_supports_subprog_tailcalls(void)
{
	return IS_ENABLED(CONFIG_PPC64);
}

bool bpf_jit_supports_kfunc_call(void)
{
	return IS_ENABLED(CONFIG_PPC64);
}

bool bpf_jit_supports_private_stack(void)
{
	return IS_ENABLED(CONFIG_PPC64);
}

bool bpf_jit_supports_fsession(void)
{
	/*
	 * TODO: Remove after validating support
	 * for fsession and trampoline on ppc32.
	 */
	if (IS_ENABLED(CONFIG_PPC32))
		return false;
	return true;
}

bool bpf_jit_supports_arena(void)
{
	return IS_ENABLED(CONFIG_PPC64);
}

bool bpf_jit_supports_far_kfunc_call(void)
{
	return IS_ENABLED(CONFIG_PPC64);
}

bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
	if (!in_arena)
		return true;
	switch (insn->code) {
	case BPF_STX | BPF_ATOMIC | BPF_H:
	case BPF_STX | BPF_ATOMIC | BPF_B:
	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		if (bpf_atomic_is_load_store(insn))
			return false;
		return IS_ENABLED(CONFIG_PPC64);
	}
	return true;
}

bool bpf_jit_supports_percpu_insn(void)
{
	return IS_ENABLED(CONFIG_PPC64);
}

bool bpf_jit_inlines_helper_call(s32 imm)
{
	switch (imm) {
	case BPF_FUNC_get_smp_processor_id:
	case BPF_FUNC_get_current_task:
	case BPF_FUNC_get_current_task_btf:
		return true;
	default:
		return false;
	}
}

void *arch_alloc_bpf_trampoline(unsigned int size)
{
	return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns);
}

void arch_free_bpf_trampoline(void *image, unsigned int size)
{
	bpf_prog_pack_free(image, size);
}

int arch_protect_bpf_trampoline(void *image, unsigned int size)
{
	return 0;
}

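/*
 * Emit the sequence to invoke a single fentry/fexit/fmod_ret program: save
 * the bpf cookie into the run_ctx, bracket the program with the enter/exit
 * helpers, and skip the program body if the enter helper returns 0.
 */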
static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ctx,
			   struct bpf_tramp_link *l, int regs_off, int retval_off,
			   int run_ctx_off, bool save_ret)
{
	struct bpf_prog *p = l->link.prog;
	ppc_inst_t branch_insn;
	u32 jmp_idx;
	int ret = 0;

	/* Save cookie */
	if (IS_ENABLED(CONFIG_PPC64)) {
		PPC_LI64(_R3, l->cookie);
		EMIT(PPC_RAW_STD(_R3, _R1, run_ctx_off + offsetof(struct bpf_tramp_run_ctx,
								  bpf_cookie)));
	} else {
		PPC_LI32(_R3, l->cookie >> 32);
		PPC_LI32(_R4, l->cookie);
		EMIT(PPC_RAW_STW(_R3, _R1,
				 run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie)));
		EMIT(PPC_RAW_STW(_R4, _R1,
				 run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie) + 4));
	}

	/* __bpf_prog_enter(p, &bpf_tramp_run_ctx) */
	PPC_LI_ADDR(_R3, p);
	EMIT(PPC_RAW_MR(_R25, _R3));
	EMIT(PPC_RAW_ADDI(_R4, _R1, run_ctx_off));
	ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
					 (unsigned long)bpf_trampoline_enter(p));
	if (ret)
		return ret;

	/* Remember prog start time returned by __bpf_prog_enter */
	EMIT(PPC_RAW_MR(_R26, _R3));

	/*
	 * if (__bpf_prog_enter(p) == 0)
	 *	goto skip_exec_of_prog;
	 *
	 * Emit a nop to be later patched with conditional branch, once offset is known
	 */
	EMIT(PPC_RAW_CMPLI(_R3, 0));
	jmp_idx = ctx->idx;
	EMIT(PPC_RAW_NOP());

	/* p->bpf_func(ctx) */
	EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off));
	if (!p->jited)
		PPC_LI_ADDR(_R4, (unsigned long)p->insnsi);
	/* Account for max possible instructions during dummy pass for size calculation */
	if (image && !create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx],
				    (unsigned long)p->bpf_func,
				    BRANCH_SET_LINK)) {
		image[ctx->idx] = ppc_inst_val(branch_insn);
		ctx->idx++;
	} else {
		EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func)));
		EMIT(PPC_RAW_MTCTR(_R12));
		EMIT(PPC_RAW_BCTRL());
	}

	if (save_ret)
		EMIT(PPC_RAW_STL(_R3, _R1, retval_off));

	/* Fix up branch */
	if (image) {
		if (create_cond_branch(&branch_insn, &image[jmp_idx],
				       (unsigned long)&image[ctx->idx], COND_EQ << 16))
			return -EINVAL;
		image[jmp_idx] = ppc_inst_val(branch_insn);
	}

	/* __bpf_prog_exit(p, start_time, &bpf_tramp_run_ctx) */
	EMIT(PPC_RAW_MR(_R3, _R25));
	EMIT(PPC_RAW_MR(_R4, _R26));
	EMIT(PPC_RAW_ADDI(_R5, _R1, run_ctx_off));
	ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
					 (unsigned long)bpf_trampoline_exit(p));

	return ret;
}

static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context *ctx,
			      struct bpf_tramp_links *tl, int regs_off, int retval_off,
			      int run_ctx_off, u32 *branches)
{
	int i;

	/*
	 * The first fmod_ret program will receive a garbage return value.
	 * Set this to 0 to avoid confusing the program.
	 */
	EMIT(PPC_RAW_LI(_R3, 0));
	EMIT(PPC_RAW_STL(_R3, _R1, retval_off));
	for (i = 0; i < tl->nr_links; i++) {
		if (invoke_bpf_prog(image, ro_image, ctx, tl->links[i], regs_off, retval_off,
				    run_ctx_off, true))
			return -EINVAL;

		/*
		 * mod_ret prog stored return value after prog ctx. Emit:
		 * if (*(u64 *)(ret_val) != 0)
		 *	goto do_fexit;
		 */
		EMIT(PPC_RAW_LL(_R3, _R1, retval_off));
		EMIT(PPC_RAW_CMPLI(_R3, 0));

		/*
		 * Save the location of the branch and generate a nop, which is
		 * replaced with a conditional jump once do_fexit (i.e. the
		 * start of the fexit invocation) is finalized.
		 */
		branches[i] = ctx->idx;
		EMIT(PPC_RAW_NOP());
	}

	return 0;
}

/*
 * Refer __arch_prepare_bpf_trampoline() for stack component details.
 *
 * The tail call count/reference is present in the caller's stack frame. The
 * tail_call_info is saved at the same offset in the trampoline frame so that
 * the traced function (BPF subprog/callee) can fetch it.
 */
static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
						int bpf_frame_size, int r4_off)
{
	if (IS_ENABLED(CONFIG_PPC64)) {
		EMIT(PPC_RAW_LD(_R4, _R1, bpf_frame_size));
		/* Refer to the trampoline's generated stack layout */
		EMIT(PPC_RAW_LD(_R3, _R4, -BPF_PPC_TAILCALL));

		/*
		 * Set the tail_call_info in the trampoline's frame depending
		 * on whether the previous frame had a value or a reference.
		 */
		EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
		PPC_BCC_CONST_SHORT(COND_GT, 8);
		EMIT(PPC_RAW_ADDI(_R3, _R4, -BPF_PPC_TAILCALL));

		/*
		 * The trampoline's tail_call_info is at the same offset,
		 * relative to the previous frame, as that of any bpf program.
		 * Update the trampoline frame with the address of main's
		 * tail_call_info.
		 */
		EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size - BPF_PPC_TAILCALL));
	} else {
		/* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
		EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
	}
}

static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx,
						 int bpf_frame_size, int r4_off)
{
	if (IS_ENABLED(CONFIG_PPC32)) {
		/*
		 * Restore tailcall for 32-bit powerpc
		 * See bpf_jit_stack_offsetof() and BPF_PPC_TC
		 */
		EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
	}
}

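/*
 * Save the argument registers into the prog ctx area (regs_off). The first
 * eight arguments arrive in r3-r10; any remaining arguments are copied from
 * the caller's parameter save area.
 */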
static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx,
				     int bpf_frame_size, int nr_regs, int regs_off)
{
	int param_save_area_offset;

	param_save_area_offset = bpf_frame_size;
	param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */

	for (int i = 0; i < nr_regs; i++) {
		if (i < 8) {
			EMIT(PPC_RAW_STL(_R3 + i, _R1, regs_off + i * SZL));
		} else {
			EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL));
			EMIT(PPC_RAW_STL(_R3, _R1, regs_off + i * SZL));
		}
	}
}

/* Used when restoring just the register parameters when returning back */
static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context *ctx,
					     int nr_regs, int regs_off)
{
	for (int i = 0; i < nr_regs && i < 8; i++)
		EMIT(PPC_RAW_LL(_R3 + i, _R1, regs_off + i * SZL));
}

/* Used when we call into the traced function. Replicate parameter save area */
static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx,
					      int bpf_frame_size, int nr_regs, int regs_off)
{
	int param_save_area_offset;

	param_save_area_offset = bpf_frame_size;
	param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */

	for (int i = 8; i < nr_regs; i++) {
		EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL));
		EMIT(PPC_RAW_STL(_R3, _R1, STACK_FRAME_MIN_SIZE + i * SZL));
	}
	bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off);
}

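/*
 * Generate the trampoline body. This is called with a NULL rw_image from
 * arch_bpf_trampoline_size() for a dummy sizing pass, and again with real
 * buffers from arch_prepare_bpf_trampoline().
 */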
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image,
					 void *rw_image_end, void *ro_image,
					 const struct btf_func_model *m, u32 flags,
					 struct bpf_tramp_links *tlinks,
					 void *func_addr)
{
	int regs_off, func_meta_off, ip_off, run_ctx_off, retval_off;
	int nvr_off, alt_lr_off, r4_off = 0;
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	int i, ret, nr_regs, retaddr_off, bpf_frame_size = 0;
	struct codegen_context codegen_ctx, *ctx;
	int cookie_off, cookie_cnt, cookie_ctx_off;
	int fsession_cnt = bpf_fsession_cnt(tlinks);
	u64 func_meta;
	u32 *image = (u32 *)rw_image;
	ppc_inst_t branch_insn;
	u32 *branches = NULL;
	bool save_ret;

	if (IS_ENABLED(CONFIG_PPC32))
		return -EOPNOTSUPP;

	nr_regs = m->nr_args;
	/* Extra registers for struct arguments */
	for (i = 0; i < m->nr_args; i++)
		if (m->arg_size[i] > SZL)
			nr_regs += round_up(m->arg_size[i], SZL) / SZL - 1;

	if (nr_regs > MAX_BPF_FUNC_ARGS)
		return -EOPNOTSUPP;

	ctx = &codegen_ctx;
	memset(ctx, 0, sizeof(*ctx));

	/*
	 * Generated stack layout:
	 *
	 * func prev back chain	[ back chain        ]
	 *			[ tail_call_info    ] optional - 64-bit powerpc
	 *			[ padding           ] align stack frame
	 * r4_off		[ r4 (tailcallcnt)  ] optional - 32-bit powerpc
	 * alt_lr_off		[ real lr (ool stub)] optional - actual lr
	 * retaddr_off		[ return address    ]
	 *			[ r26               ]
	 * nvr_off		[ r25               ] nvr save area
	 * retval_off		[ return value      ]
	 *			[ reg argN          ]
	 *			[ ...               ]
	 * regs_off		[ reg_arg1          ] prog_ctx
	 * func_meta_off	[ args count        ] ((u64 *)prog_ctx)[-1]
	 * ip_off		[ traced function   ] ((u64 *)prog_ctx)[-2]
	 *			[ stack cookieN     ]
	 *			[ ...               ]
	 * cookie_off		[ stack cookie1     ]
	 * run_ctx_off		[ bpf_tramp_run_ctx ]
	 *			[ reg argN          ]
	 *			[ ...               ]
	 * param_save_area	[ reg_arg1          ] min 8 doublewords, per ABI
	 *			[ TOC save (64-bit) ] --
	 *			[ LR save (64-bit)  ] | header
	 *			[ LR save (32-bit)  ] |
	 * bpf trampoline frame	[ back chain 2      ] --
	 *
	 */

	/* Minimum stack frame header */
	bpf_frame_size = STACK_FRAME_MIN_SIZE;

	/*
	 * Room for parameter save area.
	 *
	 * As per the ABI, this is required if we call into the traced
	 * function (BPF_TRAMP_F_CALL_ORIG):
	 * - if the function takes more than 8 arguments, for the rest to spill onto the stack
	 * - or, if the function has variadic arguments
	 * - or, if this function's prototype was not available to the caller
	 *
	 * Reserve space for at least 8 registers for now. This can be optimized later.
	 */
	bpf_frame_size += (nr_regs > 8 ? nr_regs : 8) * SZL;

	/* Room for struct bpf_tramp_run_ctx */
	run_ctx_off = bpf_frame_size;
	bpf_frame_size += round_up(sizeof(struct bpf_tramp_run_ctx), SZL);

	/* Room for session cookies */
	cookie_off = bpf_frame_size;
	cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
	bpf_frame_size += cookie_cnt * 8;

	/* Room for IP address argument */
	ip_off = bpf_frame_size;
	if (flags & BPF_TRAMP_F_IP_ARG)
		bpf_frame_size += SZL;

	/* Room for function metadata, arg regs count */
	func_meta_off = bpf_frame_size;
	bpf_frame_size += SZL;

	/* Room for arg regs */
	regs_off = bpf_frame_size;
	bpf_frame_size += nr_regs * SZL;

	/* Room for return value of func_addr or fentry prog */
	retval_off = bpf_frame_size;
	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
	if (save_ret)
		bpf_frame_size += SZL;

	/* Room for nvr save area */
	nvr_off = bpf_frame_size;
	bpf_frame_size += 2 * SZL;

	/* Save area for return address */
	retaddr_off = bpf_frame_size;
	bpf_frame_size += SZL;

	/* Optional save area for actual LR in case of ool ftrace */
	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
		alt_lr_off = bpf_frame_size;
		bpf_frame_size += SZL;
	}

	if (IS_ENABLED(CONFIG_PPC32)) {
		if (nr_regs < 2) {
			r4_off = bpf_frame_size;
			bpf_frame_size += SZL;
		} else {
			r4_off = regs_off + SZL;
		}
	}

	/*
	 * Save tailcall count pointer at the same offset on the
	 * stack where subprogs expect it
	 */
	if ((flags & BPF_TRAMP_F_CALL_ORIG) &&
	    (flags & BPF_TRAMP_F_TAIL_CALL_CTX))
		bpf_frame_size += BPF_PPC_TAILCALL;

	/* Padding to align stack frame, if any */
	bpf_frame_size = round_up(bpf_frame_size, SZL * 2);

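	/*
	 * Trampoline prologue: set up our stack frame per the layout above,
	 * and save everything the bpf programs and the epilogue will need
	 * (args, LR, TOC, metadata, non-volatile regs).
	 */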
	/* Save the original return address (LR, passed in r0) */
	EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF));

	/* Create our stack frame */
	EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size));

	/* 64-bit: Save TOC and load kernel TOC */
	if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
		EMIT(PPC_RAW_STD(_R2, _R1, 24));
		PPC64_LOAD_PACA();
	}

	/* 32-bit: save tail call count in r4 */
	if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2)
		EMIT(PPC_RAW_STL(_R4, _R1, r4_off));

	bpf_trampoline_save_args(image, ctx, bpf_frame_size, nr_regs, regs_off);

	/* Save our LR/return address */
	EMIT(PPC_RAW_MFLR(_R3));
	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
		EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off));
	else
		EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off));

	/*
	 * Derive IP address of the traced function.
	 * In case of CONFIG_PPC_FTRACE_OUT_OF_LINE or BPF program, LR points to the instruction
	 * after the 'bl' instruction in the OOL stub. Refer to ftrace_init_ool_stub() and
	 * bpf_arch_text_poke() for OOL stub of kernel functions and bpf programs respectively.
	 * Relevant stub sequence:
	 *
	 *		bl	<tramp>
	 * LR (R3) =>	mtlr	r0
	 *		b	<func_addr+4>
	 *
	 * Recover kernel function/bpf program address from the unconditional
	 * branch instruction at the end of OOL stub.
	 */
	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) || flags & BPF_TRAMP_F_IP_ARG) {
		EMIT(PPC_RAW_LWZ(_R4, _R3, 4));
		EMIT(PPC_RAW_SLWI(_R4, _R4, 6));
		EMIT(PPC_RAW_SRAWI(_R4, _R4, 6));
		EMIT(PPC_RAW_ADD(_R3, _R3, _R4));
	}

	if (flags & BPF_TRAMP_F_IP_ARG)
		EMIT(PPC_RAW_STL(_R3, _R1, ip_off));

	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
		/* Fake our LR for BPF_TRAMP_F_CALL_ORIG case */
		EMIT(PPC_RAW_ADDI(_R3, _R3, 4));
		EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off));
	}

	/* Save function arg regs count -- see bpf_get_func_arg_cnt() */
	func_meta = nr_regs;
	store_func_meta(image, ctx, func_meta, func_meta_off);

	/* Save nv regs */
	EMIT(PPC_RAW_STL(_R25, _R1, nvr_off));
	EMIT(PPC_RAW_STL(_R26, _R1, nvr_off + SZL));

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		PPC_LI_ADDR(_R3, (unsigned long)im);
		ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
						 (unsigned long)__bpf_tramp_enter);
		if (ret)
			return ret;
	}

	if (fsession_cnt) {
		/*
		 * Clear all the session cookies' values
		 * Clear the return value to make sure fentry always get 0
		 */
		prepare_for_fsession_fentry(image, ctx, cookie_cnt, cookie_off, retval_off);
	}

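	/*
	 * cookie_ctx_off is the distance, in u64 slots, from the session
	 * cookie area up to the prog ctx (regs_off). It is encoded into
	 * func_meta via BPF_TRAMP_COOKIE_INDEX_SHIFT so that a program using
	 * session cookies can locate its cookie relative to the ctx, and is
	 * decremented as each such link consumes a cookie slot.
	 */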
	cookie_ctx_off = (regs_off - cookie_off) / 8;

	for (i = 0; i < fentry->nr_links; i++) {
		if (bpf_prog_calls_session_cookie(fentry->links[i])) {
			u64 meta = func_meta | (cookie_ctx_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);

			store_func_meta(image, ctx, meta, func_meta_off);
			cookie_ctx_off--;
		}

		if (invoke_bpf_prog(image, ro_image, ctx, fentry->links[i], regs_off, retval_off,
				    run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET))
			return -EINVAL;
	}

	if (fmod_ret->nr_links) {
		branches = kcalloc(fmod_ret->nr_links, sizeof(u32), GFP_KERNEL);
		if (!branches)
			return -ENOMEM;

		if (invoke_bpf_mod_ret(image, ro_image, ctx, fmod_ret, regs_off, retval_off,
				       run_ctx_off, branches)) {
			ret = -EINVAL;
			goto cleanup;
		}
	}

	/* Call the traced function */
	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/*
		 * retaddr on trampoline stack points to the correct point in the original function
		 * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction
		 * sequence
		 */
		EMIT(PPC_RAW_LL(_R3, _R1, retaddr_off));
		EMIT(PPC_RAW_MTCTR(_R3));

		/* Replicate tail_call_cnt before calling the original BPF prog */
		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
			bpf_trampoline_setup_tail_call_info(image, ctx, bpf_frame_size, r4_off);

		/* Restore args */
		bpf_trampoline_restore_args_stack(image, ctx, bpf_frame_size, nr_regs, regs_off);

		/* Restore TOC for 64-bit */
		if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
			EMIT(PPC_RAW_LD(_R2, _R1, 24));
		EMIT(PPC_RAW_BCTRL());
		if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
			PPC64_LOAD_PACA();

		/* Store return value for bpf prog to access */
		EMIT(PPC_RAW_STL(_R3, _R1, retval_off));

		/* Restore updated tail_call_cnt */
		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
			bpf_trampoline_restore_tail_call_cnt(image, ctx, bpf_frame_size, r4_off);

		/* Reserve space to patch branch instruction to skip fexit progs */
		if (ro_image) /* image is NULL for dummy pass */
			im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
		EMIT(PPC_RAW_NOP());
	}

	/* Update branches saved in invoke_bpf_mod_ret with address of do_fexit */
	for (i = 0; i < fmod_ret->nr_links && image; i++) {
		if (create_cond_branch(&branch_insn, &image[branches[i]],
				       (unsigned long)&image[ctx->idx], COND_NE << 16)) {
			ret = -EINVAL;
			goto cleanup;
		}

		image[branches[i]] = ppc_inst_val(branch_insn);
	}

	/* set the "is_return" flag for fsession */
	func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
	if (fsession_cnt)
		store_func_meta(image, ctx, func_meta, func_meta_off);

	cookie_ctx_off = (regs_off - cookie_off) / 8;

	for (i = 0; i < fexit->nr_links; i++) {
		if (bpf_prog_calls_session_cookie(fexit->links[i])) {
			u64 meta = func_meta | (cookie_ctx_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);

			store_func_meta(image, ctx, meta, func_meta_off);
			cookie_ctx_off--;
		}

		if (invoke_bpf_prog(image, ro_image, ctx, fexit->links[i], regs_off, retval_off,
				    run_ctx_off, false)) {
			ret = -EINVAL;
			goto cleanup;
		}
	}

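	/*
	 * For BPF_TRAMP_F_CALL_ORIG, record the epilogue address and call
	 * __bpf_tramp_exit() to pair with __bpf_tramp_enter() above so that
	 * the trampoline image can be released.
	 */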
	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		if (ro_image) /* image is NULL for dummy pass */
			im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
		PPC_LI_ADDR(_R3, im);
		ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
						 (unsigned long)__bpf_tramp_exit);
		if (ret)
			goto cleanup;
	}

	if (flags & BPF_TRAMP_F_RESTORE_REGS)
		bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off);

	/* Restore return value of func_addr or fentry prog */
	if (save_ret)
		EMIT(PPC_RAW_LL(_R3, _R1, retval_off));

	/* Restore nv regs */
	EMIT(PPC_RAW_LL(_R26, _R1, nvr_off + SZL));
	EMIT(PPC_RAW_LL(_R25, _R1, nvr_off));

	/* Epilogue */
	if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
		EMIT(PPC_RAW_LD(_R2, _R1, 24));
	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
		/* Skip the traced function and return to parent */
		EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
		EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
		EMIT(PPC_RAW_MTLR(_R0));
		EMIT(PPC_RAW_BLR());
	} else {
		if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
			EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off));
			EMIT(PPC_RAW_MTLR(_R0));
			EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
			EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
			EMIT(PPC_RAW_BLR());
		} else {
			EMIT(PPC_RAW_LL(_R0, _R1, retaddr_off));
			EMIT(PPC_RAW_MTCTR(_R0));
			EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
			EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
			EMIT(PPC_RAW_MTLR(_R0));
			EMIT(PPC_RAW_BCTR());
		}
	}

	/* Make sure the trampoline generation logic doesn't overflow */
	if (image && WARN_ON_ONCE(&image[ctx->idx] > (u32 *)rw_image_end - BPF_INSN_SAFETY)) {
		ret = -EFAULT;
		goto cleanup;
	}
	ret = ctx->idx * 4 + BPF_INSN_SAFETY * 4;

cleanup:
	kfree(branches);
	return ret;
}

int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
			     struct bpf_tramp_links *tlinks, void *func_addr)
{
	struct bpf_tramp_image im;
	int ret;

	ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tlinks, func_addr);
	return ret;
}

int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
				const struct btf_func_model *m, u32 flags,
				struct bpf_tramp_links *tlinks,
				void *func_addr)
{
	u32 size = image_end - image;
	void *rw_image, *tmp;
	int ret;

	/*
	 * rw_image doesn't need to be in module memory range, so we can
	 * use kvmalloc.
	 */
	rw_image = kvmalloc(size, GFP_KERNEL);
	if (!rw_image)
		return -ENOMEM;

	ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m,
					    flags, tlinks, func_addr);
	if (ret < 0)
		goto out;

	if (bpf_jit_enable > 1)
		bpf_jit_dump(1, ret - BPF_INSN_SAFETY * 4, 1, rw_image);

	tmp = bpf_arch_text_copy(image, rw_image, size);
	if (IS_ERR(tmp))
		ret = PTR_ERR(tmp);

out:
	kvfree(rw_image);
	return ret;
}

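/*
 * Patch a single instruction, but only after verifying that the instruction
 * currently at 'ip' matches the expected old instruction.
 */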
static int bpf_modify_inst(void *ip, ppc_inst_t old_inst, ppc_inst_t new_inst)
{
	ppc_inst_t org_inst;

	if (copy_inst_from_kernel_nofault(&org_inst, ip)) {
		pr_err("0x%lx: fetching instruction failed\n", (unsigned long)ip);
		return -EFAULT;
	}

	if (!ppc_inst_equal(org_inst, old_inst)) {
		pr_err("0x%lx: expected (%08lx) != found (%08lx)\n",
		       (unsigned long)ip, ppc_inst_as_ulong(old_inst), ppc_inst_as_ulong(org_inst));
		return -EINVAL;
	}

	if (ppc_inst_equal(old_inst, new_inst))
		return 0;

	return patch_instruction(ip, new_inst);
}

static void do_isync(void *info __maybe_unused)
{
	isync();
}

/*
 * A 3-step process for bpf prog entry:
 * 1. At bpf prog entry, a single nop/b:
 * bpf_func:
 *	[nop|b]	ool_stub
 * 2. Out-of-line stub:
 * ool_stub:
 *	mflr	r0
 *	[b|bl]	<bpf_prog>/<long_branch_stub>
 *	mtlr	r0 // CONFIG_PPC_FTRACE_OUT_OF_LINE only
 *	b	bpf_func + 4
 * 3. Long branch stub:
 * long_branch_stub:
 *	.long	<branch_addr>/<dummy_tramp>
 *	mflr	r11
 *	bcl	20,31,$+4
 *	mflr	r12
 *	ld	r12, -16(r12)
 *	mtctr	r12
 *	mtlr	r11 // needed to retain ftrace ABI
 *	bctr
 *
 * dummy_tramp is used to reduce synchronization requirements.
 *
 * When attaching a bpf trampoline to a bpf prog, we do not need any
 * synchronization here since we always have a valid branch target regardless
 * of the order in which the above stores are seen. dummy_tramp ensures that
 * the long_branch stub goes to a valid destination on other cpus, even when
 * the branch to the long_branch stub is seen before the updated trampoline
 * address.
 *
 * However, when detaching a bpf trampoline from a bpf prog, or if changing
 * the bpf trampoline address, we need synchronization to ensure that other
 * cpus can no longer branch into the older trampoline so that it can be
 * safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus
 * make forward progress, but we still need to ensure that other cpus
 * execute isync (or some CSI) so that they don't go back into the
 * trampoline again.
 */
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
		       enum bpf_text_poke_type new_t, void *old_addr,
		       void *new_addr)
{
	unsigned long bpf_func, bpf_func_end, size, offset;
	ppc_inst_t old_inst, new_inst;
	int ret = 0, branch_flags;
	char name[KSYM_NAME_LEN];

	if (IS_ENABLED(CONFIG_PPC32))
		return -EOPNOTSUPP;

	bpf_func = (unsigned long)ip;

	/* We currently only support poking bpf programs */
	if (!bpf_address_lookup(bpf_func, &size, &offset, name)) {
		pr_err("%s (0x%lx): kernel/modules are not supported\n", __func__, bpf_func);
		return -EOPNOTSUPP;
	}

	/*
	 * If we are not poking at bpf prog entry, then we are simply patching in/out
	 * an unconditional branch instruction at im->ip_after_call
	 */
	if (offset) {
		if (old_t == BPF_MOD_CALL || new_t == BPF_MOD_CALL) {
			pr_err("%s (0x%lx): calls are not supported in bpf prog body\n", __func__,
			       bpf_func);
			return -EOPNOTSUPP;
		}
		old_inst = ppc_inst(PPC_RAW_NOP());
		if (old_addr)
			if (create_branch(&old_inst, ip, (unsigned long)old_addr, 0))
				return -ERANGE;
		new_inst = ppc_inst(PPC_RAW_NOP());
		if (new_addr)
			if (create_branch(&new_inst, ip, (unsigned long)new_addr, 0))
				return -ERANGE;
		mutex_lock(&text_mutex);
		ret = bpf_modify_inst(ip, old_inst, new_inst);
		mutex_unlock(&text_mutex);

		/* Make sure all cpus see the new instruction */
		smp_call_function(do_isync, NULL, 1);
		return ret;
	}

	bpf_func_end = bpf_func + size;

	/* Address of the jmp/call instruction in the out-of-line stub */
	ip = (void *)(bpf_func_end - bpf_jit_ool_stub + 4);

	if (!is_offset_in_branch_range((long)ip - 4 - bpf_func)) {
		pr_err("%s (0x%lx): bpf prog too large, ool stub out of branch range\n", __func__,
		       bpf_func);
		return -ERANGE;
	}

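	/*
	 * Compute the expected old and new branch instructions at the ool
	 * stub branch site. Targets outside direct branch range are reached
	 * via the long_branch stub instead.
	 */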
	old_inst = ppc_inst(PPC_RAW_NOP());
	branch_flags = old_t == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;
	if (old_addr) {
		if (is_offset_in_branch_range(ip - old_addr))
			create_branch(&old_inst, ip, (unsigned long)old_addr, branch_flags);
		else
			create_branch(&old_inst, ip, bpf_func_end - bpf_jit_long_branch_stub,
				      branch_flags);
	}
	new_inst = ppc_inst(PPC_RAW_NOP());
	branch_flags = new_t == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;
	if (new_addr) {
		if (is_offset_in_branch_range(ip - new_addr))
			create_branch(&new_inst, ip, (unsigned long)new_addr, branch_flags);
		else
			create_branch(&new_inst, ip, bpf_func_end - bpf_jit_long_branch_stub,
				      branch_flags);
	}

	mutex_lock(&text_mutex);

	/*
	 * 1. Update the address in the long branch stub:
	 * If new_addr is out of range, we will have to use the long branch stub, so patch new_addr
	 * here. Otherwise, revert to dummy_tramp, but only if we had patched old_addr here.
	 */
	if ((new_addr && !is_offset_in_branch_range(new_addr - ip)) ||
	    (old_addr && !is_offset_in_branch_range(old_addr - ip)))
		ret = patch_ulong((void *)(bpf_func_end - bpf_jit_long_branch_stub - SZL),
				  (new_addr && !is_offset_in_branch_range(new_addr - ip)) ?
				  (unsigned long)new_addr : (unsigned long)dummy_tramp);
	if (ret)
		goto out;

	/* 2. Update the branch/call in the out-of-line stub */
	ret = bpf_modify_inst(ip, old_inst, new_inst);
	if (ret)
		goto out;

	/* 3. Update instruction at bpf prog entry */
	ip = (void *)bpf_func;
	if (!old_addr || !new_addr) {
		if (!old_addr) {
			old_inst = ppc_inst(PPC_RAW_NOP());
			create_branch(&new_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0);
		} else {
			new_inst = ppc_inst(PPC_RAW_NOP());
			create_branch(&old_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0);
		}
		ret = bpf_modify_inst(ip, old_inst, new_inst);
	}

out:
	mutex_unlock(&text_mutex);

	/*
	 * Sync only if we are not attaching a trampoline to a bpf prog so the older
	 * trampoline can be freed safely.
	 */
	if (old_addr)
		smp_call_function(do_isync, NULL, 1);

	return ret;
}