1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * BPF JIT compiler for ARM64 4 * 5 * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com> 6 */ 7 8 #define pr_fmt(fmt) "bpf_jit: " fmt 9 10 #include <linux/bitfield.h> 11 #include <linux/bpf.h> 12 #include <linux/filter.h> 13 #include <linux/memory.h> 14 #include <linux/printk.h> 15 #include <linux/slab.h> 16 17 #include <asm/asm-extable.h> 18 #include <asm/byteorder.h> 19 #include <asm/cacheflush.h> 20 #include <asm/debug-monitors.h> 21 #include <asm/insn.h> 22 #include <asm/patching.h> 23 #include <asm/set_memory.h> 24 25 #include "bpf_jit.h" 26 27 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) 28 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) 29 #define TCALL_CNT (MAX_BPF_JIT_REG + 2) 30 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) 31 #define FP_BOTTOM (MAX_BPF_JIT_REG + 4) 32 #define ARENA_VM_START (MAX_BPF_JIT_REG + 5) 33 34 #define check_imm(bits, imm) do { \ 35 if ((((imm) > 0) && ((imm) >> (bits))) || \ 36 (((imm) < 0) && (~(imm) >> (bits)))) { \ 37 pr_info("[%2d] imm=%d(0x%x) out of range\n", \ 38 i, imm, imm); \ 39 return -EINVAL; \ 40 } \ 41 } while (0) 42 #define check_imm19(imm) check_imm(19, imm) 43 #define check_imm26(imm) check_imm(26, imm) 44 45 /* Map BPF registers to A64 registers */ 46 static const int bpf2a64[] = { 47 /* return value from in-kernel function, and exit value from eBPF */ 48 [BPF_REG_0] = A64_R(7), 49 /* arguments from eBPF program to in-kernel function */ 50 [BPF_REG_1] = A64_R(0), 51 [BPF_REG_2] = A64_R(1), 52 [BPF_REG_3] = A64_R(2), 53 [BPF_REG_4] = A64_R(3), 54 [BPF_REG_5] = A64_R(4), 55 /* callee saved registers that in-kernel function will preserve */ 56 [BPF_REG_6] = A64_R(19), 57 [BPF_REG_7] = A64_R(20), 58 [BPF_REG_8] = A64_R(21), 59 [BPF_REG_9] = A64_R(22), 60 /* read-only frame pointer to access stack */ 61 [BPF_REG_FP] = A64_R(25), 62 /* temporary registers for BPF JIT */ 63 [TMP_REG_1] = A64_R(10), 64 [TMP_REG_2] = A64_R(11), 65 [TMP_REG_3] = A64_R(12), 66 /* tail_call_cnt */ 67 
[TCALL_CNT] = A64_R(26), 68 /* temporary register for blinding constants */ 69 [BPF_REG_AX] = A64_R(9), 70 [FP_BOTTOM] = A64_R(27), 71 /* callee saved register for kern_vm_start address */ 72 [ARENA_VM_START] = A64_R(28), 73 }; 74 75 struct jit_ctx { 76 const struct bpf_prog *prog; 77 int idx; 78 int epilogue_offset; 79 int *offset; 80 int exentry_idx; 81 __le32 *image; 82 __le32 *ro_image; 83 u32 stack_size; 84 int fpb_offset; 85 u64 user_vm_start; 86 }; 87 88 struct bpf_plt { 89 u32 insn_ldr; /* load target */ 90 u32 insn_br; /* branch to target */ 91 u64 target; /* target value */ 92 }; 93 94 #define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target) 95 #define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target) 96 97 static inline void emit(const u32 insn, struct jit_ctx *ctx) 98 { 99 if (ctx->image != NULL) 100 ctx->image[ctx->idx] = cpu_to_le32(insn); 101 102 ctx->idx++; 103 } 104 105 static inline void emit_a64_mov_i(const int is64, const int reg, 106 const s32 val, struct jit_ctx *ctx) 107 { 108 u16 hi = val >> 16; 109 u16 lo = val & 0xffff; 110 111 if (hi & 0x8000) { 112 if (hi == 0xffff) { 113 emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx); 114 } else { 115 emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx); 116 if (lo != 0xffff) 117 emit(A64_MOVK(is64, reg, lo, 0), ctx); 118 } 119 } else { 120 emit(A64_MOVZ(is64, reg, lo, 0), ctx); 121 if (hi) 122 emit(A64_MOVK(is64, reg, hi, 16), ctx); 123 } 124 } 125 126 static int i64_i16_blocks(const u64 val, bool inverse) 127 { 128 return (((val >> 0) & 0xffff) != (inverse ? 0xffff : 0x0000)) + 129 (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) + 130 (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) + 131 (((val >> 48) & 0xffff) != (inverse ? 
0xffff : 0x0000)); 132 } 133 134 static inline void emit_a64_mov_i64(const int reg, const u64 val, 135 struct jit_ctx *ctx) 136 { 137 u64 nrm_tmp = val, rev_tmp = ~val; 138 bool inverse; 139 int shift; 140 141 if (!(nrm_tmp >> 32)) 142 return emit_a64_mov_i(0, reg, (u32)val, ctx); 143 144 inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false); 145 shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) : 146 (fls64(nrm_tmp) - 1)), 16), 0); 147 if (inverse) 148 emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx); 149 else 150 emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx); 151 shift -= 16; 152 while (shift >= 0) { 153 if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000)) 154 emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx); 155 shift -= 16; 156 } 157 } 158 159 static inline void emit_bti(u32 insn, struct jit_ctx *ctx) 160 { 161 if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) 162 emit(insn, ctx); 163 } 164 165 /* 166 * Kernel addresses in the vmalloc space use at most 48 bits, and the 167 * remaining bits are guaranteed to be 0x1. So we can compose the address 168 * with a fixed length movn/movk/movk sequence. 
169 */ 170 static inline void emit_addr_mov_i64(const int reg, const u64 val, 171 struct jit_ctx *ctx) 172 { 173 u64 tmp = val; 174 int shift = 0; 175 176 emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx); 177 while (shift < 32) { 178 tmp >>= 16; 179 shift += 16; 180 emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx); 181 } 182 } 183 184 static inline void emit_call(u64 target, struct jit_ctx *ctx) 185 { 186 u8 tmp = bpf2a64[TMP_REG_1]; 187 188 emit_addr_mov_i64(tmp, target, ctx); 189 emit(A64_BLR(tmp), ctx); 190 } 191 192 static inline int bpf2a64_offset(int bpf_insn, int off, 193 const struct jit_ctx *ctx) 194 { 195 /* BPF JMP offset is relative to the next instruction */ 196 bpf_insn++; 197 /* 198 * Whereas arm64 branch instructions encode the offset 199 * from the branch itself, so we must subtract 1 from the 200 * instruction offset. 201 */ 202 return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1); 203 } 204 205 static void jit_fill_hole(void *area, unsigned int size) 206 { 207 __le32 *ptr; 208 /* We are guaranteed to have aligned memory. */ 209 for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) 210 *ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT); 211 } 212 213 int bpf_arch_text_invalidate(void *dst, size_t len) 214 { 215 if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len)) 216 return -EINVAL; 217 218 return 0; 219 } 220 221 static inline int epilogue_offset(const struct jit_ctx *ctx) 222 { 223 int to = ctx->epilogue_offset; 224 int from = ctx->idx; 225 226 return to - from; 227 } 228 229 static bool is_addsub_imm(u32 imm) 230 { 231 /* Either imm12 or shifted imm12. */ 232 return !(imm & ~0xfff) || !(imm & ~0xfff000); 233 } 234 235 /* 236 * There are 3 types of AArch64 LDR/STR (immediate) instruction: 237 * Post-index, Pre-index, Unsigned offset. 238 * 239 * For BPF ldr/str, the "unsigned offset" type is sufficient. 
240 * 241 * "Unsigned offset" type LDR(immediate) format: 242 * 243 * 3 2 1 0 244 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 245 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 246 * |x x|1 1 1 0 0 1 0 1| imm12 | Rn | Rt | 247 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 248 * scale 249 * 250 * "Unsigned offset" type STR(immediate) format: 251 * 3 2 1 0 252 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 253 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 254 * |x x|1 1 1 0 0 1 0 0| imm12 | Rn | Rt | 255 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 256 * scale 257 * 258 * The offset is calculated from imm12 and scale in the following way: 259 * 260 * offset = (u64)imm12 << scale 261 */ 262 static bool is_lsi_offset(int offset, int scale) 263 { 264 if (offset < 0) 265 return false; 266 267 if (offset > (0xFFF << scale)) 268 return false; 269 270 if (offset & ((1 << scale) - 1)) 271 return false; 272 273 return true; 274 } 275 276 /* generated prologue: 277 * bti c // if CONFIG_ARM64_BTI_KERNEL 278 * mov x9, lr 279 * nop // POKE_OFFSET 280 * paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL 281 * stp x29, lr, [sp, #-16]! 282 * mov x29, sp 283 * stp x19, x20, [sp, #-16]! 284 * stp x21, x22, [sp, #-16]! 285 * stp x25, x26, [sp, #-16]! 286 * stp x27, x28, [sp, #-16]! 287 * mov x25, sp 288 * mov tcc, #0 289 * // PROLOGUE_OFFSET 290 */ 291 292 #define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0) 293 #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 
1 : 0) 294 295 /* Offset of nop instruction in bpf prog entry to be poked */ 296 #define POKE_OFFSET (BTI_INSNS + 1) 297 298 /* Tail call offset to jump into */ 299 #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8) 300 301 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, 302 bool is_exception_cb, u64 arena_vm_start) 303 { 304 const struct bpf_prog *prog = ctx->prog; 305 const bool is_main_prog = !bpf_is_subprog(prog); 306 const u8 r6 = bpf2a64[BPF_REG_6]; 307 const u8 r7 = bpf2a64[BPF_REG_7]; 308 const u8 r8 = bpf2a64[BPF_REG_8]; 309 const u8 r9 = bpf2a64[BPF_REG_9]; 310 const u8 fp = bpf2a64[BPF_REG_FP]; 311 const u8 tcc = bpf2a64[TCALL_CNT]; 312 const u8 fpb = bpf2a64[FP_BOTTOM]; 313 const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; 314 const int idx0 = ctx->idx; 315 int cur_offset; 316 317 /* 318 * BPF prog stack layout 319 * 320 * high 321 * original A64_SP => 0:+-----+ BPF prologue 322 * |FP/LR| 323 * current A64_FP => -16:+-----+ 324 * | ... | callee saved registers 325 * BPF fp register => -64:+-----+ <= (BPF_FP) 326 * | | 327 * | ... | BPF prog stack 328 * | | 329 * +-----+ <= (BPF_FP - prog->aux->stack_depth) 330 * |RSVD | padding 331 * current A64_SP => +-----+ <= (BPF_FP - ctx->stack_size) 332 * | | 333 * | ... | Function call stack 334 * | | 335 * +-----+ 336 * low 337 * 338 */ 339 340 /* bpf function may be invoked by 3 instruction types: 341 * 1. bl, attached via freplace to bpf prog via short jump 342 * 2. br, attached via freplace to bpf prog via long jump 343 * 3. blr, working as a function pointer, used by emit_call. 344 * So BTI_JC should used here to support both br and blr. 
345 */ 346 emit_bti(A64_BTI_JC, ctx); 347 348 emit(A64_MOV(1, A64_R(9), A64_LR), ctx); 349 emit(A64_NOP, ctx); 350 351 if (!is_exception_cb) { 352 /* Sign lr */ 353 if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) 354 emit(A64_PACIASP, ctx); 355 /* Save FP and LR registers to stay align with ARM64 AAPCS */ 356 emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); 357 emit(A64_MOV(1, A64_FP, A64_SP), ctx); 358 359 /* Save callee-saved registers */ 360 emit(A64_PUSH(r6, r7, A64_SP), ctx); 361 emit(A64_PUSH(r8, r9, A64_SP), ctx); 362 emit(A64_PUSH(fp, tcc, A64_SP), ctx); 363 emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); 364 } else { 365 /* 366 * Exception callback receives FP of Main Program as third 367 * parameter 368 */ 369 emit(A64_MOV(1, A64_FP, A64_R(2)), ctx); 370 /* 371 * Main Program already pushed the frame record and the 372 * callee-saved registers. The exception callback will not push 373 * anything and re-use the main program's stack. 374 * 375 * 10 registers are on the stack 376 */ 377 emit(A64_SUB_I(1, A64_SP, A64_FP, 80), ctx); 378 } 379 380 /* Set up BPF prog stack base register */ 381 emit(A64_MOV(1, fp, A64_SP), ctx); 382 383 if (!ebpf_from_cbpf && is_main_prog) { 384 /* Initialize tail_call_cnt */ 385 emit(A64_MOVZ(1, tcc, 0, 0), ctx); 386 387 cur_offset = ctx->idx - idx0; 388 if (cur_offset != PROLOGUE_OFFSET) { 389 pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", 390 cur_offset, PROLOGUE_OFFSET); 391 return -1; 392 } 393 394 /* BTI landing pad for the tail call, done with a BR */ 395 emit_bti(A64_BTI_J, ctx); 396 } 397 398 /* 399 * Program acting as exception boundary should save all ARM64 400 * Callee-saved registers as the exception callback needs to recover 401 * all ARM64 Callee-saved registers in its epilogue. 
402 */ 403 if (prog->aux->exception_boundary) { 404 /* 405 * As we are pushing two more registers, BPF_FP should be moved 406 * 16 bytes 407 */ 408 emit(A64_SUB_I(1, fp, fp, 16), ctx); 409 emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx); 410 } 411 412 emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); 413 414 /* Stack must be multiples of 16B */ 415 ctx->stack_size = round_up(prog->aux->stack_depth, 16); 416 417 /* Set up function call stack */ 418 emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); 419 420 if (arena_vm_start) 421 emit_a64_mov_i64(arena_vm_base, arena_vm_start, ctx); 422 423 return 0; 424 } 425 426 static int out_offset = -1; /* initialized on the first pass of build_body() */ 427 static int emit_bpf_tail_call(struct jit_ctx *ctx) 428 { 429 /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ 430 const u8 r2 = bpf2a64[BPF_REG_2]; 431 const u8 r3 = bpf2a64[BPF_REG_3]; 432 433 const u8 tmp = bpf2a64[TMP_REG_1]; 434 const u8 prg = bpf2a64[TMP_REG_2]; 435 const u8 tcc = bpf2a64[TCALL_CNT]; 436 const int idx0 = ctx->idx; 437 #define cur_offset (ctx->idx - idx0) 438 #define jmp_offset (out_offset - (cur_offset)) 439 size_t off; 440 441 /* if (index >= array->map.max_entries) 442 * goto out; 443 */ 444 off = offsetof(struct bpf_array, map.max_entries); 445 emit_a64_mov_i64(tmp, off, ctx); 446 emit(A64_LDR32(tmp, r2, tmp), ctx); 447 emit(A64_MOV(0, r3, r3), ctx); 448 emit(A64_CMP(0, r3, tmp), ctx); 449 emit(A64_B_(A64_COND_CS, jmp_offset), ctx); 450 451 /* 452 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) 453 * goto out; 454 * tail_call_cnt++; 455 */ 456 emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); 457 emit(A64_CMP(1, tcc, tmp), ctx); 458 emit(A64_B_(A64_COND_CS, jmp_offset), ctx); 459 emit(A64_ADD_I(1, tcc, tcc, 1), ctx); 460 461 /* prog = array->ptrs[index]; 462 * if (prog == NULL) 463 * goto out; 464 */ 465 off = offsetof(struct bpf_array, ptrs); 466 emit_a64_mov_i64(tmp, off, ctx); 467 emit(A64_ADD(1, tmp, r2, tmp), ctx); 468 
emit(A64_LSL(1, prg, r3, 3), ctx); 469 emit(A64_LDR64(prg, tmp, prg), ctx); 470 emit(A64_CBZ(1, prg, jmp_offset), ctx); 471 472 /* goto *(prog->bpf_func + prologue_offset); */ 473 off = offsetof(struct bpf_prog, bpf_func); 474 emit_a64_mov_i64(tmp, off, ctx); 475 emit(A64_LDR64(tmp, prg, tmp), ctx); 476 emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); 477 emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); 478 emit(A64_BR(tmp), ctx); 479 480 /* out: */ 481 if (out_offset == -1) 482 out_offset = cur_offset; 483 if (cur_offset != out_offset) { 484 pr_err_once("tail_call out_offset = %d, expected %d!\n", 485 cur_offset, out_offset); 486 return -1; 487 } 488 return 0; 489 #undef cur_offset 490 #undef jmp_offset 491 } 492 493 #ifdef CONFIG_ARM64_LSE_ATOMICS 494 static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) 495 { 496 const u8 code = insn->code; 497 const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; 498 const u8 dst = bpf2a64[insn->dst_reg]; 499 const u8 src = bpf2a64[insn->src_reg]; 500 const u8 tmp = bpf2a64[TMP_REG_1]; 501 const u8 tmp2 = bpf2a64[TMP_REG_2]; 502 const bool isdw = BPF_SIZE(code) == BPF_DW; 503 const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC; 504 const s16 off = insn->off; 505 u8 reg = dst; 506 507 if (off || arena) { 508 if (off) { 509 emit_a64_mov_i(1, tmp, off, ctx); 510 emit(A64_ADD(1, tmp, tmp, dst), ctx); 511 reg = tmp; 512 } 513 if (arena) { 514 emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx); 515 reg = tmp; 516 } 517 } 518 519 switch (insn->imm) { 520 /* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */ 521 case BPF_ADD: 522 emit(A64_STADD(isdw, reg, src), ctx); 523 break; 524 case BPF_AND: 525 emit(A64_MVN(isdw, tmp2, src), ctx); 526 emit(A64_STCLR(isdw, reg, tmp2), ctx); 527 break; 528 case BPF_OR: 529 emit(A64_STSET(isdw, reg, src), ctx); 530 break; 531 case BPF_XOR: 532 emit(A64_STEOR(isdw, reg, src), ctx); 533 break; 534 /* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */ 535 
case BPF_ADD | BPF_FETCH: 536 emit(A64_LDADDAL(isdw, src, reg, src), ctx); 537 break; 538 case BPF_AND | BPF_FETCH: 539 emit(A64_MVN(isdw, tmp2, src), ctx); 540 emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx); 541 break; 542 case BPF_OR | BPF_FETCH: 543 emit(A64_LDSETAL(isdw, src, reg, src), ctx); 544 break; 545 case BPF_XOR | BPF_FETCH: 546 emit(A64_LDEORAL(isdw, src, reg, src), ctx); 547 break; 548 /* src_reg = atomic_xchg(dst_reg + off, src_reg); */ 549 case BPF_XCHG: 550 emit(A64_SWPAL(isdw, src, reg, src), ctx); 551 break; 552 /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */ 553 case BPF_CMPXCHG: 554 emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx); 555 break; 556 default: 557 pr_err_once("unknown atomic op code %02x\n", insn->imm); 558 return -EINVAL; 559 } 560 561 return 0; 562 } 563 #else 564 static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) 565 { 566 return -EINVAL; 567 } 568 #endif 569 570 static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) 571 { 572 const u8 code = insn->code; 573 const u8 dst = bpf2a64[insn->dst_reg]; 574 const u8 src = bpf2a64[insn->src_reg]; 575 const u8 tmp = bpf2a64[TMP_REG_1]; 576 const u8 tmp2 = bpf2a64[TMP_REG_2]; 577 const u8 tmp3 = bpf2a64[TMP_REG_3]; 578 const int i = insn - ctx->prog->insnsi; 579 const s32 imm = insn->imm; 580 const s16 off = insn->off; 581 const bool isdw = BPF_SIZE(code) == BPF_DW; 582 u8 reg; 583 s32 jmp_offset; 584 585 if (BPF_MODE(code) == BPF_PROBE_ATOMIC) { 586 /* ll_sc based atomics don't support unsafe pointers yet. 
*/ 587 pr_err_once("unknown atomic opcode %02x\n", code); 588 return -EINVAL; 589 } 590 591 if (!off) { 592 reg = dst; 593 } else { 594 emit_a64_mov_i(1, tmp, off, ctx); 595 emit(A64_ADD(1, tmp, tmp, dst), ctx); 596 reg = tmp; 597 } 598 599 if (imm == BPF_ADD || imm == BPF_AND || 600 imm == BPF_OR || imm == BPF_XOR) { 601 /* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */ 602 emit(A64_LDXR(isdw, tmp2, reg), ctx); 603 if (imm == BPF_ADD) 604 emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); 605 else if (imm == BPF_AND) 606 emit(A64_AND(isdw, tmp2, tmp2, src), ctx); 607 else if (imm == BPF_OR) 608 emit(A64_ORR(isdw, tmp2, tmp2, src), ctx); 609 else 610 emit(A64_EOR(isdw, tmp2, tmp2, src), ctx); 611 emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx); 612 jmp_offset = -3; 613 check_imm19(jmp_offset); 614 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); 615 } else if (imm == (BPF_ADD | BPF_FETCH) || 616 imm == (BPF_AND | BPF_FETCH) || 617 imm == (BPF_OR | BPF_FETCH) || 618 imm == (BPF_XOR | BPF_FETCH)) { 619 /* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */ 620 const u8 ax = bpf2a64[BPF_REG_AX]; 621 622 emit(A64_MOV(isdw, ax, src), ctx); 623 emit(A64_LDXR(isdw, src, reg), ctx); 624 if (imm == (BPF_ADD | BPF_FETCH)) 625 emit(A64_ADD(isdw, tmp2, src, ax), ctx); 626 else if (imm == (BPF_AND | BPF_FETCH)) 627 emit(A64_AND(isdw, tmp2, src, ax), ctx); 628 else if (imm == (BPF_OR | BPF_FETCH)) 629 emit(A64_ORR(isdw, tmp2, src, ax), ctx); 630 else 631 emit(A64_EOR(isdw, tmp2, src, ax), ctx); 632 emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx); 633 jmp_offset = -3; 634 check_imm19(jmp_offset); 635 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); 636 emit(A64_DMB_ISH, ctx); 637 } else if (imm == BPF_XCHG) { 638 /* src_reg = atomic_xchg(dst_reg + off, src_reg); */ 639 emit(A64_MOV(isdw, tmp2, src), ctx); 640 emit(A64_LDXR(isdw, src, reg), ctx); 641 emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx); 642 jmp_offset = -2; 643 check_imm19(jmp_offset); 644 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); 645 
emit(A64_DMB_ISH, ctx); 646 } else if (imm == BPF_CMPXCHG) { 647 /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */ 648 const u8 r0 = bpf2a64[BPF_REG_0]; 649 650 emit(A64_MOV(isdw, tmp2, r0), ctx); 651 emit(A64_LDXR(isdw, r0, reg), ctx); 652 emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx); 653 jmp_offset = 4; 654 check_imm19(jmp_offset); 655 emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx); 656 emit(A64_STLXR(isdw, src, reg, tmp3), ctx); 657 jmp_offset = -4; 658 check_imm19(jmp_offset); 659 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); 660 emit(A64_DMB_ISH, ctx); 661 } else { 662 pr_err_once("unknown atomic op code %02x\n", imm); 663 return -EINVAL; 664 } 665 666 return 0; 667 } 668 669 void dummy_tramp(void); 670 671 asm ( 672 " .pushsection .text, \"ax\", @progbits\n" 673 " .global dummy_tramp\n" 674 " .type dummy_tramp, %function\n" 675 "dummy_tramp:" 676 #if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) 677 " bti j\n" /* dummy_tramp is called via "br x10" */ 678 #endif 679 " mov x10, x30\n" 680 " mov x30, x9\n" 681 " ret x10\n" 682 " .size dummy_tramp, .-dummy_tramp\n" 683 " .popsection\n" 684 ); 685 686 /* build a plt initialized like this: 687 * 688 * plt: 689 * ldr tmp, target 690 * br tmp 691 * target: 692 * .quad dummy_tramp 693 * 694 * when a long jump trampoline is attached, target is filled with the 695 * trampoline address, and when the trampoline is removed, target is 696 * restored to dummy_tramp address. 
697 */ 698 static void build_plt(struct jit_ctx *ctx) 699 { 700 const u8 tmp = bpf2a64[TMP_REG_1]; 701 struct bpf_plt *plt = NULL; 702 703 /* make sure target is 64-bit aligned */ 704 if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2) 705 emit(A64_NOP, ctx); 706 707 plt = (struct bpf_plt *)(ctx->image + ctx->idx); 708 /* plt is called via bl, no BTI needed here */ 709 emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx); 710 emit(A64_BR(tmp), ctx); 711 712 if (ctx->image) 713 plt->target = (u64)&dummy_tramp; 714 } 715 716 static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb) 717 { 718 const u8 r0 = bpf2a64[BPF_REG_0]; 719 const u8 r6 = bpf2a64[BPF_REG_6]; 720 const u8 r7 = bpf2a64[BPF_REG_7]; 721 const u8 r8 = bpf2a64[BPF_REG_8]; 722 const u8 r9 = bpf2a64[BPF_REG_9]; 723 const u8 fp = bpf2a64[BPF_REG_FP]; 724 const u8 fpb = bpf2a64[FP_BOTTOM]; 725 726 /* We're done with BPF stack */ 727 emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); 728 729 /* 730 * Program acting as exception boundary pushes R23 and R24 in addition 731 * to BPF callee-saved registers. Exception callback uses the boundary 732 * program's stack frame, so recover these extra registers in the above 733 * two cases. 
734 */ 735 if (ctx->prog->aux->exception_boundary || is_exception_cb) 736 emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx); 737 738 /* Restore x27 and x28 */ 739 emit(A64_POP(fpb, A64_R(28), A64_SP), ctx); 740 /* Restore fs (x25) and x26 */ 741 emit(A64_POP(fp, A64_R(26), A64_SP), ctx); 742 743 /* Restore callee-saved register */ 744 emit(A64_POP(r8, r9, A64_SP), ctx); 745 emit(A64_POP(r6, r7, A64_SP), ctx); 746 747 /* Restore FP/LR registers */ 748 emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); 749 750 /* Set return value */ 751 emit(A64_MOV(1, A64_R(0), r0), ctx); 752 753 /* Authenticate lr */ 754 if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) 755 emit(A64_AUTIASP, ctx); 756 757 emit(A64_RET(A64_LR), ctx); 758 } 759 760 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) 761 #define BPF_FIXUP_REG_MASK GENMASK(31, 27) 762 #define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */ 763 764 bool ex_handler_bpf(const struct exception_table_entry *ex, 765 struct pt_regs *regs) 766 { 767 off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); 768 int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); 769 770 if (dst_reg != DONT_CLEAR) 771 regs->regs[dst_reg] = 0; 772 regs->pc = (unsigned long)&ex->fixup - offset; 773 return true; 774 } 775 776 /* For accesses to BTF pointers, add an entry to the exception table */ 777 static int add_exception_handler(const struct bpf_insn *insn, 778 struct jit_ctx *ctx, 779 int dst_reg) 780 { 781 off_t ins_offset; 782 off_t fixup_offset; 783 unsigned long pc; 784 struct exception_table_entry *ex; 785 786 if (!ctx->image) 787 /* First pass */ 788 return 0; 789 790 if (BPF_MODE(insn->code) != BPF_PROBE_MEM && 791 BPF_MODE(insn->code) != BPF_PROBE_MEMSX && 792 BPF_MODE(insn->code) != BPF_PROBE_MEM32 && 793 BPF_MODE(insn->code) != BPF_PROBE_ATOMIC) 794 return 0; 795 796 if (!ctx->prog->aux->extable || 797 WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries)) 798 return -EINVAL; 799 800 ex = 
&ctx->prog->aux->extable[ctx->exentry_idx]; 801 pc = (unsigned long)&ctx->ro_image[ctx->idx - 1]; 802 803 /* 804 * This is the relative offset of the instruction that may fault from 805 * the exception table itself. This will be written to the exception 806 * table and if this instruction faults, the destination register will 807 * be set to '0' and the execution will jump to the next instruction. 808 */ 809 ins_offset = pc - (long)&ex->insn; 810 if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN)) 811 return -ERANGE; 812 813 /* 814 * Since the extable follows the program, the fixup offset is always 815 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value 816 * to keep things simple, and put the destination register in the upper 817 * bits. We don't need to worry about buildtime or runtime sort 818 * modifying the upper bits because the table is already sorted, and 819 * isn't part of the main exception table. 820 * 821 * The fixup_offset is set to the next instruction from the instruction 822 * that may fault. The execution will jump to this after handling the 823 * fault. 824 */ 825 fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE); 826 if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset)) 827 return -ERANGE; 828 829 /* 830 * The offsets above have been calculated using the RO buffer but we 831 * need to use the R/W buffer for writes. 832 * switch ex to rw buffer for writing. 833 */ 834 ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image); 835 836 ex->insn = ins_offset; 837 838 if (BPF_CLASS(insn->code) != BPF_LDX) 839 dst_reg = DONT_CLEAR; 840 841 ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | 842 FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); 843 844 ex->type = EX_TYPE_BPF; 845 846 ctx->exentry_idx++; 847 return 0; 848 } 849 850 /* JITs an eBPF instruction. 851 * Returns: 852 * 0 - successfully JITed an 8-byte eBPF instruction. 853 * >0 - successfully JITed a 16-byte eBPF instruction. 854 * <0 - failed to JIT. 
855 */ 856 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, 857 bool extra_pass) 858 { 859 const u8 code = insn->code; 860 u8 dst = bpf2a64[insn->dst_reg]; 861 u8 src = bpf2a64[insn->src_reg]; 862 const u8 tmp = bpf2a64[TMP_REG_1]; 863 const u8 tmp2 = bpf2a64[TMP_REG_2]; 864 const u8 fp = bpf2a64[BPF_REG_FP]; 865 const u8 fpb = bpf2a64[FP_BOTTOM]; 866 const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; 867 const s16 off = insn->off; 868 const s32 imm = insn->imm; 869 const int i = insn - ctx->prog->insnsi; 870 const bool is64 = BPF_CLASS(code) == BPF_ALU64 || 871 BPF_CLASS(code) == BPF_JMP; 872 u8 jmp_cond; 873 s32 jmp_offset; 874 u32 a64_insn; 875 u8 src_adj; 876 u8 dst_adj; 877 int off_adj; 878 int ret; 879 bool sign_extend; 880 881 switch (code) { 882 /* dst = src */ 883 case BPF_ALU | BPF_MOV | BPF_X: 884 case BPF_ALU64 | BPF_MOV | BPF_X: 885 if (insn_is_cast_user(insn)) { 886 emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits 887 emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx); 888 emit(A64_LSL(1, dst, dst, 32), ctx); 889 emit(A64_CBZ(1, tmp, 2), ctx); 890 emit(A64_ORR(1, tmp, dst, tmp), ctx); 891 emit(A64_MOV(1, dst, tmp), ctx); 892 break; 893 } else if (insn_is_mov_percpu_addr(insn)) { 894 if (dst != src) 895 emit(A64_MOV(1, dst, src), ctx); 896 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) 897 emit(A64_MRS_TPIDR_EL2(tmp), ctx); 898 else 899 emit(A64_MRS_TPIDR_EL1(tmp), ctx); 900 emit(A64_ADD(1, dst, dst, tmp), ctx); 901 break; 902 } 903 switch (insn->off) { 904 case 0: 905 emit(A64_MOV(is64, dst, src), ctx); 906 break; 907 case 8: 908 emit(A64_SXTB(is64, dst, src), ctx); 909 break; 910 case 16: 911 emit(A64_SXTH(is64, dst, src), ctx); 912 break; 913 case 32: 914 emit(A64_SXTW(is64, dst, src), ctx); 915 break; 916 } 917 break; 918 /* dst = dst OP src */ 919 case BPF_ALU | BPF_ADD | BPF_X: 920 case BPF_ALU64 | BPF_ADD | BPF_X: 921 emit(A64_ADD(is64, dst, dst, src), ctx); 922 break; 923 case BPF_ALU | BPF_SUB | 
BPF_X: 924 case BPF_ALU64 | BPF_SUB | BPF_X: 925 emit(A64_SUB(is64, dst, dst, src), ctx); 926 break; 927 case BPF_ALU | BPF_AND | BPF_X: 928 case BPF_ALU64 | BPF_AND | BPF_X: 929 emit(A64_AND(is64, dst, dst, src), ctx); 930 break; 931 case BPF_ALU | BPF_OR | BPF_X: 932 case BPF_ALU64 | BPF_OR | BPF_X: 933 emit(A64_ORR(is64, dst, dst, src), ctx); 934 break; 935 case BPF_ALU | BPF_XOR | BPF_X: 936 case BPF_ALU64 | BPF_XOR | BPF_X: 937 emit(A64_EOR(is64, dst, dst, src), ctx); 938 break; 939 case BPF_ALU | BPF_MUL | BPF_X: 940 case BPF_ALU64 | BPF_MUL | BPF_X: 941 emit(A64_MUL(is64, dst, dst, src), ctx); 942 break; 943 case BPF_ALU | BPF_DIV | BPF_X: 944 case BPF_ALU64 | BPF_DIV | BPF_X: 945 if (!off) 946 emit(A64_UDIV(is64, dst, dst, src), ctx); 947 else 948 emit(A64_SDIV(is64, dst, dst, src), ctx); 949 break; 950 case BPF_ALU | BPF_MOD | BPF_X: 951 case BPF_ALU64 | BPF_MOD | BPF_X: 952 if (!off) 953 emit(A64_UDIV(is64, tmp, dst, src), ctx); 954 else 955 emit(A64_SDIV(is64, tmp, dst, src), ctx); 956 emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); 957 break; 958 case BPF_ALU | BPF_LSH | BPF_X: 959 case BPF_ALU64 | BPF_LSH | BPF_X: 960 emit(A64_LSLV(is64, dst, dst, src), ctx); 961 break; 962 case BPF_ALU | BPF_RSH | BPF_X: 963 case BPF_ALU64 | BPF_RSH | BPF_X: 964 emit(A64_LSRV(is64, dst, dst, src), ctx); 965 break; 966 case BPF_ALU | BPF_ARSH | BPF_X: 967 case BPF_ALU64 | BPF_ARSH | BPF_X: 968 emit(A64_ASRV(is64, dst, dst, src), ctx); 969 break; 970 /* dst = -dst */ 971 case BPF_ALU | BPF_NEG: 972 case BPF_ALU64 | BPF_NEG: 973 emit(A64_NEG(is64, dst, dst), ctx); 974 break; 975 /* dst = BSWAP##imm(dst) */ 976 case BPF_ALU | BPF_END | BPF_FROM_LE: 977 case BPF_ALU | BPF_END | BPF_FROM_BE: 978 case BPF_ALU64 | BPF_END | BPF_FROM_LE: 979 #ifdef CONFIG_CPU_BIG_ENDIAN 980 if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE) 981 goto emit_bswap_uxt; 982 #else /* !CONFIG_CPU_BIG_ENDIAN */ 983 if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE) 984 goto 
emit_bswap_uxt; 985 #endif 986 switch (imm) { 987 case 16: 988 emit(A64_REV16(is64, dst, dst), ctx); 989 /* zero-extend 16 bits into 64 bits */ 990 emit(A64_UXTH(is64, dst, dst), ctx); 991 break; 992 case 32: 993 emit(A64_REV32(0, dst, dst), ctx); 994 /* upper 32 bits already cleared */ 995 break; 996 case 64: 997 emit(A64_REV64(dst, dst), ctx); 998 break; 999 } 1000 break; 1001 emit_bswap_uxt: 1002 switch (imm) { 1003 case 16: 1004 /* zero-extend 16 bits into 64 bits */ 1005 emit(A64_UXTH(is64, dst, dst), ctx); 1006 break; 1007 case 32: 1008 /* zero-extend 32 bits into 64 bits */ 1009 emit(A64_UXTW(is64, dst, dst), ctx); 1010 break; 1011 case 64: 1012 /* nop */ 1013 break; 1014 } 1015 break; 1016 /* dst = imm */ 1017 case BPF_ALU | BPF_MOV | BPF_K: 1018 case BPF_ALU64 | BPF_MOV | BPF_K: 1019 emit_a64_mov_i(is64, dst, imm, ctx); 1020 break; 1021 /* dst = dst OP imm */ 1022 case BPF_ALU | BPF_ADD | BPF_K: 1023 case BPF_ALU64 | BPF_ADD | BPF_K: 1024 if (is_addsub_imm(imm)) { 1025 emit(A64_ADD_I(is64, dst, dst, imm), ctx); 1026 } else if (is_addsub_imm(-imm)) { 1027 emit(A64_SUB_I(is64, dst, dst, -imm), ctx); 1028 } else { 1029 emit_a64_mov_i(is64, tmp, imm, ctx); 1030 emit(A64_ADD(is64, dst, dst, tmp), ctx); 1031 } 1032 break; 1033 case BPF_ALU | BPF_SUB | BPF_K: 1034 case BPF_ALU64 | BPF_SUB | BPF_K: 1035 if (is_addsub_imm(imm)) { 1036 emit(A64_SUB_I(is64, dst, dst, imm), ctx); 1037 } else if (is_addsub_imm(-imm)) { 1038 emit(A64_ADD_I(is64, dst, dst, -imm), ctx); 1039 } else { 1040 emit_a64_mov_i(is64, tmp, imm, ctx); 1041 emit(A64_SUB(is64, dst, dst, tmp), ctx); 1042 } 1043 break; 1044 case BPF_ALU | BPF_AND | BPF_K: 1045 case BPF_ALU64 | BPF_AND | BPF_K: 1046 a64_insn = A64_AND_I(is64, dst, dst, imm); 1047 if (a64_insn != AARCH64_BREAK_FAULT) { 1048 emit(a64_insn, ctx); 1049 } else { 1050 emit_a64_mov_i(is64, tmp, imm, ctx); 1051 emit(A64_AND(is64, dst, dst, tmp), ctx); 1052 } 1053 break; 1054 case BPF_ALU | BPF_OR | BPF_K: 1055 case BPF_ALU64 | BPF_OR | BPF_K: 
1056 a64_insn = A64_ORR_I(is64, dst, dst, imm); 1057 if (a64_insn != AARCH64_BREAK_FAULT) { 1058 emit(a64_insn, ctx); 1059 } else { 1060 emit_a64_mov_i(is64, tmp, imm, ctx); 1061 emit(A64_ORR(is64, dst, dst, tmp), ctx); 1062 } 1063 break; 1064 case BPF_ALU | BPF_XOR | BPF_K: 1065 case BPF_ALU64 | BPF_XOR | BPF_K: 1066 a64_insn = A64_EOR_I(is64, dst, dst, imm); 1067 if (a64_insn != AARCH64_BREAK_FAULT) { 1068 emit(a64_insn, ctx); 1069 } else { 1070 emit_a64_mov_i(is64, tmp, imm, ctx); 1071 emit(A64_EOR(is64, dst, dst, tmp), ctx); 1072 } 1073 break; 1074 case BPF_ALU | BPF_MUL | BPF_K: 1075 case BPF_ALU64 | BPF_MUL | BPF_K: 1076 emit_a64_mov_i(is64, tmp, imm, ctx); 1077 emit(A64_MUL(is64, dst, dst, tmp), ctx); 1078 break; 1079 case BPF_ALU | BPF_DIV | BPF_K: 1080 case BPF_ALU64 | BPF_DIV | BPF_K: 1081 emit_a64_mov_i(is64, tmp, imm, ctx); 1082 if (!off) 1083 emit(A64_UDIV(is64, dst, dst, tmp), ctx); 1084 else 1085 emit(A64_SDIV(is64, dst, dst, tmp), ctx); 1086 break; 1087 case BPF_ALU | BPF_MOD | BPF_K: 1088 case BPF_ALU64 | BPF_MOD | BPF_K: 1089 emit_a64_mov_i(is64, tmp2, imm, ctx); 1090 if (!off) 1091 emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); 1092 else 1093 emit(A64_SDIV(is64, tmp, dst, tmp2), ctx); 1094 emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx); 1095 break; 1096 case BPF_ALU | BPF_LSH | BPF_K: 1097 case BPF_ALU64 | BPF_LSH | BPF_K: 1098 emit(A64_LSL(is64, dst, dst, imm), ctx); 1099 break; 1100 case BPF_ALU | BPF_RSH | BPF_K: 1101 case BPF_ALU64 | BPF_RSH | BPF_K: 1102 emit(A64_LSR(is64, dst, dst, imm), ctx); 1103 break; 1104 case BPF_ALU | BPF_ARSH | BPF_K: 1105 case BPF_ALU64 | BPF_ARSH | BPF_K: 1106 emit(A64_ASR(is64, dst, dst, imm), ctx); 1107 break; 1108 1109 /* JUMP off */ 1110 case BPF_JMP | BPF_JA: 1111 case BPF_JMP32 | BPF_JA: 1112 if (BPF_CLASS(code) == BPF_JMP) 1113 jmp_offset = bpf2a64_offset(i, off, ctx); 1114 else 1115 jmp_offset = bpf2a64_offset(i, imm, ctx); 1116 check_imm26(jmp_offset); 1117 emit(A64_B(jmp_offset), ctx); 1118 break; 1119 /* IF 
(dst COND src) JUMP off */ 1120 case BPF_JMP | BPF_JEQ | BPF_X: 1121 case BPF_JMP | BPF_JGT | BPF_X: 1122 case BPF_JMP | BPF_JLT | BPF_X: 1123 case BPF_JMP | BPF_JGE | BPF_X: 1124 case BPF_JMP | BPF_JLE | BPF_X: 1125 case BPF_JMP | BPF_JNE | BPF_X: 1126 case BPF_JMP | BPF_JSGT | BPF_X: 1127 case BPF_JMP | BPF_JSLT | BPF_X: 1128 case BPF_JMP | BPF_JSGE | BPF_X: 1129 case BPF_JMP | BPF_JSLE | BPF_X: 1130 case BPF_JMP32 | BPF_JEQ | BPF_X: 1131 case BPF_JMP32 | BPF_JGT | BPF_X: 1132 case BPF_JMP32 | BPF_JLT | BPF_X: 1133 case BPF_JMP32 | BPF_JGE | BPF_X: 1134 case BPF_JMP32 | BPF_JLE | BPF_X: 1135 case BPF_JMP32 | BPF_JNE | BPF_X: 1136 case BPF_JMP32 | BPF_JSGT | BPF_X: 1137 case BPF_JMP32 | BPF_JSLT | BPF_X: 1138 case BPF_JMP32 | BPF_JSGE | BPF_X: 1139 case BPF_JMP32 | BPF_JSLE | BPF_X: 1140 emit(A64_CMP(is64, dst, src), ctx); 1141 emit_cond_jmp: 1142 jmp_offset = bpf2a64_offset(i, off, ctx); 1143 check_imm19(jmp_offset); 1144 switch (BPF_OP(code)) { 1145 case BPF_JEQ: 1146 jmp_cond = A64_COND_EQ; 1147 break; 1148 case BPF_JGT: 1149 jmp_cond = A64_COND_HI; 1150 break; 1151 case BPF_JLT: 1152 jmp_cond = A64_COND_CC; 1153 break; 1154 case BPF_JGE: 1155 jmp_cond = A64_COND_CS; 1156 break; 1157 case BPF_JLE: 1158 jmp_cond = A64_COND_LS; 1159 break; 1160 case BPF_JSET: 1161 case BPF_JNE: 1162 jmp_cond = A64_COND_NE; 1163 break; 1164 case BPF_JSGT: 1165 jmp_cond = A64_COND_GT; 1166 break; 1167 case BPF_JSLT: 1168 jmp_cond = A64_COND_LT; 1169 break; 1170 case BPF_JSGE: 1171 jmp_cond = A64_COND_GE; 1172 break; 1173 case BPF_JSLE: 1174 jmp_cond = A64_COND_LE; 1175 break; 1176 default: 1177 return -EFAULT; 1178 } 1179 emit(A64_B_(jmp_cond, jmp_offset), ctx); 1180 break; 1181 case BPF_JMP | BPF_JSET | BPF_X: 1182 case BPF_JMP32 | BPF_JSET | BPF_X: 1183 emit(A64_TST(is64, dst, src), ctx); 1184 goto emit_cond_jmp; 1185 /* IF (dst COND imm) JUMP off */ 1186 case BPF_JMP | BPF_JEQ | BPF_K: 1187 case BPF_JMP | BPF_JGT | BPF_K: 1188 case BPF_JMP | BPF_JLT | BPF_K: 1189 case BPF_JMP | 
BPF_JGE | BPF_K: 1190 case BPF_JMP | BPF_JLE | BPF_K: 1191 case BPF_JMP | BPF_JNE | BPF_K: 1192 case BPF_JMP | BPF_JSGT | BPF_K: 1193 case BPF_JMP | BPF_JSLT | BPF_K: 1194 case BPF_JMP | BPF_JSGE | BPF_K: 1195 case BPF_JMP | BPF_JSLE | BPF_K: 1196 case BPF_JMP32 | BPF_JEQ | BPF_K: 1197 case BPF_JMP32 | BPF_JGT | BPF_K: 1198 case BPF_JMP32 | BPF_JLT | BPF_K: 1199 case BPF_JMP32 | BPF_JGE | BPF_K: 1200 case BPF_JMP32 | BPF_JLE | BPF_K: 1201 case BPF_JMP32 | BPF_JNE | BPF_K: 1202 case BPF_JMP32 | BPF_JSGT | BPF_K: 1203 case BPF_JMP32 | BPF_JSLT | BPF_K: 1204 case BPF_JMP32 | BPF_JSGE | BPF_K: 1205 case BPF_JMP32 | BPF_JSLE | BPF_K: 1206 if (is_addsub_imm(imm)) { 1207 emit(A64_CMP_I(is64, dst, imm), ctx); 1208 } else if (is_addsub_imm(-imm)) { 1209 emit(A64_CMN_I(is64, dst, -imm), ctx); 1210 } else { 1211 emit_a64_mov_i(is64, tmp, imm, ctx); 1212 emit(A64_CMP(is64, dst, tmp), ctx); 1213 } 1214 goto emit_cond_jmp; 1215 case BPF_JMP | BPF_JSET | BPF_K: 1216 case BPF_JMP32 | BPF_JSET | BPF_K: 1217 a64_insn = A64_TST_I(is64, dst, imm); 1218 if (a64_insn != AARCH64_BREAK_FAULT) { 1219 emit(a64_insn, ctx); 1220 } else { 1221 emit_a64_mov_i(is64, tmp, imm, ctx); 1222 emit(A64_TST(is64, dst, tmp), ctx); 1223 } 1224 goto emit_cond_jmp; 1225 /* function call */ 1226 case BPF_JMP | BPF_CALL: 1227 { 1228 const u8 r0 = bpf2a64[BPF_REG_0]; 1229 bool func_addr_fixed; 1230 u64 func_addr; 1231 u32 cpu_offset; 1232 1233 /* Implement helper call to bpf_get_smp_processor_id() inline */ 1234 if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { 1235 cpu_offset = offsetof(struct thread_info, cpu); 1236 1237 emit(A64_MRS_SP_EL0(tmp), ctx); 1238 if (is_lsi_offset(cpu_offset, 2)) { 1239 emit(A64_LDR32I(r0, tmp, cpu_offset), ctx); 1240 } else { 1241 emit_a64_mov_i(1, tmp2, cpu_offset, ctx); 1242 emit(A64_LDR32(r0, tmp, tmp2), ctx); 1243 } 1244 break; 1245 } 1246 1247 /* Implement helper call to bpf_get_current_task/_btf() inline */ 1248 if (insn->src_reg == 0 && (insn->imm == 
BPF_FUNC_get_current_task || 1249 insn->imm == BPF_FUNC_get_current_task_btf)) { 1250 emit(A64_MRS_SP_EL0(r0), ctx); 1251 break; 1252 } 1253 1254 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, 1255 &func_addr, &func_addr_fixed); 1256 if (ret < 0) 1257 return ret; 1258 emit_call(func_addr, ctx); 1259 emit(A64_MOV(1, r0, A64_R(0)), ctx); 1260 break; 1261 } 1262 /* tail call */ 1263 case BPF_JMP | BPF_TAIL_CALL: 1264 if (emit_bpf_tail_call(ctx)) 1265 return -EFAULT; 1266 break; 1267 /* function return */ 1268 case BPF_JMP | BPF_EXIT: 1269 /* Optimization: when last instruction is EXIT, 1270 simply fallthrough to epilogue. */ 1271 if (i == ctx->prog->len - 1) 1272 break; 1273 jmp_offset = epilogue_offset(ctx); 1274 check_imm26(jmp_offset); 1275 emit(A64_B(jmp_offset), ctx); 1276 break; 1277 1278 /* dst = imm64 */ 1279 case BPF_LD | BPF_IMM | BPF_DW: 1280 { 1281 const struct bpf_insn insn1 = insn[1]; 1282 u64 imm64; 1283 1284 imm64 = (u64)insn1.imm << 32 | (u32)imm; 1285 if (bpf_pseudo_func(insn)) 1286 emit_addr_mov_i64(dst, imm64, ctx); 1287 else 1288 emit_a64_mov_i64(dst, imm64, ctx); 1289 1290 return 1; 1291 } 1292 1293 /* LDX: dst = (u64)*(unsigned size *)(src + off) */ 1294 case BPF_LDX | BPF_MEM | BPF_W: 1295 case BPF_LDX | BPF_MEM | BPF_H: 1296 case BPF_LDX | BPF_MEM | BPF_B: 1297 case BPF_LDX | BPF_MEM | BPF_DW: 1298 case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 1299 case BPF_LDX | BPF_PROBE_MEM | BPF_W: 1300 case BPF_LDX | BPF_PROBE_MEM | BPF_H: 1301 case BPF_LDX | BPF_PROBE_MEM | BPF_B: 1302 /* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */ 1303 case BPF_LDX | BPF_MEMSX | BPF_B: 1304 case BPF_LDX | BPF_MEMSX | BPF_H: 1305 case BPF_LDX | BPF_MEMSX | BPF_W: 1306 case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: 1307 case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: 1308 case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: 1309 case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: 1310 case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: 1311 case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: 1312 case BPF_LDX | 
BPF_PROBE_MEM32 | BPF_DW: 1313 if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { 1314 emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx); 1315 src = tmp2; 1316 } 1317 if (ctx->fpb_offset > 0 && src == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { 1318 src_adj = fpb; 1319 off_adj = off + ctx->fpb_offset; 1320 } else { 1321 src_adj = src; 1322 off_adj = off; 1323 } 1324 sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX || 1325 BPF_MODE(insn->code) == BPF_PROBE_MEMSX); 1326 switch (BPF_SIZE(code)) { 1327 case BPF_W: 1328 if (is_lsi_offset(off_adj, 2)) { 1329 if (sign_extend) 1330 emit(A64_LDRSWI(dst, src_adj, off_adj), ctx); 1331 else 1332 emit(A64_LDR32I(dst, src_adj, off_adj), ctx); 1333 } else { 1334 emit_a64_mov_i(1, tmp, off, ctx); 1335 if (sign_extend) 1336 emit(A64_LDRSW(dst, src, tmp), ctx); 1337 else 1338 emit(A64_LDR32(dst, src, tmp), ctx); 1339 } 1340 break; 1341 case BPF_H: 1342 if (is_lsi_offset(off_adj, 1)) { 1343 if (sign_extend) 1344 emit(A64_LDRSHI(dst, src_adj, off_adj), ctx); 1345 else 1346 emit(A64_LDRHI(dst, src_adj, off_adj), ctx); 1347 } else { 1348 emit_a64_mov_i(1, tmp, off, ctx); 1349 if (sign_extend) 1350 emit(A64_LDRSH(dst, src, tmp), ctx); 1351 else 1352 emit(A64_LDRH(dst, src, tmp), ctx); 1353 } 1354 break; 1355 case BPF_B: 1356 if (is_lsi_offset(off_adj, 0)) { 1357 if (sign_extend) 1358 emit(A64_LDRSBI(dst, src_adj, off_adj), ctx); 1359 else 1360 emit(A64_LDRBI(dst, src_adj, off_adj), ctx); 1361 } else { 1362 emit_a64_mov_i(1, tmp, off, ctx); 1363 if (sign_extend) 1364 emit(A64_LDRSB(dst, src, tmp), ctx); 1365 else 1366 emit(A64_LDRB(dst, src, tmp), ctx); 1367 } 1368 break; 1369 case BPF_DW: 1370 if (is_lsi_offset(off_adj, 3)) { 1371 emit(A64_LDR64I(dst, src_adj, off_adj), ctx); 1372 } else { 1373 emit_a64_mov_i(1, tmp, off, ctx); 1374 emit(A64_LDR64(dst, src, tmp), ctx); 1375 } 1376 break; 1377 } 1378 1379 ret = add_exception_handler(insn, ctx, dst); 1380 if (ret) 1381 return ret; 1382 break; 1383 1384 /* speculation barrier */ 1385 case 
BPF_ST | BPF_NOSPEC: 1386 /* 1387 * Nothing required here. 1388 * 1389 * In case of arm64, we rely on the firmware mitigation of 1390 * Speculative Store Bypass as controlled via the ssbd kernel 1391 * parameter. Whenever the mitigation is enabled, it works 1392 * for all of the kernel code with no need to provide any 1393 * additional instructions. 1394 */ 1395 break; 1396 1397 /* ST: *(size *)(dst + off) = imm */ 1398 case BPF_ST | BPF_MEM | BPF_W: 1399 case BPF_ST | BPF_MEM | BPF_H: 1400 case BPF_ST | BPF_MEM | BPF_B: 1401 case BPF_ST | BPF_MEM | BPF_DW: 1402 case BPF_ST | BPF_PROBE_MEM32 | BPF_B: 1403 case BPF_ST | BPF_PROBE_MEM32 | BPF_H: 1404 case BPF_ST | BPF_PROBE_MEM32 | BPF_W: 1405 case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: 1406 if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { 1407 emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); 1408 dst = tmp2; 1409 } 1410 if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { 1411 dst_adj = fpb; 1412 off_adj = off + ctx->fpb_offset; 1413 } else { 1414 dst_adj = dst; 1415 off_adj = off; 1416 } 1417 /* Load imm to a register then store it */ 1418 emit_a64_mov_i(1, tmp, imm, ctx); 1419 switch (BPF_SIZE(code)) { 1420 case BPF_W: 1421 if (is_lsi_offset(off_adj, 2)) { 1422 emit(A64_STR32I(tmp, dst_adj, off_adj), ctx); 1423 } else { 1424 emit_a64_mov_i(1, tmp2, off, ctx); 1425 emit(A64_STR32(tmp, dst, tmp2), ctx); 1426 } 1427 break; 1428 case BPF_H: 1429 if (is_lsi_offset(off_adj, 1)) { 1430 emit(A64_STRHI(tmp, dst_adj, off_adj), ctx); 1431 } else { 1432 emit_a64_mov_i(1, tmp2, off, ctx); 1433 emit(A64_STRH(tmp, dst, tmp2), ctx); 1434 } 1435 break; 1436 case BPF_B: 1437 if (is_lsi_offset(off_adj, 0)) { 1438 emit(A64_STRBI(tmp, dst_adj, off_adj), ctx); 1439 } else { 1440 emit_a64_mov_i(1, tmp2, off, ctx); 1441 emit(A64_STRB(tmp, dst, tmp2), ctx); 1442 } 1443 break; 1444 case BPF_DW: 1445 if (is_lsi_offset(off_adj, 3)) { 1446 emit(A64_STR64I(tmp, dst_adj, off_adj), ctx); 1447 } else { 1448 
emit_a64_mov_i(1, tmp2, off, ctx); 1449 emit(A64_STR64(tmp, dst, tmp2), ctx); 1450 } 1451 break; 1452 } 1453 1454 ret = add_exception_handler(insn, ctx, dst); 1455 if (ret) 1456 return ret; 1457 break; 1458 1459 /* STX: *(size *)(dst + off) = src */ 1460 case BPF_STX | BPF_MEM | BPF_W: 1461 case BPF_STX | BPF_MEM | BPF_H: 1462 case BPF_STX | BPF_MEM | BPF_B: 1463 case BPF_STX | BPF_MEM | BPF_DW: 1464 case BPF_STX | BPF_PROBE_MEM32 | BPF_B: 1465 case BPF_STX | BPF_PROBE_MEM32 | BPF_H: 1466 case BPF_STX | BPF_PROBE_MEM32 | BPF_W: 1467 case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: 1468 if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { 1469 emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); 1470 dst = tmp2; 1471 } 1472 if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { 1473 dst_adj = fpb; 1474 off_adj = off + ctx->fpb_offset; 1475 } else { 1476 dst_adj = dst; 1477 off_adj = off; 1478 } 1479 switch (BPF_SIZE(code)) { 1480 case BPF_W: 1481 if (is_lsi_offset(off_adj, 2)) { 1482 emit(A64_STR32I(src, dst_adj, off_adj), ctx); 1483 } else { 1484 emit_a64_mov_i(1, tmp, off, ctx); 1485 emit(A64_STR32(src, dst, tmp), ctx); 1486 } 1487 break; 1488 case BPF_H: 1489 if (is_lsi_offset(off_adj, 1)) { 1490 emit(A64_STRHI(src, dst_adj, off_adj), ctx); 1491 } else { 1492 emit_a64_mov_i(1, tmp, off, ctx); 1493 emit(A64_STRH(src, dst, tmp), ctx); 1494 } 1495 break; 1496 case BPF_B: 1497 if (is_lsi_offset(off_adj, 0)) { 1498 emit(A64_STRBI(src, dst_adj, off_adj), ctx); 1499 } else { 1500 emit_a64_mov_i(1, tmp, off, ctx); 1501 emit(A64_STRB(src, dst, tmp), ctx); 1502 } 1503 break; 1504 case BPF_DW: 1505 if (is_lsi_offset(off_adj, 3)) { 1506 emit(A64_STR64I(src, dst_adj, off_adj), ctx); 1507 } else { 1508 emit_a64_mov_i(1, tmp, off, ctx); 1509 emit(A64_STR64(src, dst, tmp), ctx); 1510 } 1511 break; 1512 } 1513 1514 ret = add_exception_handler(insn, ctx, dst); 1515 if (ret) 1516 return ret; 1517 break; 1518 1519 case BPF_STX | BPF_ATOMIC | BPF_W: 1520 case BPF_STX | 
BPF_ATOMIC | BPF_DW: 1521 case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: 1522 case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: 1523 if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) 1524 ret = emit_lse_atomic(insn, ctx); 1525 else 1526 ret = emit_ll_sc_atomic(insn, ctx); 1527 if (ret) 1528 return ret; 1529 1530 ret = add_exception_handler(insn, ctx, dst); 1531 if (ret) 1532 return ret; 1533 break; 1534 1535 default: 1536 pr_err_once("unknown opcode %02x\n", code); 1537 return -EINVAL; 1538 } 1539 1540 return 0; 1541 } 1542 1543 /* 1544 * Return 0 if FP may change at runtime, otherwise find the minimum negative 1545 * offset to FP, converts it to positive number, and align down to 8 bytes. 1546 */ 1547 static int find_fpb_offset(struct bpf_prog *prog) 1548 { 1549 int i; 1550 int offset = 0; 1551 1552 for (i = 0; i < prog->len; i++) { 1553 const struct bpf_insn *insn = &prog->insnsi[i]; 1554 const u8 class = BPF_CLASS(insn->code); 1555 const u8 mode = BPF_MODE(insn->code); 1556 const u8 src = insn->src_reg; 1557 const u8 dst = insn->dst_reg; 1558 const s32 imm = insn->imm; 1559 const s16 off = insn->off; 1560 1561 switch (class) { 1562 case BPF_STX: 1563 case BPF_ST: 1564 /* fp holds atomic operation result */ 1565 if (class == BPF_STX && mode == BPF_ATOMIC && 1566 ((imm == BPF_XCHG || 1567 imm == (BPF_FETCH | BPF_ADD) || 1568 imm == (BPF_FETCH | BPF_AND) || 1569 imm == (BPF_FETCH | BPF_XOR) || 1570 imm == (BPF_FETCH | BPF_OR)) && 1571 src == BPF_REG_FP)) 1572 return 0; 1573 1574 if (mode == BPF_MEM && dst == BPF_REG_FP && 1575 off < offset) 1576 offset = insn->off; 1577 break; 1578 1579 case BPF_JMP32: 1580 case BPF_JMP: 1581 break; 1582 1583 case BPF_LDX: 1584 case BPF_LD: 1585 /* fp holds load result */ 1586 if (dst == BPF_REG_FP) 1587 return 0; 1588 1589 if (class == BPF_LDX && mode == BPF_MEM && 1590 src == BPF_REG_FP && off < offset) 1591 offset = off; 1592 break; 1593 1594 case BPF_ALU: 1595 case BPF_ALU64: 1596 default: 1597 /* fp holds ALU result */ 1598 if (dst == BPF_REG_FP) 
1599 return 0; 1600 } 1601 } 1602 1603 if (offset < 0) { 1604 /* 1605 * safely be converted to a positive 'int', since insn->off 1606 * is 's16' 1607 */ 1608 offset = -offset; 1609 /* align down to 8 bytes */ 1610 offset = ALIGN_DOWN(offset, 8); 1611 } 1612 1613 return offset; 1614 } 1615 1616 static int build_body(struct jit_ctx *ctx, bool extra_pass) 1617 { 1618 const struct bpf_prog *prog = ctx->prog; 1619 int i; 1620 1621 /* 1622 * - offset[0] offset of the end of prologue, 1623 * start of the 1st instruction. 1624 * - offset[1] - offset of the end of 1st instruction, 1625 * start of the 2nd instruction 1626 * [....] 1627 * - offset[3] - offset of the end of 3rd instruction, 1628 * start of 4th instruction 1629 */ 1630 for (i = 0; i < prog->len; i++) { 1631 const struct bpf_insn *insn = &prog->insnsi[i]; 1632 int ret; 1633 1634 if (ctx->image == NULL) 1635 ctx->offset[i] = ctx->idx; 1636 ret = build_insn(insn, ctx, extra_pass); 1637 if (ret > 0) { 1638 i++; 1639 if (ctx->image == NULL) 1640 ctx->offset[i] = ctx->idx; 1641 continue; 1642 } 1643 if (ret) 1644 return ret; 1645 } 1646 /* 1647 * offset is allocated with prog->len + 1 so fill in 1648 * the last element with the offset after the last 1649 * instruction (end of program) 1650 */ 1651 if (ctx->image == NULL) 1652 ctx->offset[i] = ctx->idx; 1653 1654 return 0; 1655 } 1656 1657 static int validate_code(struct jit_ctx *ctx) 1658 { 1659 int i; 1660 1661 for (i = 0; i < ctx->idx; i++) { 1662 u32 a64_insn = le32_to_cpu(ctx->image[i]); 1663 1664 if (a64_insn == AARCH64_BREAK_FAULT) 1665 return -1; 1666 } 1667 return 0; 1668 } 1669 1670 static int validate_ctx(struct jit_ctx *ctx) 1671 { 1672 if (validate_code(ctx)) 1673 return -1; 1674 1675 if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) 1676 return -1; 1677 1678 return 0; 1679 } 1680 1681 static inline void bpf_flush_icache(void *start, void *end) 1682 { 1683 flush_icache_range((unsigned long)start, (unsigned long)end); 1684 } 1685 1686 
/*
 * JIT state stashed in prog->aux->jit_data between the first compile of a
 * sub-program and its extra pass (see bpf_int_jit_compile()).
 */
struct arm64_jit_data {
	struct bpf_binary_header *header;	/* header of the writable image */
	u8 *ro_image;				/* start of code in the RO/RX image */
	struct bpf_binary_header *ro_header;	/* header of the RO/RX image */
	struct jit_ctx ctx;			/* saved JIT context */
};

/*
 * JIT-compile @prog.
 *
 * Returns the program to run: the (possibly constant-blinded) JITed program
 * on success, or the original @prog when JIT was not requested or a step
 * failed.
 *
 * Flow:
 *  1. sizing pass with ctx.image == NULL to compute ctx.idx / ctx.offset[]
 *  2. allocate the image and emit for real
 *  3. validate the emitted code
 * When jit_data already carries a context with offsets (extra pass), steps 1
 * and the allocation are skipped and code is re-emitted into the saved image.
 */
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	int image_size, prog_size, extable_size, extable_align, extable_offset;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	struct bpf_binary_header *ro_header;
	struct arm64_jit_data *jit_data;
	bool was_classic = bpf_prog_was_classic(prog);
	bool tmp_blinded = false;
	bool extra_pass = false;
	struct jit_ctx ctx;
	u64 arena_vm_start;
	u8 *image_ptr;
	u8 *ro_image_ptr;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/* If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	/* A saved context with offsets means this is the extra pass. */
	if (jit_data->ctx.offset) {
		ctx = jit_data->ctx;
		ro_image_ptr = jit_data->ro_image;
		ro_header = jit_data->ro_header;
		header = jit_data->header;
		/* code sits at the same offset from both headers */
		image_ptr = (void *)header + ((void *)ro_image_ptr
						 - (void *)ro_header);
		extra_pass = true;
		prog_size = sizeof(u32) * ctx.idx;
		goto skip_init_ctx;
	}
	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	/* one extra slot: offset[prog->len] is the end of the program */
	ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
	if (ctx.offset == NULL) {
		prog = orig_prog;
		goto out_off;
	}

	ctx.fpb_offset = find_fpb_offset(prog);
	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);

	/*
	 * 1. Initial fake pass to compute ctx->idx and ctx->offset.
	 *
	 * BPF line info needs ctx->offset[i] to be the offset of
	 * instruction[i] in jited image, so build prologue first.
	 */
	if (build_prologue(&ctx, was_classic, prog->aux->exception_cb,
			   arena_vm_start)) {
		prog = orig_prog;
		goto out_off;
	}

	if (build_body(&ctx, extra_pass)) {
		prog = orig_prog;
		goto out_off;
	}

	ctx.epilogue_offset = ctx.idx;
	build_epilogue(&ctx, prog->aux->exception_cb);
	build_plt(&ctx);

	extable_align = __alignof__(struct exception_table_entry);
	extable_size = prog->aux->num_exentries *
		sizeof(struct exception_table_entry);

	/* Now we know the actual image size. */
	prog_size = sizeof(u32) * ctx.idx;
	/* also allocate space for plt target */
	extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
	image_size = extable_offset + extable_size;
	ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
					      sizeof(u32), &header, &image_ptr,
					      jit_fill_hole);
	if (!ro_header) {
		prog = orig_prog;
		goto out_off;
	}

	/* 2. Now, the actual pass. */

	/*
	 * Use the image(RW) for writing the JITed instructions. But also save
	 * the ro_image(RX) for calculating the offsets in the image. The RW
	 * image will be later copied to the RX image from where the program
	 * will run. The bpf_jit_binary_pack_finalize() will do this copy in the
	 * final step.
	 */
	ctx.image = (__le32 *)image_ptr;
	ctx.ro_image = (__le32 *)ro_image_ptr;
	if (extable_size)
		prog->aux->extable = (void *)ro_image_ptr + extable_offset;
skip_init_ctx:
	ctx.idx = 0;
	ctx.exentry_idx = 0;

	/* NOTE(review): return value is not checked here, unlike in pass 1 */
	build_prologue(&ctx, was_classic, prog->aux->exception_cb, arena_vm_start);

	if (build_body(&ctx, extra_pass)) {
		prog = orig_prog;
		goto out_free_hdr;
	}

	build_epilogue(&ctx, prog->aux->exception_cb);
	build_plt(&ctx);

	/* 3. Extra pass to validate JITed code. */
	if (validate_ctx(&ctx)) {
		prog = orig_prog;
		goto out_free_hdr;
	}

	/* And we're done. */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);

	if (!prog->is_func || extra_pass) {
		/* the extra pass must emit exactly as many insns as before */
		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
			pr_err_once("multi-func JIT bug %d != %d\n",
				    ctx.idx, jit_data->ctx.idx);
			prog->bpf_func = NULL;
			prog->jited = 0;
			prog->jited_len = 0;
			goto out_free_hdr;
		}
		if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
			/* ro_header has been freed */
			ro_header = NULL;
			prog = orig_prog;
			goto out_off;
		}
		/*
		 * The instructions have now been copied to the ROX region from
		 * where they will execute. Now the data cache has to be cleaned to
		 * the PoU and the I-cache has to be invalidated for the VAs.
		 */
		bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx);
	} else {
		/* sub-program, first compile: keep state for the extra pass */
		jit_data->ctx = ctx;
		jit_data->ro_image = ro_image_ptr;
		jit_data->header = header;
		jit_data->ro_header = ro_header;
	}

	prog->bpf_func = (void *)ctx.ro_image;
	prog->jited = 1;
	prog->jited_len = prog_size;

	if (!prog->is_func || extra_pass) {
		int i;

		/* offset[prog->len] is the size of program */
		for (i = 0; i <= prog->len; i++)
			ctx.offset[i] *= AARCH64_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
		/*
		 * Error paths jump into this block: the label sits inside the
		 * 'if' body so they share the jit_data/offset teardown.
		 */
out_off:
		kvfree(ctx.offset);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
out:
	/* drop whichever of the blinded / original programs is not returned */
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;

out_free_hdr:
	if (header) {
		/* copy the header size into the RO header before freeing */
		bpf_arch_text_copy(&ro_header->size, &header->size,
				   sizeof(header->size));
		bpf_jit_binary_pack_free(ro_header, header);
	}
	goto out_off;
}

/* This JIT always reports kfunc call support. */
bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}

/*
 * Copy @len bytes from @src to @dst using aarch64_insn_copy().
 * Returns @dst, or ERR_PTR(-EINVAL) when aarch64_insn_copy() returns NULL.
 */
void *bpf_arch_text_copy(void *dst, void *src, size_t len)
{
	if (!aarch64_insn_copy(dst, src, len))
		return ERR_PTR(-EINVAL);
	return dst;
}

/* The limit reported is the size of the whole vmalloc range. */
u64 bpf_jit_alloc_exec_limit(void)
{
	return VMALLOC_END - VMALLOC_START;
}

/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls.
*/
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}

/*
 * Emit the trampoline code that runs one attached BPF program @l:
 * store the cookie into the run context, call the enter function, skip the
 * program if enter returned 0, call the program, optionally store its return
 * value at SP + @retval_off, then call the exit function.
 *
 * @args_off, @retval_off and @run_ctx_off are offsets from SP in the
 * trampoline frame. x19 and x20 are used as scratch callee-saved registers.
 */
static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
			    int args_off, int retval_off, int run_ctx_off,
			    bool save_ret)
{
	__le32 *branch;
	u64 enter_prog;
	u64 exit_prog;
	struct bpf_prog *p = l->link.prog;
	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);

	enter_prog = (u64)bpf_trampoline_enter(p);
	exit_prog = (u64)bpf_trampoline_exit(p);

	if (l->cookie == 0) {
		/* if cookie is zero, one instruction is enough to store it */
		emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
	} else {
		emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
		emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
		     ctx);
	}

	/* save p to callee saved register x19 to avoid loading p with mov_i64
	 * each time.
	 */
	emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);

	/* arg1: prog */
	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
	/* arg2: &run_ctx */
	emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx);

	emit_call(enter_prog, ctx);

	/* save return value to callee saved register x20 */
	emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);

	/* if (__bpf_prog_enter(prog) == 0)
	 *         goto skip_exec_of_prog;
	 */
	/* placeholder: the nop is overwritten with a cbz further down */
	branch = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);

	/* arg1: pointer to the saved args; arg2 (insnsi) only if not JITed */
	emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
	if (!p->jited)
		emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);

	emit_call((const u64)p->bpf_func, ctx);

	if (save_ret)
		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);

	/* only patch when writing a real image (not in the sizing pass) */
	if (ctx->image) {
		int offset = &ctx->image[ctx->idx] - branch;
		*branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset));
	}

	/* arg1: prog */
	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
	/* arg2: start time */
	emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx);
	/* arg3: &run_ctx */
	emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx);

	emit_call(exit_prog, ctx);
}

/*
 * Emit invocations of all fmod_ret programs in @tl. After each one, the
 * stored return value is loaded into x10 and a nop is emitted; the nop's
 * location is recorded in @branches[i] so the caller can patch in a cbnz.
 * @branches must have room for tl->nr_links entries.
 */
static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
			       int args_off, int retval_off, int run_ctx_off,
			       __le32 **branches)
{
	int i;

	/* The first fmod_ret program will receive a garbage return value.
	 * Set this to 0 to avoid confusing the program.
	 */
	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
	for (i = 0; i < tl->nr_links; i++) {
		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
				run_ctx_off, true);
		/* if (*(u64 *)(sp + retval_off) !=  0)
		 *	goto do_fexit;
		 */
		emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);
		/* Save the location of branch, and generate a nop.
		 * This nop will be replaced with a cbnz later.
		 */
		branches[i] = ctx->image + ctx->idx;
		emit(A64_NOP, ctx);
	}
}

/* Store registers x0..x(nregs-1) to consecutive 8-byte slots at SP + args_off. */
static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
{
	int i;

	for (i = 0; i < nregs; i++) {
		emit(A64_STR64I(i, A64_SP, args_off), ctx);
		args_off += 8;
	}
}

/* Reload registers x0..x(nregs-1) from the slots written by save_args(). */
static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
{
	int i;

	for (i = 0; i < nregs; i++) {
		emit(A64_LDR64I(i, A64_SP, args_off), ctx);
		args_off += 8;
	}
}

/* Based on the x86's implementation of arch_prepare_bpf_trampoline().
2025 * 2026 * bpf prog and function entry before bpf trampoline hooked: 2027 * mov x9, lr 2028 * nop 2029 * 2030 * bpf prog and function entry after bpf trampoline hooked: 2031 * mov x9, lr 2032 * bl <bpf_trampoline or plt> 2033 * 2034 */ 2035 static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, 2036 struct bpf_tramp_links *tlinks, void *func_addr, 2037 int nregs, u32 flags) 2038 { 2039 int i; 2040 int stack_size; 2041 int retaddr_off; 2042 int regs_off; 2043 int retval_off; 2044 int args_off; 2045 int nregs_off; 2046 int ip_off; 2047 int run_ctx_off; 2048 struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; 2049 struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; 2050 struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; 2051 bool save_ret; 2052 __le32 **branches = NULL; 2053 2054 /* trampoline stack layout: 2055 * [ parent ip ] 2056 * [ FP ] 2057 * SP + retaddr_off [ self ip ] 2058 * [ FP ] 2059 * 2060 * [ padding ] align SP to multiples of 16 2061 * 2062 * [ x20 ] callee saved reg x20 2063 * SP + regs_off [ x19 ] callee saved reg x19 2064 * 2065 * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or 2066 * BPF_TRAMP_F_RET_FENTRY_RET 2067 * 2068 * [ arg reg N ] 2069 * [ ... 
] 2070 * SP + args_off [ arg reg 1 ] 2071 * 2072 * SP + nregs_off [ arg regs count ] 2073 * 2074 * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag 2075 * 2076 * SP + run_ctx_off [ bpf_tramp_run_ctx ] 2077 */ 2078 2079 stack_size = 0; 2080 run_ctx_off = stack_size; 2081 /* room for bpf_tramp_run_ctx */ 2082 stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); 2083 2084 ip_off = stack_size; 2085 /* room for IP address argument */ 2086 if (flags & BPF_TRAMP_F_IP_ARG) 2087 stack_size += 8; 2088 2089 nregs_off = stack_size; 2090 /* room for args count */ 2091 stack_size += 8; 2092 2093 args_off = stack_size; 2094 /* room for args */ 2095 stack_size += nregs * 8; 2096 2097 /* room for return value */ 2098 retval_off = stack_size; 2099 save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); 2100 if (save_ret) 2101 stack_size += 8; 2102 2103 /* room for callee saved registers, currently x19 and x20 are used */ 2104 regs_off = stack_size; 2105 stack_size += 16; 2106 2107 /* round up to multiples of 16 to avoid SPAlignmentFault */ 2108 stack_size = round_up(stack_size, 16); 2109 2110 /* return address locates above FP */ 2111 retaddr_off = stack_size + 8; 2112 2113 /* bpf trampoline may be invoked by 3 instruction types: 2114 * 1. bl, attached to bpf prog or kernel function via short jump 2115 * 2. br, attached to bpf prog or kernel function via long jump 2116 * 3. blr, working as a function pointer, used by struct_ops. 2117 * So BTI_JC should used here to support both br and blr. 
2118 */ 2119 emit_bti(A64_BTI_JC, ctx); 2120 2121 /* frame for parent function */ 2122 emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx); 2123 emit(A64_MOV(1, A64_FP, A64_SP), ctx); 2124 2125 /* frame for patched function */ 2126 emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); 2127 emit(A64_MOV(1, A64_FP, A64_SP), ctx); 2128 2129 /* allocate stack space */ 2130 emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx); 2131 2132 if (flags & BPF_TRAMP_F_IP_ARG) { 2133 /* save ip address of the traced function */ 2134 emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx); 2135 emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx); 2136 } 2137 2138 /* save arg regs count*/ 2139 emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx); 2140 emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx); 2141 2142 /* save arg regs */ 2143 save_args(ctx, args_off, nregs); 2144 2145 /* save callee saved registers */ 2146 emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx); 2147 emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx); 2148 2149 if (flags & BPF_TRAMP_F_CALL_ORIG) { 2150 emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); 2151 emit_call((const u64)__bpf_tramp_enter, ctx); 2152 } 2153 2154 for (i = 0; i < fentry->nr_links; i++) 2155 invoke_bpf_prog(ctx, fentry->links[i], args_off, 2156 retval_off, run_ctx_off, 2157 flags & BPF_TRAMP_F_RET_FENTRY_RET); 2158 2159 if (fmod_ret->nr_links) { 2160 branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *), 2161 GFP_KERNEL); 2162 if (!branches) 2163 return -ENOMEM; 2164 2165 invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, 2166 run_ctx_off, branches); 2167 } 2168 2169 if (flags & BPF_TRAMP_F_CALL_ORIG) { 2170 restore_args(ctx, args_off, nregs); 2171 /* call original func */ 2172 emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); 2173 emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx); 2174 emit(A64_RET(A64_R(10)), ctx); 2175 /* store return value */ 2176 emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); 2177 /* reserve a nop for bpf_tramp_image_put 
*/ 2178 im->ip_after_call = ctx->ro_image + ctx->idx; 2179 emit(A64_NOP, ctx); 2180 } 2181 2182 /* update the branches saved in invoke_bpf_mod_ret with cbnz */ 2183 for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { 2184 int offset = &ctx->image[ctx->idx] - branches[i]; 2185 *branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset)); 2186 } 2187 2188 for (i = 0; i < fexit->nr_links; i++) 2189 invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, 2190 run_ctx_off, false); 2191 2192 if (flags & BPF_TRAMP_F_CALL_ORIG) { 2193 im->ip_epilogue = ctx->ro_image + ctx->idx; 2194 emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); 2195 emit_call((const u64)__bpf_tramp_exit, ctx); 2196 } 2197 2198 if (flags & BPF_TRAMP_F_RESTORE_REGS) 2199 restore_args(ctx, args_off, nregs); 2200 2201 /* restore callee saved register x19 and x20 */ 2202 emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); 2203 emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx); 2204 2205 if (save_ret) 2206 emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx); 2207 2208 /* reset SP */ 2209 emit(A64_MOV(1, A64_SP, A64_FP), ctx); 2210 2211 /* pop frames */ 2212 emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); 2213 emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx); 2214 2215 if (flags & BPF_TRAMP_F_SKIP_FRAME) { 2216 /* skip patched function, return to parent */ 2217 emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); 2218 emit(A64_RET(A64_R(9)), ctx); 2219 } else { 2220 /* return to patched function */ 2221 emit(A64_MOV(1, A64_R(10), A64_LR), ctx); 2222 emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); 2223 emit(A64_RET(A64_R(10)), ctx); 2224 } 2225 2226 kfree(branches); 2227 2228 return ctx->idx; 2229 } 2230 2231 static int btf_func_model_nregs(const struct btf_func_model *m) 2232 { 2233 int nregs = m->nr_args; 2234 int i; 2235 2236 /* extra registers needed for struct argument */ 2237 for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { 2238 /* The arg_size is at most 16 bytes, enforced by the verifier. 
*/ 2239 if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) 2240 nregs += (m->arg_size[i] + 7) / 8 - 1; 2241 } 2242 2243 return nregs; 2244 } 2245 2246 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, 2247 struct bpf_tramp_links *tlinks, void *func_addr) 2248 { 2249 struct jit_ctx ctx = { 2250 .image = NULL, 2251 .idx = 0, 2252 }; 2253 struct bpf_tramp_image im; 2254 int nregs, ret; 2255 2256 nregs = btf_func_model_nregs(m); 2257 /* the first 8 registers are used for arguments */ 2258 if (nregs > 8) 2259 return -ENOTSUPP; 2260 2261 ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags); 2262 if (ret < 0) 2263 return ret; 2264 2265 return ret < 0 ? ret : ret * AARCH64_INSN_SIZE; 2266 } 2267 2268 void *arch_alloc_bpf_trampoline(unsigned int size) 2269 { 2270 return bpf_prog_pack_alloc(size, jit_fill_hole); 2271 } 2272 2273 void arch_free_bpf_trampoline(void *image, unsigned int size) 2274 { 2275 bpf_prog_pack_free(image, size); 2276 } 2277 2278 int arch_protect_bpf_trampoline(void *image, unsigned int size) 2279 { 2280 return 0; 2281 } 2282 2283 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, 2284 void *ro_image_end, const struct btf_func_model *m, 2285 u32 flags, struct bpf_tramp_links *tlinks, 2286 void *func_addr) 2287 { 2288 int ret, nregs; 2289 void *image, *tmp; 2290 u32 size = ro_image_end - ro_image; 2291 2292 /* image doesn't need to be in module memory range, so we can 2293 * use kvmalloc. 
2294 */ 2295 image = kvmalloc(size, GFP_KERNEL); 2296 if (!image) 2297 return -ENOMEM; 2298 2299 struct jit_ctx ctx = { 2300 .image = image, 2301 .ro_image = ro_image, 2302 .idx = 0, 2303 }; 2304 2305 nregs = btf_func_model_nregs(m); 2306 /* the first 8 registers are used for arguments */ 2307 if (nregs > 8) 2308 return -ENOTSUPP; 2309 2310 jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image)); 2311 ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags); 2312 2313 if (ret > 0 && validate_code(&ctx) < 0) { 2314 ret = -EINVAL; 2315 goto out; 2316 } 2317 2318 if (ret > 0) 2319 ret *= AARCH64_INSN_SIZE; 2320 2321 tmp = bpf_arch_text_copy(ro_image, image, size); 2322 if (IS_ERR(tmp)) { 2323 ret = PTR_ERR(tmp); 2324 goto out; 2325 } 2326 2327 bpf_flush_icache(ro_image, ro_image + size); 2328 out: 2329 kvfree(image); 2330 return ret; 2331 } 2332 2333 static bool is_long_jump(void *ip, void *target) 2334 { 2335 long offset; 2336 2337 /* NULL target means this is a NOP */ 2338 if (!target) 2339 return false; 2340 2341 offset = (long)target - (long)ip; 2342 return offset < -SZ_128M || offset >= SZ_128M; 2343 } 2344 2345 static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, 2346 void *addr, void *plt, u32 *insn) 2347 { 2348 void *target; 2349 2350 if (!addr) { 2351 *insn = aarch64_insn_gen_nop(); 2352 return 0; 2353 } 2354 2355 if (is_long_jump(ip, addr)) 2356 target = plt; 2357 else 2358 target = addr; 2359 2360 *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, 2361 (unsigned long)target, 2362 type); 2363 2364 return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; 2365 } 2366 2367 /* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf 2368 * trampoline with the branch instruction from @ip to @new_addr. If @old_addr 2369 * or @new_addr is NULL, the old or new instruction is NOP. 2370 * 2371 * When @ip is the bpf prog entry, a bpf trampoline is being attached or 2372 * detached. 
Since bpf trampoline and bpf prog are allocated separately with 2373 * vmalloc, the address distance may exceed 128MB, the maximum branch range. 2374 * So long jump should be handled. 2375 * 2376 * When a bpf prog is constructed, a plt pointing to empty trampoline 2377 * dummy_tramp is placed at the end: 2378 * 2379 * bpf_prog: 2380 * mov x9, lr 2381 * nop // patchsite 2382 * ... 2383 * ret 2384 * 2385 * plt: 2386 * ldr x10, target 2387 * br x10 2388 * target: 2389 * .quad dummy_tramp // plt target 2390 * 2391 * This is also the state when no trampoline is attached. 2392 * 2393 * When a short-jump bpf trampoline is attached, the patchsite is patched 2394 * to a bl instruction to the trampoline directly: 2395 * 2396 * bpf_prog: 2397 * mov x9, lr 2398 * bl <short-jump bpf trampoline address> // patchsite 2399 * ... 2400 * ret 2401 * 2402 * plt: 2403 * ldr x10, target 2404 * br x10 2405 * target: 2406 * .quad dummy_tramp // plt target 2407 * 2408 * When a long-jump bpf trampoline is attached, the plt target is filled with 2409 * the trampoline address and the patchsite is patched to a bl instruction to 2410 * the plt: 2411 * 2412 * bpf_prog: 2413 * mov x9, lr 2414 * bl plt // patchsite 2415 * ... 2416 * ret 2417 * 2418 * plt: 2419 * ldr x10, target 2420 * br x10 2421 * target: 2422 * .quad <long-jump bpf trampoline address> // plt target 2423 * 2424 * The dummy_tramp is used to prevent another CPU from jumping to unknown 2425 * locations during the patching process, making the patching process easier. 
2426 */ 2427 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, 2428 void *old_addr, void *new_addr) 2429 { 2430 int ret; 2431 u32 old_insn; 2432 u32 new_insn; 2433 u32 replaced; 2434 struct bpf_plt *plt = NULL; 2435 unsigned long size = 0UL; 2436 unsigned long offset = ~0UL; 2437 enum aarch64_insn_branch_type branch_type; 2438 char namebuf[KSYM_NAME_LEN]; 2439 void *image = NULL; 2440 u64 plt_target = 0ULL; 2441 bool poking_bpf_entry; 2442 2443 if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) 2444 /* Only poking bpf text is supported. Since kernel function 2445 * entry is set up by ftrace, we reply on ftrace to poke kernel 2446 * functions. 2447 */ 2448 return -ENOTSUPP; 2449 2450 image = ip - offset; 2451 /* zero offset means we're poking bpf prog entry */ 2452 poking_bpf_entry = (offset == 0UL); 2453 2454 /* bpf prog entry, find plt and the real patchsite */ 2455 if (poking_bpf_entry) { 2456 /* plt locates at the end of bpf prog */ 2457 plt = image + size - PLT_TARGET_OFFSET; 2458 2459 /* skip to the nop instruction in bpf prog entry: 2460 * bti c // if BTI enabled 2461 * mov x9, x30 2462 * nop 2463 */ 2464 ip = image + POKE_OFFSET * AARCH64_INSN_SIZE; 2465 } 2466 2467 /* long jump is only possible at bpf prog entry */ 2468 if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) && 2469 !poking_bpf_entry)) 2470 return -EINVAL; 2471 2472 if (poke_type == BPF_MOD_CALL) 2473 branch_type = AARCH64_INSN_BRANCH_LINK; 2474 else 2475 branch_type = AARCH64_INSN_BRANCH_NOLINK; 2476 2477 if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0) 2478 return -EFAULT; 2479 2480 if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0) 2481 return -EFAULT; 2482 2483 if (is_long_jump(ip, new_addr)) 2484 plt_target = (u64)new_addr; 2485 else if (is_long_jump(ip, old_addr)) 2486 /* if the old target is a long jump and the new target is not, 2487 * restore the plt target to dummy_tramp, so there is 
always a 2488 * legal and harmless address stored in plt target, and we'll 2489 * never jump from plt to an unknown place. 2490 */ 2491 plt_target = (u64)&dummy_tramp; 2492 2493 if (plt_target) { 2494 /* non-zero plt_target indicates we're patching a bpf prog, 2495 * which is read only. 2496 */ 2497 if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1)) 2498 return -EFAULT; 2499 WRITE_ONCE(plt->target, plt_target); 2500 set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1); 2501 /* since plt target points to either the new trampoline 2502 * or dummy_tramp, even if another CPU reads the old plt 2503 * target value before fetching the bl instruction to plt, 2504 * it will be brought back by dummy_tramp, so no barrier is 2505 * required here. 2506 */ 2507 } 2508 2509 /* if the old target and the new target are both long jumps, no 2510 * patching is required 2511 */ 2512 if (old_insn == new_insn) 2513 return 0; 2514 2515 mutex_lock(&text_mutex); 2516 if (aarch64_insn_read(ip, &replaced)) { 2517 ret = -EFAULT; 2518 goto out; 2519 } 2520 2521 if (replaced != old_insn) { 2522 ret = -EFAULT; 2523 goto out; 2524 } 2525 2526 /* We call aarch64_insn_patch_text_nosync() to replace instruction 2527 * atomically, so no other CPUs will fetch a half-new and half-old 2528 * instruction. But there is chance that another CPU executes the 2529 * old instruction after the patching operation finishes (e.g., 2530 * pipeline not flushed, or icache not synchronized yet). 2531 * 2532 * 1. when a new trampoline is attached, it is not a problem for 2533 * different CPUs to jump to different trampolines temporarily. 2534 * 2535 * 2. when an old trampoline is freed, we should wait for all other 2536 * CPUs to exit the trampoline and make sure the trampoline is no 2537 * longer reachable, since bpf_tramp_image_put() function already 2538 * uses percpu_ref and task-based rcu to do the sync, no need to call 2539 * the sync version here, see bpf_tramp_image_put() for details. 
2540 */ 2541 ret = aarch64_insn_patch_text_nosync(ip, new_insn); 2542 out: 2543 mutex_unlock(&text_mutex); 2544 2545 return ret; 2546 } 2547 2548 bool bpf_jit_supports_ptr_xchg(void) 2549 { 2550 return true; 2551 } 2552 2553 bool bpf_jit_supports_exceptions(void) 2554 { 2555 /* We unwind through both kernel frames starting from within bpf_throw 2556 * call and BPF frames. Therefore we require FP unwinder to be enabled 2557 * to walk kernel frames and reach BPF frames in the stack trace. 2558 * ARM64 kernel is aways compiled with CONFIG_FRAME_POINTER=y 2559 */ 2560 return true; 2561 } 2562 2563 bool bpf_jit_supports_arena(void) 2564 { 2565 return true; 2566 } 2567 2568 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) 2569 { 2570 if (!in_arena) 2571 return true; 2572 switch (insn->code) { 2573 case BPF_STX | BPF_ATOMIC | BPF_W: 2574 case BPF_STX | BPF_ATOMIC | BPF_DW: 2575 if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) 2576 return false; 2577 } 2578 return true; 2579 } 2580 2581 bool bpf_jit_supports_percpu_insn(void) 2582 { 2583 return true; 2584 } 2585 2586 bool bpf_jit_inlines_helper_call(s32 imm) 2587 { 2588 switch (imm) { 2589 case BPF_FUNC_get_smp_processor_id: 2590 case BPF_FUNC_get_current_task: 2591 case BPF_FUNC_get_current_task_btf: 2592 return true; 2593 default: 2594 return false; 2595 } 2596 } 2597 2598 void bpf_jit_free(struct bpf_prog *prog) 2599 { 2600 if (prog->jited) { 2601 struct arm64_jit_data *jit_data = prog->aux->jit_data; 2602 struct bpf_binary_header *hdr; 2603 2604 /* 2605 * If we fail the final pass of JIT (from jit_subprogs), 2606 * the program may not be finalized yet. Call finalize here 2607 * before freeing it. 
2608 */ 2609 if (jit_data) { 2610 bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size, 2611 sizeof(jit_data->header->size)); 2612 kfree(jit_data); 2613 } 2614 hdr = bpf_jit_binary_pack_hdr(prog); 2615 bpf_jit_binary_pack_free(hdr, NULL); 2616 WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); 2617 } 2618 2619 bpf_prog_unlock_free(prog); 2620 } 2621