// SPDX-License-Identifier: GPL-2.0-only
/*
 * BPF JIT compiler for ARM64
 *
 * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
 */

#define pr_fmt(fmt) "bpf_jit: " fmt

#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/slab.h>

#include <asm/asm-extable.h>
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/patching.h>
#include <asm/set_memory.h>

#include "bpf_jit.h"

/* JIT-internal pseudo registers, allocated past the eBPF register space */
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)

/* Fail the build pass if 'imm' does not fit in a signed 'bits'-bit field */
#define check_imm(bits, imm) do {				\
	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
			i, imm, imm);				\
		return -EINVAL;					\
	}							\
} while (0)
#define check_imm19(imm) check_imm(19, imm)
#define check_imm26(imm) check_imm(26, imm)

/* Map BPF registers to A64 registers */
static const int bpf2a64[] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = A64_R(7),
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = A64_R(0),
	[BPF_REG_2] = A64_R(1),
	[BPF_REG_3] = A64_R(2),
	[BPF_REG_4] = A64_R(3),
	[BPF_REG_5] = A64_R(4),
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = A64_R(19),
	[BPF_REG_7] = A64_R(20),
	[BPF_REG_8] = A64_R(21),
	[BPF_REG_9] = A64_R(22),
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = A64_R(25),
	/* temporary registers for BPF JIT */
	[TMP_REG_1] = A64_R(10),
	[TMP_REG_2] = A64_R(11),
	[TMP_REG_3] = A64_R(12),
	/* tail_call_cnt */
	[TCALL_CNT] = A64_R(26),
	/* temporary register for blinding constants */
	[BPF_REG_AX] = A64_R(9),
	[FP_BOTTOM] = A64_R(27),
};

/* Per-program JIT state, shared between the sizing and emission passes */
struct jit_ctx {
	const struct bpf_prog *prog;	/* program being JITed */
	int idx;			/* index of next A64 insn in image */
	int epilogue_offset;		/* insn index where epilogue begins */
	int *offset;			/* per-BPF-insn A64 offsets */
	int exentry_idx;		/* next free exception-table slot */
	__le32 *image;			/* NULL during the first (sizing) pass */
	u32 stack_size;			/* BPF stack depth rounded up to 16 */
	int fpb_offset;			/* FP_BOTTOM offset below the BPF FP */
};

/* PLT stub: load 'target' PC-relative, then branch to it */
struct bpf_plt {
	u32 insn_ldr; /* load target */
	u32 insn_br; /* branch to target */
	u64 target; /* target value */
};

#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)

/*
 * Append one A64 instruction. During the first pass ctx->image is NULL,
 * so only ctx->idx advances — that pass just measures the image size.
 */
static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
	if (ctx->image != NULL)
		ctx->image[ctx->idx] = cpu_to_le32(insn);

	ctx->idx++;
}

/*
 * Load a 32-bit immediate into 'reg' with at most two instructions,
 * picking MOVN for negative-looking values and MOVZ otherwise.
 */
static inline void emit_a64_mov_i(const int is64, const int reg,
				  const s32 val, struct jit_ctx *ctx)
{
	u16 hi = val >> 16;
	u16 lo = val & 0xffff;

	if (hi & 0x8000) {
		if (hi == 0xffff) {
			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
		} else {
			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
			if (lo != 0xffff)
				emit(A64_MOVK(is64, reg, lo, 0), ctx);
		}
	} else {
		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
		if (hi)
			emit(A64_MOVK(is64, reg, hi, 16), ctx);
	}
}

/*
 * Count the 16-bit chunks of 'val' that differ from the all-ones
 * (inverse/MOVN) or all-zeros (MOVZ) background pattern, i.e. how many
 * insns a mov sequence built on that pattern would need.
 */
static int i64_i16_blocks(const u64 val, bool inverse)
{
	return (((val >> 0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
}

/*
 * Load a 64-bit immediate into 'reg' with the shorter of a MOVZ- or
 * MOVN-based sequence, emitting MOVKs only for chunks that differ from
 * the base pattern.
 */
static inline void emit_a64_mov_i64(const int reg, const u64 val,
				    struct jit_ctx *ctx)
{
	u64 nrm_tmp = val, rev_tmp = ~val;
	bool inverse;
	int shift;

	/* values that fit in 32 bits take the short path */
	if (!(nrm_tmp >> 32))
		return emit_a64_mov_i(0, reg, (u32)val, ctx);

	inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
	/* shift of the highest chunk that differs from the background */
	shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
					  (fls64(nrm_tmp) - 1)), 16), 0);
	if (inverse)
		emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
	else
		emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
	shift -= 16;
	while (shift >= 0) {
		if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
			emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
		shift -= 16;
	}
}

/* Emit a BTI landing pad only when the kernel is built with BTI support */
static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
{
	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
		emit(insn, ctx);
}

/*
 * Kernel addresses in the vmalloc space use at most 48 bits, and the
 * remaining bits are guaranteed to be 0x1. So we can compose the address
 * with a fixed length movn/movk/movk sequence.
 */
static inline void emit_addr_mov_i64(const int reg, const u64 val,
				     struct jit_ctx *ctx)
{
	u64 tmp = val;
	int shift = 0;

	emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx);
	while (shift < 32) {
		tmp >>= 16;
		shift += 16;
		emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
	}
}

/*
 * Call 'target' through TMP_REG_1. Uses the fixed-length address mov so
 * the emitted insn count is the same on both passes.
 */
static inline void emit_call(u64 target, struct jit_ctx *ctx)
{
	u8 tmp = bpf2a64[TMP_REG_1];

	emit_addr_mov_i64(tmp, target, ctx);
	emit(A64_BLR(tmp), ctx);
}

/* A64 branch offset (in insns) for a BPF jump at 'bpf_insn' by 'off' */
static inline int bpf2a64_offset(int bpf_insn, int off,
				 const struct jit_ctx *ctx)
{
	/* BPF JMP offset is relative to the next instruction */
	bpf_insn++;
	/*
	 * Whereas arm64 branch instructions encode the offset
	 * from the branch itself, so we must subtract 1 from the
	 * instruction offset.
	 */
	return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
}

/* Fill unused JIT memory with trapping (break) instructions */
static void jit_fill_hole(void *area, unsigned int size)
{
	__le32 *ptr;
	/* We are guaranteed to have aligned memory. */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
}

/* Branch distance (in insns) from the current position to the epilogue */
static inline int epilogue_offset(const struct jit_ctx *ctx)
{
	int to = ctx->epilogue_offset;
	int from = ctx->idx;

	return to - from;
}

/* True if 'imm' can be encoded directly in an A64 ADD/SUB (immediate) */
static bool is_addsub_imm(u32 imm)
{
	/* Either imm12 or shifted imm12. */
	return !(imm & ~0xfff) || !(imm & ~0xfff000);
}

/*
 * There are 3 types of AArch64 LDR/STR (immediate) instruction:
 * Post-index, Pre-index, Unsigned offset.
 *
 * For BPF ldr/str, the "unsigned offset" type is sufficient.
 *
 * "Unsigned offset" type LDR(immediate) format:
 *
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 1|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * "Unsigned offset" type STR(immediate) format:
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 0|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * The offset is calculated from imm12 and scale in the following way:
 *
 * offset = (u64)imm12 << scale
 */
static bool is_lsi_offset(int offset, int scale)
{
	/* negative offsets cannot be encoded as unsigned imm12 */
	if (offset < 0)
		return false;

	/* must fit in the 12-bit field after scaling */
	if (offset > (0xFFF << scale))
		return false;

	/* must be naturally aligned to the access size */
	if (offset & ((1 << scale) - 1))
		return false;

	return true;
}

/* generated prologue:
 *      bti c // if CONFIG_ARM64_BTI_KERNEL
 *      mov x9, lr
 *      nop  // POKE_OFFSET
 *      paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
 *      stp x29, lr, [sp, #-16]!
 *      mov x29, sp
 *      stp x19, x20, [sp, #-16]!
 *      stp x21, x22, [sp, #-16]!
 *      stp x25, x26, [sp, #-16]!
 *      stp x27, x28, [sp, #-16]!
 *      mov x25, sp
 *      mov tcc, #0
 *      // PROLOGUE_OFFSET
 */

#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)

/* Offset of nop instruction in bpf prog entry to be poked */
#define POKE_OFFSET (BTI_INSNS + 1)

/* Tail call offset to jump into */
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)

/*
 * Emit the function prologue: BTI pad, LR shuffle for attach
 * trampolines, FP/LR and callee-saved register saves, BPF frame pointer
 * setup and (for a main program) tail-call counter init. Returns 0 on
 * success, -1 if the emitted insn count disagrees with PROLOGUE_OFFSET.
 */
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
	const struct bpf_prog *prog = ctx->prog;
	const bool is_main_prog = prog->aux->func_idx == 0;
	const u8 r6 = bpf2a64[BPF_REG_6];
	const u8 r7 = bpf2a64[BPF_REG_7];
	const u8 r8 = bpf2a64[BPF_REG_8];
	const u8 r9 = bpf2a64[BPF_REG_9];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 tcc = bpf2a64[TCALL_CNT];
	const u8 fpb = bpf2a64[FP_BOTTOM];
	const int idx0 = ctx->idx;
	int cur_offset;

	/*
	 * BPF prog stack layout
	 *
	 *                         high
	 * original A64_SP =>   0:+-----+ BPF prologue
	 *                        |FP/LR|
	 * current A64_FP =>  -16:+-----+
	 *                        | ... | callee saved registers
	 * BPF fp register => -64:+-----+ <= (BPF_FP)
	 *                        |     |
	 *                        | ... | BPF prog stack
	 *                        |     |
	 *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
	 *                        |RSVD | padding
	 * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
	 *                        |     |
	 *                        | ... | Function call stack
	 *                        |     |
	 *                        +-----+
	 *                          low
	 *
	 */

	/* bpf function may be invoked by 3 instruction types:
	 * 1. bl, attached via freplace to bpf prog via short jump
	 * 2. br, attached via freplace to bpf prog via long jump
	 * 3. blr, working as a function pointer, used by emit_call.
	 * So BTI_JC should used here to support both br and blr.
	 */
	emit_bti(A64_BTI_JC, ctx);

	/* x9 keeps the original LR for the attach-trampoline convention;
	 * the following nop is the patch site (POKE_OFFSET).
	 */
	emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
	emit(A64_NOP, ctx);

	/* Sign lr */
	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
		emit(A64_PACIASP, ctx);

	/* Save FP and LR registers to stay align with ARM64 AAPCS */
	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* Save callee-saved registers */
	emit(A64_PUSH(r6, r7, A64_SP), ctx);
	emit(A64_PUSH(r8, r9, A64_SP), ctx);
	emit(A64_PUSH(fp, tcc, A64_SP), ctx);
	emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);

	/* Set up BPF prog stack base register */
	emit(A64_MOV(1, fp, A64_SP), ctx);

	if (!ebpf_from_cbpf && is_main_prog) {
		/* Initialize tail_call_cnt */
		emit(A64_MOVZ(1, tcc, 0, 0), ctx);

		/* sanity-check the insn count against the tail-call entry */
		cur_offset = ctx->idx - idx0;
		if (cur_offset != PROLOGUE_OFFSET) {
			pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
				    cur_offset, PROLOGUE_OFFSET);
			return -1;
		}

		/* BTI landing pad for the tail call, done with a BR */
		emit_bti(A64_BTI_J, ctx);
	}

	emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx);

	/* Stack must be multiples of 16B */
	ctx->stack_size = round_up(prog->aux->stack_depth, 16);

	/* Set up function call stack */
	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
	return 0;
}

static int out_offset = -1; /* initialized on the first pass of build_body() */

/*
 * Emit the tail-call sequence: bounds check, tail-call-count limit,
 * prog lookup, then a direct branch past the target's prologue. The
 * "out:" offset is recorded on the first pass and verified on later
 * passes so all three early exits branch to the same place.
 */
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
	const u8 r2 = bpf2a64[BPF_REG_2];
	const u8 r3 = bpf2a64[BPF_REG_3];

	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 prg = bpf2a64[TMP_REG_2];
	const u8 tcc = bpf2a64[TCALL_CNT];
	const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset))
	size_t off;

	/* if (index >= array->map.max_entries)
	 *     goto out;
	 */
	off = offsetof(struct bpf_array, map.max_entries);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_LDR32(tmp, r2, tmp), ctx);
	emit(A64_MOV(0, r3, r3), ctx);
	emit(A64_CMP(0, r3, tmp), ctx);
	emit(A64_B_(A64_COND_CS, jmp_offset), ctx);

	/*
	 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
	 *     goto out;
	 * tail_call_cnt++;
	 */
	emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
	emit(A64_CMP(1, tcc, tmp), ctx);
	emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
	emit(A64_ADD_I(1, tcc, tcc, 1), ctx);

	/* prog = array->ptrs[index];
	 * if (prog == NULL)
	 *     goto out;
	 */
	off = offsetof(struct bpf_array, ptrs);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_ADD(1, tmp, r2, tmp), ctx);
	emit(A64_LSL(1, prg, r3, 3), ctx);
	emit(A64_LDR64(prg, tmp, prg), ctx);
	emit(A64_CBZ(1, prg, jmp_offset), ctx);

	/* goto *(prog->bpf_func + prologue_offset); */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_LDR64(tmp, prg, tmp), ctx);
	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
	emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
	emit(A64_BR(tmp), ctx);

	/* out: */
	if (out_offset == -1)
		out_offset = cur_offset;
	if (cur_offset != out_offset) {
		pr_err_once("tail_call out_offset = %d, expected %d!\n",
			    cur_offset, out_offset);
		return -1;
	}
	return 0;
#undef cur_offset
#undef jmp_offset
}

#ifdef CONFIG_ARM64_LSE_ATOMICS
/*
 * Emit a BPF atomic op with ARMv8.1 LSE atomics (usually a single
 * instruction). Returns 0, or -EINVAL on an unknown atomic opcode.
 */
static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	const s16 off = insn->off;
	u8 reg;

	/* compute the effective address dst + off into 'reg' */
	if (!off) {
		reg = dst;
	} else {
		emit_a64_mov_i(1, tmp, off, ctx);
		emit(A64_ADD(1, tmp, tmp, dst), ctx);
		reg = tmp;
	}

	switch (insn->imm) {
	/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
	case BPF_ADD:
		emit(A64_STADD(isdw, reg, src), ctx);
		break;
	case BPF_AND:
		/* AND is expressed as clear-bits of the complement */
		emit(A64_MVN(isdw, tmp2, src), ctx);
		emit(A64_STCLR(isdw, reg, tmp2), ctx);
		break;
	case BPF_OR:
		emit(A64_STSET(isdw, reg, src), ctx);
		break;
	case BPF_XOR:
		emit(A64_STEOR(isdw, reg, src), ctx);
		break;
	/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
	case BPF_ADD | BPF_FETCH:
		emit(A64_LDADDAL(isdw, src, reg, src), ctx);
		break;
	case BPF_AND | BPF_FETCH:
		emit(A64_MVN(isdw, tmp2, src), ctx);
		emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
		break;
	case BPF_OR | BPF_FETCH:
		emit(A64_LDSETAL(isdw, src, reg, src), ctx);
		break;
	case BPF_XOR | BPF_FETCH:
		emit(A64_LDEORAL(isdw, src, reg, src), ctx);
		break;
	/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
	case BPF_XCHG:
		emit(A64_SWPAL(isdw, src, reg, src), ctx);
		break;
	/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
	case BPF_CMPXCHG:
		emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
		break;
	default:
		pr_err_once("unknown atomic op code %02x\n", insn->imm);
		return -EINVAL;
	}

	return 0;
}
#else
static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	return -EINVAL;
}
#endif

/*
 * Emit a BPF atomic op as an LDXR/STXR (load/store-exclusive) retry
 * loop for CPUs without LSE. The hard-coded jmp_offset values count the
 * insns inside each loop and must match the emitted sequence exactly.
 */
static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 tmp3 = bpf2a64[TMP_REG_3];
	const int i = insn - ctx->prog->insnsi;	/* for check_imm19() reporting */
	const s32 imm = insn->imm;
	const s16 off = insn->off;
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	u8 reg;
	s32 jmp_offset;

	/* compute the effective address dst + off into 'reg' */
	if (!off) {
		reg = dst;
	} else {
		emit_a64_mov_i(1, tmp, off, ctx);
		emit(A64_ADD(1, tmp, tmp, dst), ctx);
		reg = tmp;
	}

	if (imm == BPF_ADD || imm == BPF_AND ||
	    imm == BPF_OR || imm == BPF_XOR) {
		/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
		emit(A64_LDXR(isdw, tmp2, reg), ctx);
		if (imm == BPF_ADD)
			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_AND)
			emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_OR)
			emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
		else
			emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
		emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
		/* retry on store-exclusive failure: back to the LDXR */
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
	} else if (imm == (BPF_ADD | BPF_FETCH) ||
		   imm == (BPF_AND | BPF_FETCH) ||
		   imm == (BPF_OR | BPF_FETCH) ||
		   imm == (BPF_XOR | BPF_FETCH)) {
		/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
		const u8 ax = bpf2a64[BPF_REG_AX];

		emit(A64_MOV(isdw, ax, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		if (imm == (BPF_ADD | BPF_FETCH))
			emit(A64_ADD(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_AND | BPF_FETCH))
			emit(A64_AND(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_OR | BPF_FETCH))
			emit(A64_ORR(isdw, tmp2, src, ax), ctx);
		else
			emit(A64_EOR(isdw, tmp2, src, ax), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_XCHG) {
		/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
		emit(A64_MOV(isdw, tmp2, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		jmp_offset = -2;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_CMPXCHG) {
		/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
		const u8 r0 = bpf2a64[BPF_REG_0];

		emit(A64_MOV(isdw, tmp2, r0), ctx);
		emit(A64_LDXR(isdw, r0, reg), ctx);
		emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
		/* mismatch with the expected value: skip the store */
		jmp_offset = 4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
		emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
		jmp_offset = -4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else {
		pr_err_once("unknown atomic op code %02x\n", imm);
		return -EINVAL;
	}

	return 0;
}

void dummy_tramp(void);

asm (
" .pushsection .text, \"ax\", @progbits\n"
" .global dummy_tramp\n"
" .type dummy_tramp, %function\n"
"dummy_tramp:"
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
" bti j\n" /* dummy_tramp is called via "br x10" */
#endif
" mov x10, x30\n"
" mov x30, x9\n"
" ret x10\n"
" .size dummy_tramp, .-dummy_tramp\n"
" .popsection\n"
);

/* build a plt initialized like this:
 *
 * plt:
 *      ldr tmp, target
 *      br tmp
 * target:
 *      .quad dummy_tramp
 *
 * when a long jump trampoline is attached, target is filled with the
 * trampoline address, and when the trampoline is removed, target is
 * restored to dummy_tramp address.
 */
static void build_plt(struct jit_ctx *ctx)
{
	const u8 tmp = bpf2a64[TMP_REG_1];
	struct bpf_plt *plt = NULL;

	/* make sure target is 64-bit aligned */
	if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
		emit(A64_NOP, ctx);

	plt = (struct bpf_plt *)(ctx->image + ctx->idx);
	/* plt is called via bl, no BTI needed here */
	emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
	emit(A64_BR(tmp), ctx);

	/* only write the target on the emission pass (image != NULL) */
	if (ctx->image)
		plt->target = (u64)&dummy_tramp;
}

/*
 * Emit the epilogue: unwind the prologue in reverse order, move the BPF
 * return value (x7) into x0 and return. Must mirror build_prologue().
 */
static void build_epilogue(struct jit_ctx *ctx)
{
	const u8 r0 = bpf2a64[BPF_REG_0];
	const u8 r6 = bpf2a64[BPF_REG_6];
	const u8 r7 = bpf2a64[BPF_REG_7];
	const u8 r8 = bpf2a64[BPF_REG_8];
	const u8 r9 = bpf2a64[BPF_REG_9];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 fpb = bpf2a64[FP_BOTTOM];

	/* We're done with BPF stack */
	emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	/* Restore x27 and x28 */
	emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
	/* Restore fp (x25) and x26 */
	emit(A64_POP(fp, A64_R(26), A64_SP), ctx);

	/* Restore callee-saved register */
	emit(A64_POP(r8, r9, A64_SP), ctx);
	emit(A64_POP(r6, r7, A64_SP), ctx);

	/* Restore FP/LR registers */
	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);

	/* Set return value */
	emit(A64_MOV(1, A64_R(0), r0), ctx);

	/* Authenticate lr */
	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
		emit(A64_AUTIASP, ctx);

	emit(A64_RET(A64_LR), ctx);
}

/* ex->fixup encoding: low 27 bits = fixup offset, high 5 = dst register */
#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)

/*
 * Fault handler for BPF PROBE_MEM loads: zero the destination register
 * and resume at the instruction after the faulting load.
 */
bool ex_handler_bpf(const struct exception_table_entry *ex,
		    struct pt_regs *regs)
{
	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);

	regs->regs[dst_reg] = 0;
	/* the fixup offset was stored positive; see add_exception_handler() */
	regs->pc = (unsigned long)&ex->fixup - offset;
	return true;
}

/* For accesses to BTF pointers, add an entry to the exception table */
static int add_exception_handler(const struct bpf_insn *insn,
				 struct jit_ctx *ctx,
				 int dst_reg)
{
	off_t offset;
	unsigned long pc;
	struct exception_table_entry *ex;

	if (!ctx->image)
		/* First pass */
		return 0;

	if (BPF_MODE(insn->code) != BPF_PROBE_MEM)
		return 0;

	if (!ctx->prog->aux->extable ||
	    WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
		return -EINVAL;

	/* pc of the most recently emitted (i.e. the faulting) insn */
	ex = &ctx->prog->aux->extable[ctx->exentry_idx];
	pc = (unsigned long)&ctx->image[ctx->idx - 1];

	offset = pc - (long)&ex->insn;
	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
		return -ERANGE;
	ex->insn = offset;

	/*
	 * Since the extable follows the program, the fixup offset is always
	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
	 * to keep things simple, and put the destination register in the upper
	 * bits. We don't need to worry about buildtime or runtime sort
	 * modifying the upper bits because the table is already sorted, and
	 * isn't part of the main exception table.
	 */
	offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
		return -ERANGE;

	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

	ex->type = EX_TYPE_BPF;

	ctx->exentry_idx++;
	return 0;
}

/* JITs an eBPF instruction.
 * Returns:
 * 0  - successfully JITed an 8-byte eBPF instruction.
 * >0 - successfully JITed a 16-byte eBPF instruction.
 * <0 - failed to JIT.
759 */ 760 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, 761 bool extra_pass) 762 { 763 const u8 code = insn->code; 764 const u8 dst = bpf2a64[insn->dst_reg]; 765 const u8 src = bpf2a64[insn->src_reg]; 766 const u8 tmp = bpf2a64[TMP_REG_1]; 767 const u8 tmp2 = bpf2a64[TMP_REG_2]; 768 const u8 fp = bpf2a64[BPF_REG_FP]; 769 const u8 fpb = bpf2a64[FP_BOTTOM]; 770 const s16 off = insn->off; 771 const s32 imm = insn->imm; 772 const int i = insn - ctx->prog->insnsi; 773 const bool is64 = BPF_CLASS(code) == BPF_ALU64 || 774 BPF_CLASS(code) == BPF_JMP; 775 u8 jmp_cond; 776 s32 jmp_offset; 777 u32 a64_insn; 778 u8 src_adj; 779 u8 dst_adj; 780 int off_adj; 781 int ret; 782 783 switch (code) { 784 /* dst = src */ 785 case BPF_ALU | BPF_MOV | BPF_X: 786 case BPF_ALU64 | BPF_MOV | BPF_X: 787 emit(A64_MOV(is64, dst, src), ctx); 788 break; 789 /* dst = dst OP src */ 790 case BPF_ALU | BPF_ADD | BPF_X: 791 case BPF_ALU64 | BPF_ADD | BPF_X: 792 emit(A64_ADD(is64, dst, dst, src), ctx); 793 break; 794 case BPF_ALU | BPF_SUB | BPF_X: 795 case BPF_ALU64 | BPF_SUB | BPF_X: 796 emit(A64_SUB(is64, dst, dst, src), ctx); 797 break; 798 case BPF_ALU | BPF_AND | BPF_X: 799 case BPF_ALU64 | BPF_AND | BPF_X: 800 emit(A64_AND(is64, dst, dst, src), ctx); 801 break; 802 case BPF_ALU | BPF_OR | BPF_X: 803 case BPF_ALU64 | BPF_OR | BPF_X: 804 emit(A64_ORR(is64, dst, dst, src), ctx); 805 break; 806 case BPF_ALU | BPF_XOR | BPF_X: 807 case BPF_ALU64 | BPF_XOR | BPF_X: 808 emit(A64_EOR(is64, dst, dst, src), ctx); 809 break; 810 case BPF_ALU | BPF_MUL | BPF_X: 811 case BPF_ALU64 | BPF_MUL | BPF_X: 812 emit(A64_MUL(is64, dst, dst, src), ctx); 813 break; 814 case BPF_ALU | BPF_DIV | BPF_X: 815 case BPF_ALU64 | BPF_DIV | BPF_X: 816 emit(A64_UDIV(is64, dst, dst, src), ctx); 817 break; 818 case BPF_ALU | BPF_MOD | BPF_X: 819 case BPF_ALU64 | BPF_MOD | BPF_X: 820 emit(A64_UDIV(is64, tmp, dst, src), ctx); 821 emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); 822 break; 823 case BPF_ALU | 
BPF_LSH | BPF_X: 824 case BPF_ALU64 | BPF_LSH | BPF_X: 825 emit(A64_LSLV(is64, dst, dst, src), ctx); 826 break; 827 case BPF_ALU | BPF_RSH | BPF_X: 828 case BPF_ALU64 | BPF_RSH | BPF_X: 829 emit(A64_LSRV(is64, dst, dst, src), ctx); 830 break; 831 case BPF_ALU | BPF_ARSH | BPF_X: 832 case BPF_ALU64 | BPF_ARSH | BPF_X: 833 emit(A64_ASRV(is64, dst, dst, src), ctx); 834 break; 835 /* dst = -dst */ 836 case BPF_ALU | BPF_NEG: 837 case BPF_ALU64 | BPF_NEG: 838 emit(A64_NEG(is64, dst, dst), ctx); 839 break; 840 /* dst = BSWAP##imm(dst) */ 841 case BPF_ALU | BPF_END | BPF_FROM_LE: 842 case BPF_ALU | BPF_END | BPF_FROM_BE: 843 #ifdef CONFIG_CPU_BIG_ENDIAN 844 if (BPF_SRC(code) == BPF_FROM_BE) 845 goto emit_bswap_uxt; 846 #else /* !CONFIG_CPU_BIG_ENDIAN */ 847 if (BPF_SRC(code) == BPF_FROM_LE) 848 goto emit_bswap_uxt; 849 #endif 850 switch (imm) { 851 case 16: 852 emit(A64_REV16(is64, dst, dst), ctx); 853 /* zero-extend 16 bits into 64 bits */ 854 emit(A64_UXTH(is64, dst, dst), ctx); 855 break; 856 case 32: 857 emit(A64_REV32(is64, dst, dst), ctx); 858 /* upper 32 bits already cleared */ 859 break; 860 case 64: 861 emit(A64_REV64(dst, dst), ctx); 862 break; 863 } 864 break; 865 emit_bswap_uxt: 866 switch (imm) { 867 case 16: 868 /* zero-extend 16 bits into 64 bits */ 869 emit(A64_UXTH(is64, dst, dst), ctx); 870 break; 871 case 32: 872 /* zero-extend 32 bits into 64 bits */ 873 emit(A64_UXTW(is64, dst, dst), ctx); 874 break; 875 case 64: 876 /* nop */ 877 break; 878 } 879 break; 880 /* dst = imm */ 881 case BPF_ALU | BPF_MOV | BPF_K: 882 case BPF_ALU64 | BPF_MOV | BPF_K: 883 emit_a64_mov_i(is64, dst, imm, ctx); 884 break; 885 /* dst = dst OP imm */ 886 case BPF_ALU | BPF_ADD | BPF_K: 887 case BPF_ALU64 | BPF_ADD | BPF_K: 888 if (is_addsub_imm(imm)) { 889 emit(A64_ADD_I(is64, dst, dst, imm), ctx); 890 } else if (is_addsub_imm(-imm)) { 891 emit(A64_SUB_I(is64, dst, dst, -imm), ctx); 892 } else { 893 emit_a64_mov_i(is64, tmp, imm, ctx); 894 emit(A64_ADD(is64, dst, dst, tmp), 
ctx); 895 } 896 break; 897 case BPF_ALU | BPF_SUB | BPF_K: 898 case BPF_ALU64 | BPF_SUB | BPF_K: 899 if (is_addsub_imm(imm)) { 900 emit(A64_SUB_I(is64, dst, dst, imm), ctx); 901 } else if (is_addsub_imm(-imm)) { 902 emit(A64_ADD_I(is64, dst, dst, -imm), ctx); 903 } else { 904 emit_a64_mov_i(is64, tmp, imm, ctx); 905 emit(A64_SUB(is64, dst, dst, tmp), ctx); 906 } 907 break; 908 case BPF_ALU | BPF_AND | BPF_K: 909 case BPF_ALU64 | BPF_AND | BPF_K: 910 a64_insn = A64_AND_I(is64, dst, dst, imm); 911 if (a64_insn != AARCH64_BREAK_FAULT) { 912 emit(a64_insn, ctx); 913 } else { 914 emit_a64_mov_i(is64, tmp, imm, ctx); 915 emit(A64_AND(is64, dst, dst, tmp), ctx); 916 } 917 break; 918 case BPF_ALU | BPF_OR | BPF_K: 919 case BPF_ALU64 | BPF_OR | BPF_K: 920 a64_insn = A64_ORR_I(is64, dst, dst, imm); 921 if (a64_insn != AARCH64_BREAK_FAULT) { 922 emit(a64_insn, ctx); 923 } else { 924 emit_a64_mov_i(is64, tmp, imm, ctx); 925 emit(A64_ORR(is64, dst, dst, tmp), ctx); 926 } 927 break; 928 case BPF_ALU | BPF_XOR | BPF_K: 929 case BPF_ALU64 | BPF_XOR | BPF_K: 930 a64_insn = A64_EOR_I(is64, dst, dst, imm); 931 if (a64_insn != AARCH64_BREAK_FAULT) { 932 emit(a64_insn, ctx); 933 } else { 934 emit_a64_mov_i(is64, tmp, imm, ctx); 935 emit(A64_EOR(is64, dst, dst, tmp), ctx); 936 } 937 break; 938 case BPF_ALU | BPF_MUL | BPF_K: 939 case BPF_ALU64 | BPF_MUL | BPF_K: 940 emit_a64_mov_i(is64, tmp, imm, ctx); 941 emit(A64_MUL(is64, dst, dst, tmp), ctx); 942 break; 943 case BPF_ALU | BPF_DIV | BPF_K: 944 case BPF_ALU64 | BPF_DIV | BPF_K: 945 emit_a64_mov_i(is64, tmp, imm, ctx); 946 emit(A64_UDIV(is64, dst, dst, tmp), ctx); 947 break; 948 case BPF_ALU | BPF_MOD | BPF_K: 949 case BPF_ALU64 | BPF_MOD | BPF_K: 950 emit_a64_mov_i(is64, tmp2, imm, ctx); 951 emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); 952 emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx); 953 break; 954 case BPF_ALU | BPF_LSH | BPF_K: 955 case BPF_ALU64 | BPF_LSH | BPF_K: 956 emit(A64_LSL(is64, dst, dst, imm), ctx); 957 break; 958 case 
BPF_ALU | BPF_RSH | BPF_K: 959 case BPF_ALU64 | BPF_RSH | BPF_K: 960 emit(A64_LSR(is64, dst, dst, imm), ctx); 961 break; 962 case BPF_ALU | BPF_ARSH | BPF_K: 963 case BPF_ALU64 | BPF_ARSH | BPF_K: 964 emit(A64_ASR(is64, dst, dst, imm), ctx); 965 break; 966 967 /* JUMP off */ 968 case BPF_JMP | BPF_JA: 969 jmp_offset = bpf2a64_offset(i, off, ctx); 970 check_imm26(jmp_offset); 971 emit(A64_B(jmp_offset), ctx); 972 break; 973 /* IF (dst COND src) JUMP off */ 974 case BPF_JMP | BPF_JEQ | BPF_X: 975 case BPF_JMP | BPF_JGT | BPF_X: 976 case BPF_JMP | BPF_JLT | BPF_X: 977 case BPF_JMP | BPF_JGE | BPF_X: 978 case BPF_JMP | BPF_JLE | BPF_X: 979 case BPF_JMP | BPF_JNE | BPF_X: 980 case BPF_JMP | BPF_JSGT | BPF_X: 981 case BPF_JMP | BPF_JSLT | BPF_X: 982 case BPF_JMP | BPF_JSGE | BPF_X: 983 case BPF_JMP | BPF_JSLE | BPF_X: 984 case BPF_JMP32 | BPF_JEQ | BPF_X: 985 case BPF_JMP32 | BPF_JGT | BPF_X: 986 case BPF_JMP32 | BPF_JLT | BPF_X: 987 case BPF_JMP32 | BPF_JGE | BPF_X: 988 case BPF_JMP32 | BPF_JLE | BPF_X: 989 case BPF_JMP32 | BPF_JNE | BPF_X: 990 case BPF_JMP32 | BPF_JSGT | BPF_X: 991 case BPF_JMP32 | BPF_JSLT | BPF_X: 992 case BPF_JMP32 | BPF_JSGE | BPF_X: 993 case BPF_JMP32 | BPF_JSLE | BPF_X: 994 emit(A64_CMP(is64, dst, src), ctx); 995 emit_cond_jmp: 996 jmp_offset = bpf2a64_offset(i, off, ctx); 997 check_imm19(jmp_offset); 998 switch (BPF_OP(code)) { 999 case BPF_JEQ: 1000 jmp_cond = A64_COND_EQ; 1001 break; 1002 case BPF_JGT: 1003 jmp_cond = A64_COND_HI; 1004 break; 1005 case BPF_JLT: 1006 jmp_cond = A64_COND_CC; 1007 break; 1008 case BPF_JGE: 1009 jmp_cond = A64_COND_CS; 1010 break; 1011 case BPF_JLE: 1012 jmp_cond = A64_COND_LS; 1013 break; 1014 case BPF_JSET: 1015 case BPF_JNE: 1016 jmp_cond = A64_COND_NE; 1017 break; 1018 case BPF_JSGT: 1019 jmp_cond = A64_COND_GT; 1020 break; 1021 case BPF_JSLT: 1022 jmp_cond = A64_COND_LT; 1023 break; 1024 case BPF_JSGE: 1025 jmp_cond = A64_COND_GE; 1026 break; 1027 case BPF_JSLE: 1028 jmp_cond = A64_COND_LE; 1029 break; 1030 
default: 1031 return -EFAULT; 1032 } 1033 emit(A64_B_(jmp_cond, jmp_offset), ctx); 1034 break; 1035 case BPF_JMP | BPF_JSET | BPF_X: 1036 case BPF_JMP32 | BPF_JSET | BPF_X: 1037 emit(A64_TST(is64, dst, src), ctx); 1038 goto emit_cond_jmp; 1039 /* IF (dst COND imm) JUMP off */ 1040 case BPF_JMP | BPF_JEQ | BPF_K: 1041 case BPF_JMP | BPF_JGT | BPF_K: 1042 case BPF_JMP | BPF_JLT | BPF_K: 1043 case BPF_JMP | BPF_JGE | BPF_K: 1044 case BPF_JMP | BPF_JLE | BPF_K: 1045 case BPF_JMP | BPF_JNE | BPF_K: 1046 case BPF_JMP | BPF_JSGT | BPF_K: 1047 case BPF_JMP | BPF_JSLT | BPF_K: 1048 case BPF_JMP | BPF_JSGE | BPF_K: 1049 case BPF_JMP | BPF_JSLE | BPF_K: 1050 case BPF_JMP32 | BPF_JEQ | BPF_K: 1051 case BPF_JMP32 | BPF_JGT | BPF_K: 1052 case BPF_JMP32 | BPF_JLT | BPF_K: 1053 case BPF_JMP32 | BPF_JGE | BPF_K: 1054 case BPF_JMP32 | BPF_JLE | BPF_K: 1055 case BPF_JMP32 | BPF_JNE | BPF_K: 1056 case BPF_JMP32 | BPF_JSGT | BPF_K: 1057 case BPF_JMP32 | BPF_JSLT | BPF_K: 1058 case BPF_JMP32 | BPF_JSGE | BPF_K: 1059 case BPF_JMP32 | BPF_JSLE | BPF_K: 1060 if (is_addsub_imm(imm)) { 1061 emit(A64_CMP_I(is64, dst, imm), ctx); 1062 } else if (is_addsub_imm(-imm)) { 1063 emit(A64_CMN_I(is64, dst, -imm), ctx); 1064 } else { 1065 emit_a64_mov_i(is64, tmp, imm, ctx); 1066 emit(A64_CMP(is64, dst, tmp), ctx); 1067 } 1068 goto emit_cond_jmp; 1069 case BPF_JMP | BPF_JSET | BPF_K: 1070 case BPF_JMP32 | BPF_JSET | BPF_K: 1071 a64_insn = A64_TST_I(is64, dst, imm); 1072 if (a64_insn != AARCH64_BREAK_FAULT) { 1073 emit(a64_insn, ctx); 1074 } else { 1075 emit_a64_mov_i(is64, tmp, imm, ctx); 1076 emit(A64_TST(is64, dst, tmp), ctx); 1077 } 1078 goto emit_cond_jmp; 1079 /* function call */ 1080 case BPF_JMP | BPF_CALL: 1081 { 1082 const u8 r0 = bpf2a64[BPF_REG_0]; 1083 bool func_addr_fixed; 1084 u64 func_addr; 1085 1086 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, 1087 &func_addr, &func_addr_fixed); 1088 if (ret < 0) 1089 return ret; 1090 emit_call(func_addr, ctx); 1091 emit(A64_MOV(1, r0, 
A64_R(0)), ctx); 1092 break; 1093 } 1094 /* tail call */ 1095 case BPF_JMP | BPF_TAIL_CALL: 1096 if (emit_bpf_tail_call(ctx)) 1097 return -EFAULT; 1098 break; 1099 /* function return */ 1100 case BPF_JMP | BPF_EXIT: 1101 /* Optimization: when last instruction is EXIT, 1102 simply fallthrough to epilogue. */ 1103 if (i == ctx->prog->len - 1) 1104 break; 1105 jmp_offset = epilogue_offset(ctx); 1106 check_imm26(jmp_offset); 1107 emit(A64_B(jmp_offset), ctx); 1108 break; 1109 1110 /* dst = imm64 */ 1111 case BPF_LD | BPF_IMM | BPF_DW: 1112 { 1113 const struct bpf_insn insn1 = insn[1]; 1114 u64 imm64; 1115 1116 imm64 = (u64)insn1.imm << 32 | (u32)imm; 1117 if (bpf_pseudo_func(insn)) 1118 emit_addr_mov_i64(dst, imm64, ctx); 1119 else 1120 emit_a64_mov_i64(dst, imm64, ctx); 1121 1122 return 1; 1123 } 1124 1125 /* LDX: dst = *(size *)(src + off) */ 1126 case BPF_LDX | BPF_MEM | BPF_W: 1127 case BPF_LDX | BPF_MEM | BPF_H: 1128 case BPF_LDX | BPF_MEM | BPF_B: 1129 case BPF_LDX | BPF_MEM | BPF_DW: 1130 case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 1131 case BPF_LDX | BPF_PROBE_MEM | BPF_W: 1132 case BPF_LDX | BPF_PROBE_MEM | BPF_H: 1133 case BPF_LDX | BPF_PROBE_MEM | BPF_B: 1134 if (ctx->fpb_offset > 0 && src == fp) { 1135 src_adj = fpb; 1136 off_adj = off + ctx->fpb_offset; 1137 } else { 1138 src_adj = src; 1139 off_adj = off; 1140 } 1141 switch (BPF_SIZE(code)) { 1142 case BPF_W: 1143 if (is_lsi_offset(off_adj, 2)) { 1144 emit(A64_LDR32I(dst, src_adj, off_adj), ctx); 1145 } else { 1146 emit_a64_mov_i(1, tmp, off, ctx); 1147 emit(A64_LDR32(dst, src, tmp), ctx); 1148 } 1149 break; 1150 case BPF_H: 1151 if (is_lsi_offset(off_adj, 1)) { 1152 emit(A64_LDRHI(dst, src_adj, off_adj), ctx); 1153 } else { 1154 emit_a64_mov_i(1, tmp, off, ctx); 1155 emit(A64_LDRH(dst, src, tmp), ctx); 1156 } 1157 break; 1158 case BPF_B: 1159 if (is_lsi_offset(off_adj, 0)) { 1160 emit(A64_LDRBI(dst, src_adj, off_adj), ctx); 1161 } else { 1162 emit_a64_mov_i(1, tmp, off, ctx); 1163 emit(A64_LDRB(dst, src, 
tmp), ctx); 1164 } 1165 break; 1166 case BPF_DW: 1167 if (is_lsi_offset(off_adj, 3)) { 1168 emit(A64_LDR64I(dst, src_adj, off_adj), ctx); 1169 } else { 1170 emit_a64_mov_i(1, tmp, off, ctx); 1171 emit(A64_LDR64(dst, src, tmp), ctx); 1172 } 1173 break; 1174 } 1175 1176 ret = add_exception_handler(insn, ctx, dst); 1177 if (ret) 1178 return ret; 1179 break; 1180 1181 /* speculation barrier */ 1182 case BPF_ST | BPF_NOSPEC: 1183 /* 1184 * Nothing required here. 1185 * 1186 * In case of arm64, we rely on the firmware mitigation of 1187 * Speculative Store Bypass as controlled via the ssbd kernel 1188 * parameter. Whenever the mitigation is enabled, it works 1189 * for all of the kernel code with no need to provide any 1190 * additional instructions. 1191 */ 1192 break; 1193 1194 /* ST: *(size *)(dst + off) = imm */ 1195 case BPF_ST | BPF_MEM | BPF_W: 1196 case BPF_ST | BPF_MEM | BPF_H: 1197 case BPF_ST | BPF_MEM | BPF_B: 1198 case BPF_ST | BPF_MEM | BPF_DW: 1199 if (ctx->fpb_offset > 0 && dst == fp) { 1200 dst_adj = fpb; 1201 off_adj = off + ctx->fpb_offset; 1202 } else { 1203 dst_adj = dst; 1204 off_adj = off; 1205 } 1206 /* Load imm to a register then store it */ 1207 emit_a64_mov_i(1, tmp, imm, ctx); 1208 switch (BPF_SIZE(code)) { 1209 case BPF_W: 1210 if (is_lsi_offset(off_adj, 2)) { 1211 emit(A64_STR32I(tmp, dst_adj, off_adj), ctx); 1212 } else { 1213 emit_a64_mov_i(1, tmp2, off, ctx); 1214 emit(A64_STR32(tmp, dst, tmp2), ctx); 1215 } 1216 break; 1217 case BPF_H: 1218 if (is_lsi_offset(off_adj, 1)) { 1219 emit(A64_STRHI(tmp, dst_adj, off_adj), ctx); 1220 } else { 1221 emit_a64_mov_i(1, tmp2, off, ctx); 1222 emit(A64_STRH(tmp, dst, tmp2), ctx); 1223 } 1224 break; 1225 case BPF_B: 1226 if (is_lsi_offset(off_adj, 0)) { 1227 emit(A64_STRBI(tmp, dst_adj, off_adj), ctx); 1228 } else { 1229 emit_a64_mov_i(1, tmp2, off, ctx); 1230 emit(A64_STRB(tmp, dst, tmp2), ctx); 1231 } 1232 break; 1233 case BPF_DW: 1234 if (is_lsi_offset(off_adj, 3)) { 1235 emit(A64_STR64I(tmp, 
dst_adj, off_adj), ctx); 1236 } else { 1237 emit_a64_mov_i(1, tmp2, off, ctx); 1238 emit(A64_STR64(tmp, dst, tmp2), ctx); 1239 } 1240 break; 1241 } 1242 break; 1243 1244 /* STX: *(size *)(dst + off) = src */ 1245 case BPF_STX | BPF_MEM | BPF_W: 1246 case BPF_STX | BPF_MEM | BPF_H: 1247 case BPF_STX | BPF_MEM | BPF_B: 1248 case BPF_STX | BPF_MEM | BPF_DW: 1249 if (ctx->fpb_offset > 0 && dst == fp) { 1250 dst_adj = fpb; 1251 off_adj = off + ctx->fpb_offset; 1252 } else { 1253 dst_adj = dst; 1254 off_adj = off; 1255 } 1256 switch (BPF_SIZE(code)) { 1257 case BPF_W: 1258 if (is_lsi_offset(off_adj, 2)) { 1259 emit(A64_STR32I(src, dst_adj, off_adj), ctx); 1260 } else { 1261 emit_a64_mov_i(1, tmp, off, ctx); 1262 emit(A64_STR32(src, dst, tmp), ctx); 1263 } 1264 break; 1265 case BPF_H: 1266 if (is_lsi_offset(off_adj, 1)) { 1267 emit(A64_STRHI(src, dst_adj, off_adj), ctx); 1268 } else { 1269 emit_a64_mov_i(1, tmp, off, ctx); 1270 emit(A64_STRH(src, dst, tmp), ctx); 1271 } 1272 break; 1273 case BPF_B: 1274 if (is_lsi_offset(off_adj, 0)) { 1275 emit(A64_STRBI(src, dst_adj, off_adj), ctx); 1276 } else { 1277 emit_a64_mov_i(1, tmp, off, ctx); 1278 emit(A64_STRB(src, dst, tmp), ctx); 1279 } 1280 break; 1281 case BPF_DW: 1282 if (is_lsi_offset(off_adj, 3)) { 1283 emit(A64_STR64I(src, dst_adj, off_adj), ctx); 1284 } else { 1285 emit_a64_mov_i(1, tmp, off, ctx); 1286 emit(A64_STR64(src, dst, tmp), ctx); 1287 } 1288 break; 1289 } 1290 break; 1291 1292 case BPF_STX | BPF_ATOMIC | BPF_W: 1293 case BPF_STX | BPF_ATOMIC | BPF_DW: 1294 if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) 1295 ret = emit_lse_atomic(insn, ctx); 1296 else 1297 ret = emit_ll_sc_atomic(insn, ctx); 1298 if (ret) 1299 return ret; 1300 break; 1301 1302 default: 1303 pr_err_once("unknown opcode %02x\n", code); 1304 return -EINVAL; 1305 } 1306 1307 return 0; 1308 } 1309 1310 /* 1311 * Return 0 if FP may change at runtime, otherwise find the minimum negative 1312 * offset to FP, converts it to positive number, and align down 
 * to 8 bytes.
 */
static int find_fpb_offset(struct bpf_prog *prog)
{
	int i;
	int offset = 0;

	/* Scan every instruction: if anything may write BPF_REG_FP (an
	 * atomic result, a load destination, or an ALU destination), FP is
	 * not stable and 0 is returned.  Otherwise track the most negative
	 * BPF_MEM offset ever applied to FP.
	 */
	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		const u8 class = BPF_CLASS(insn->code);
		const u8 mode = BPF_MODE(insn->code);
		const u8 src = insn->src_reg;
		const u8 dst = insn->dst_reg;
		const s32 imm = insn->imm;
		const s16 off = insn->off;

		switch (class) {
		case BPF_STX:
		case BPF_ST:
			/* fp holds atomic operation result */
			if (class == BPF_STX && mode == BPF_ATOMIC &&
			    ((imm == BPF_XCHG ||
			      imm == (BPF_FETCH | BPF_ADD) ||
			      imm == (BPF_FETCH | BPF_AND) ||
			      imm == (BPF_FETCH | BPF_XOR) ||
			      imm == (BPF_FETCH | BPF_OR)) &&
			     src == BPF_REG_FP))
				return 0;

			if (mode == BPF_MEM && dst == BPF_REG_FP &&
			    off < offset)
				offset = insn->off;
			break;

		case BPF_JMP32:
		case BPF_JMP:
			/* jumps never write FP */
			break;

		case BPF_LDX:
		case BPF_LD:
			/* fp holds load result */
			if (dst == BPF_REG_FP)
				return 0;

			if (class == BPF_LDX && mode == BPF_MEM &&
			    src == BPF_REG_FP && off < offset)
				offset = off;
			break;

		case BPF_ALU:
		case BPF_ALU64:
		default:
			/* fp holds ALU result */
			if (dst == BPF_REG_FP)
				return 0;
		}
	}

	if (offset < 0) {
		/*
		 * safely be converted to a positive 'int', since insn->off
		 * is 's16'
		 */
		offset = -offset;
		/* align down to 8 bytes */
		offset = ALIGN_DOWN(offset, 8);
	}

	return offset;
}

/* JIT every BPF instruction in the program.  On the first (fake) pass
 * ctx->image is NULL and only ctx->idx / ctx->offset[] are computed;
 * on the real pass instructions are written into ctx->image.
 */
static int build_body(struct jit_ctx *ctx, bool extra_pass)
{
	const struct bpf_prog *prog = ctx->prog;
	int i;

	/*
	 * - offset[0] - offset of the end of prologue,
	 *   start of the 1st instruction.
	 * - offset[1] - offset of the end of 1st instruction,
	 *   start of the 2nd instruction
	 * [....]
	 * - offset[3] - offset of the end of 3rd instruction,
	 *   start of 4th instruction
	 */
	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		int ret;

		if (ctx->image == NULL)
			ctx->offset[i] = ctx->idx;
		ret = build_insn(insn, ctx, extra_pass);
		if (ret > 0) {
			/* build_insn() consumed two BPF insns (ld_imm64);
			 * record the offset for the skipped slot too.
			 */
			i++;
			if (ctx->image == NULL)
				ctx->offset[i] = ctx->idx;
			continue;
		}
		if (ret)
			return ret;
	}
	/*
	 * offset is allocated with prog->len + 1 so fill in
	 * the last element with the offset after the last
	 * instruction (end of program)
	 */
	if (ctx->image == NULL)
		ctx->offset[i] = ctx->idx;

	return 0;
}

/* Return -1 if any emitted word is AARCH64_BREAK_FAULT, i.e. some
 * instruction failed to encode during the image pass.
 */
static int validate_code(struct jit_ctx *ctx)
{
	int i;

	for (i = 0; i < ctx->idx; i++) {
		u32 a64_insn = le32_to_cpu(ctx->image[i]);

		if (a64_insn == AARCH64_BREAK_FAULT)
			return -1;
	}
	return 0;
}

/* Validate the image and check that exactly the expected number of
 * exception-table entries were generated.
 */
static int validate_ctx(struct jit_ctx *ctx)
{
	if (validate_code(ctx))
		return -1;

	if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
		return -1;

	return 0;
}

/* Synchronize I-cache with the freshly written JIT image. */
static inline void bpf_flush_icache(void *start, void *end)
{
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

/* Per-program state kept between the first JIT pass and the extra pass
 * for multi-function (bpf2bpf) programs.
 */
struct arm64_jit_data {
	struct bpf_binary_header *header;	/* allocated image header */
	u8 *image;				/* start of JITed code */
	struct jit_ctx ctx;			/* saved JIT context */
};

/* Main JIT entry point.
 *
 * Pass 1 runs with ctx.image == NULL purely to size the image and fill
 * ctx.offset[] (needed by BPF line info); pass 2 emits into the
 * allocated image; pass 3 validates the result.  For subprograms
 * (prog->is_func) the context is stashed in jit_data and finalized on
 * the later extra pass.  Returns the (possibly unchanged) prog.
 */
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	int image_size, prog_size, extable_size, extable_align, extable_offset;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	struct arm64_jit_data *jit_data;
	bool was_classic = bpf_prog_was_classic(prog);
	bool tmp_blinded = false;
	bool extra_pass = false;
	struct jit_ctx ctx;
	u8 *image_ptr;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/* If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	if (jit_data->ctx.offset) {
		/* extra pass for a subprog: resume from the saved context */
		ctx = jit_data->ctx;
		image_ptr = jit_data->image;
		header = jit_data->header;
		extra_pass = true;
		prog_size = sizeof(u32) * ctx.idx;
		goto skip_init_ctx;
	}
	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	/* prog->len + 1: the extra slot records the end-of-program offset */
	ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
	if (ctx.offset == NULL) {
		prog = orig_prog;
		goto out_off;
	}

	ctx.fpb_offset = find_fpb_offset(prog);

	/*
	 * 1. Initial fake pass to compute ctx->idx and ctx->offset.
	 *
	 * BPF line info needs ctx->offset[i] to be the offset of
	 * instruction[i] in jited image, so build prologue first.
	 */
	if (build_prologue(&ctx, was_classic)) {
		prog = orig_prog;
		goto out_off;
	}

	if (build_body(&ctx, extra_pass)) {
		prog = orig_prog;
		goto out_off;
	}

	ctx.epilogue_offset = ctx.idx;
	build_epilogue(&ctx);
	build_plt(&ctx);

	extable_align = __alignof__(struct exception_table_entry);
	extable_size = prog->aux->num_exentries *
		sizeof(struct exception_table_entry);

	/* Now we know the actual image size. */
	prog_size = sizeof(u32) * ctx.idx;
	/* also allocate space for plt target */
	extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
	image_size = extable_offset + extable_size;
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	if (header == NULL) {
		prog = orig_prog;
		goto out_off;
	}

	/* 2. Now, the actual pass. */

	ctx.image = (__le32 *)image_ptr;
	if (extable_size)
		prog->aux->extable = (void *)image_ptr + extable_offset;
skip_init_ctx:
	ctx.idx = 0;
	ctx.exentry_idx = 0;

	build_prologue(&ctx, was_classic);

	if (build_body(&ctx, extra_pass)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_off;
	}

	build_epilogue(&ctx);
	build_plt(&ctx);

	/* 3. Extra pass to validate JITed code. */
	if (validate_ctx(&ctx)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_off;
	}

	/* And we're done. */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);

	bpf_flush_icache(header, ctx.image + ctx.idx);

	if (!prog->is_func || extra_pass) {
		/* the extra pass must reproduce exactly the same number of
		 * instructions as the first pass, else the image layout is
		 * inconsistent and the program must be rejected.
		 */
		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
			pr_err_once("multi-func JIT bug %d != %d\n",
				    ctx.idx, jit_data->ctx.idx);
			bpf_jit_binary_free(header);
			prog->bpf_func = NULL;
			prog->jited = 0;
			prog->jited_len = 0;
			goto out_off;
		}
		bpf_jit_binary_lock_ro(header);
	} else {
		/* subprog first pass: keep everything for the extra pass */
		jit_data->ctx = ctx;
		jit_data->image = image_ptr;
		jit_data->header = header;
	}
	prog->bpf_func = (void *)ctx.image;
	prog->jited = 1;
	prog->jited_len = prog_size;

	if (!prog->is_func || extra_pass) {
		int i;

		/* offset[prog->len] is the size of program */
		for (i = 0; i <= prog->len; i++)
			ctx.offset[i] *= AARCH64_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
		/* NOTE: out_off sits inside this conditional on purpose —
		 * for subprog first passes the offsets/jit_data must survive
		 * until the extra pass; cleanup only runs on final passes
		 * and on the early error paths that jump here.
		 */
out_off:
		kvfree(ctx.offset);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}

u64 bpf_jit_alloc_exec_limit(void)
{
	return VMALLOC_END - VMALLOC_START;
}

void *bpf_jit_alloc_exec(unsigned long size)
{
	/* Memory is intended to be executable, reset the pointer tag. */
	return kasan_reset_tag(vmalloc(size));
}

void bpf_jit_free_exec(void *addr)
{
	return vfree(addr);
}

/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls.
 */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}

/* Emit the code that runs one fentry/fexit/fmod_ret BPF program from a
 * trampoline: store the cookie into the on-stack run_ctx, call the
 * __bpf_prog_enter counterpart, run the program (skipped via cbz if
 * enter returned 0), optionally store its return value at retval_off,
 * then call the __bpf_prog_exit counterpart.
 */
static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
			    int args_off, int retval_off, int run_ctx_off,
			    bool save_ret)
{
	__le32 *branch;
	u64 enter_prog;
	u64 exit_prog;
	struct bpf_prog *p = l->link.prog;
	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);

	enter_prog = (u64)bpf_trampoline_enter(p);
	exit_prog = (u64)bpf_trampoline_exit(p);

	if (l->cookie == 0) {
		/* if cookie is zero, one instruction is enough to store it */
		emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
	} else {
		emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
		emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
		     ctx);
	}

	/* save p to callee saved register x19 to avoid loading p with mov_i64
	 * each time.
	 */
	emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);

	/* arg1: prog */
	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
	/* arg2: &run_ctx */
	emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx);

	emit_call(enter_prog, ctx);

	/* if (__bpf_prog_enter(prog) == 0)
	 *         goto skip_exec_of_prog;
	 *
	 * Remember the patch site and emit a nop for now; it is replaced
	 * with the real cbz below once the target offset is known.
	 */
	branch = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);

	/* save return value to callee saved register x20 */
	emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);

	/* arg1 for the bpf prog: pointer to the saved arg regs */
	emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
	if (!p->jited)
		/* interpreted prog: arg2 is the insn array */
		emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);

	emit_call((const u64)p->bpf_func, ctx);

	if (save_ret)
		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);

	if (ctx->image) {
		/* image pass: back-patch the reserved nop with the cbz */
		int offset = &ctx->image[ctx->idx] - branch;
		*branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset));
	}

	/* arg1: prog */
	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
	/* arg2: start time */
	emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx);
	/* arg3: &run_ctx */
	emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx);

	emit_call(exit_prog, ctx);
}

/* Emit all fmod_ret programs.  After each one, the stored return value
 * is reloaded and a placeholder nop is emitted; the caller later turns
 * each nop into "cbnz -> do_fexit" once the target is known.
 */
static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
			       int args_off, int retval_off, int run_ctx_off,
			       __le32 **branches)
{
	int i;

	/* The first fmod_ret program will receive a garbage return value.
	 * Set this to 0 to avoid confusing the program.
	 */
	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
	for (i = 0; i < tl->nr_links; i++) {
		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
				run_ctx_off, true);
		/* if (*(u64 *)(sp + retval_off) !=  0)
		 *         goto do_fexit;
		 */
		emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);
		/* Save the location of branch, and generate a nop.
		 * This nop will be replaced with a cbnz later.
		 */
		branches[i] = ctx->image + ctx->idx;
		emit(A64_NOP, ctx);
	}
}

/* Spill argument registers x0..x(nregs-1) to the stack at args_off. */
static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
{
	int i;

	for (i = 0; i < nregs; i++) {
		emit(A64_STR64I(i, A64_SP, args_off), ctx);
		args_off += 8;
	}
}

/* Reload argument registers x0..x(nregs-1) from the stack at args_off. */
static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
{
	int i;

	for (i = 0; i < nregs; i++) {
		emit(A64_LDR64I(i, A64_SP, args_off), ctx);
		args_off += 8;
	}
}

/* Based on the x86's implementation of arch_prepare_bpf_trampoline().
 *
 * bpf prog and function entry before bpf trampoline hooked:
 *   mov x9, lr
 *   nop
 *
 * bpf prog and function entry after bpf trampoline hooked:
 *   mov x9, lr
 *   bl  <bpf_trampoline or plt>
 *
 */
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
			      struct bpf_tramp_links *tlinks, void *orig_call,
			      int nregs, u32 flags)
{
	int i;
	int stack_size;
	int retaddr_off;
	int regs_off;
	int retval_off;
	int args_off;
	int nregs_off;
	int ip_off;
	int run_ctx_off;
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	bool save_ret;
	__le32 **branches = NULL;

	/* trampoline stack layout:
	 *                  [ parent ip         ]
	 *                  [ FP                ]
	 * SP + retaddr_off [ self ip           ]
	 *                  [ FP                ]
	 *
	 *                  [ padding           ] align SP to multiples of 16
	 *
	 *                  [ x20               ] callee saved reg x20
	 * SP + regs_off    [ x19               ] callee saved reg x19
	 *
	 * SP + retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
	 *                                        BPF_TRAMP_F_RET_FENTRY_RET
	 *
	 *                  [ arg reg N         ]
	 *                  [ ...               ]
	 * SP + args_off    [ arg reg 1         ]
	 *
	 * SP + nregs_off   [ arg regs count    ]
	 *
	 * SP + ip_off      [ traced function   ] BPF_TRAMP_F_IP_ARG flag
	 *
	 * SP + run_ctx_off [ bpf_tramp_run_ctx ]
	 */

	stack_size = 0;
	run_ctx_off = stack_size;
	/* room for bpf_tramp_run_ctx */
	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);

	ip_off = stack_size;
	/* room for IP address argument */
	if (flags & BPF_TRAMP_F_IP_ARG)
		stack_size += 8;

	nregs_off = stack_size;
	/* room for args count */
	stack_size += 8;

	args_off = stack_size;
	/* room for args */
	stack_size += nregs * 8;

	/* room for return value */
	retval_off = stack_size;
	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
	if (save_ret)
		stack_size += 8;

	/* room for callee saved registers, currently x19 and x20 are used */
	regs_off = stack_size;
	stack_size += 16;

	/* round up to multiples of 16 to avoid SPAlignmentFault */
	stack_size = round_up(stack_size, 16);

	/* return address locates above FP */
	retaddr_off = stack_size + 8;

	/* bpf trampoline may be invoked by 3 instruction types:
	 * 1. bl, attached to bpf prog or kernel function via short jump
	 * 2. br, attached to bpf prog or kernel function via long jump
	 * 3. blr, working as a function pointer, used by struct_ops.
	 * So BTI_JC should used here to support both br and blr.
	 */
	emit_bti(A64_BTI_JC, ctx);

	/* frame for parent function */
	emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* frame for patched function */
	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* allocate stack space */
	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);

	if (flags & BPF_TRAMP_F_IP_ARG) {
		/* save ip address of the traced function */
		emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx);
		emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
	}

	/* save arg regs count */
	emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
	emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);

	/* save arg regs */
	save_args(ctx, args_off, nregs);

	/* save callee saved registers */
	emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
	emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
		emit_call((const u64)__bpf_tramp_enter, ctx);
	}

	for (i = 0; i < fentry->nr_links; i++)
		invoke_bpf_prog(ctx, fentry->links[i], args_off,
				retval_off, run_ctx_off,
				flags & BPF_TRAMP_F_RET_FENTRY_RET);

	if (fmod_ret->nr_links) {
		branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
				   GFP_KERNEL);
		if (!branches)
			return -ENOMEM;

		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
				   run_ctx_off, branches);
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		restore_args(ctx, args_off, nregs);
		/* call original func: load the saved return address and
		 * "return" into the patched function's body (past its
		 * patchsite), with lr set to the instruction after the
		 * next two, so the original func comes back here.
		 */
		emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
		emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
		emit(A64_RET(A64_R(10)), ctx);
		/* store return value */
		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
		/* reserve a nop for bpf_tramp_image_put */
		im->ip_after_call = ctx->image + ctx->idx;
		emit(A64_NOP, ctx);
	}

	/* update the branches saved in invoke_bpf_mod_ret with cbnz */
	for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
		int offset = &ctx->image[ctx->idx] - branches[i];
		*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
	}

	for (i = 0; i < fexit->nr_links; i++)
		invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
				run_ctx_off, false);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		im->ip_epilogue = ctx->image + ctx->idx;
		emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
		emit_call((const u64)__bpf_tramp_exit, ctx);
	}

	if (flags & BPF_TRAMP_F_RESTORE_REGS)
		restore_args(ctx, args_off, nregs);

	/* restore callee saved register x19 and x20 */
	emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
	emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx);

	if (save_ret)
		emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx);

	/* reset SP */
	emit(A64_MOV(1, A64_SP, A64_FP), ctx);

	/* pop frames */
	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx);

	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
		/* skip patched function, return to parent */
		emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
		emit(A64_RET(A64_R(9)), ctx);
	} else {
		/* return to patched function */
		emit(A64_MOV(1, A64_R(10), A64_LR), ctx);
		emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
		emit(A64_RET(A64_R(10)), ctx);
	}

	if (ctx->image)
		bpf_flush_icache(ctx->image, ctx->image + ctx->idx);

	kfree(branches);

	/* number of instructions emitted (or counted, on the sizing pass) */
	return ctx->idx;
}

/* Build a bpf trampoline into [image, image_end).  Like the JIT proper,
 * runs prepare_trampoline() twice: a sizing pass (ctx.image == NULL)
 * and an emission pass.  Returns the trampoline size in bytes, or a
 * negative errno.
 */
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
				void *image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *orig_call)
{
	int i, ret;
	int nregs = m->nr_args;
	int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
	struct jit_ctx ctx = {
		.image = NULL,
		.idx = 0,
	};

	/* extra registers needed for struct argument */
	for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
		/* The arg_size is at most 16 bytes, enforced by the verifier. */
		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
			nregs += (m->arg_size[i] + 7) / 8 - 1;
	}

	/* the first 8 registers are used for arguments */
	if (nregs > 8)
		return -ENOTSUPP;

	ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
	if (ret < 0)
		return ret;

	if (ret > max_insns)
		return -EFBIG;

	ctx.image = image;
	ctx.idx = 0;

	jit_fill_hole(image, (unsigned int)(image_end - image));
	ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);

	if (ret > 0 && validate_code(&ctx) < 0)
		ret = -EINVAL;

	if (ret > 0)
		ret *= AARCH64_INSN_SIZE;

	return ret;
}

/* True if target is outside the +/-128MB range of a direct branch
 * from ip (the b/bl immediate limit).
 */
static bool is_long_jump(void *ip, void *target)
{
	long offset;

	/* NULL target means this is a NOP */
	if (!target)
		return false;

	offset = (long)target - (long)ip;
	return offset < -SZ_128M || offset >= SZ_128M;
}

/* Encode into *insn either a nop (when addr is NULL) or a branch from
 * ip to addr — routed through the plt when addr is out of direct
 * branch range.  Returns 0 on success, -EFAULT if encoding failed.
 */
static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
			     void *addr, void *plt, u32 *insn)
{
	void *target;

	if (!addr) {
		*insn = aarch64_insn_gen_nop();
		return 0;
	}

	if (is_long_jump(ip, addr))
		target = plt;
	else
		target = addr;

	*insn = aarch64_insn_gen_branch_imm((unsigned long)ip,
					    (unsigned long)target,
					    type);

	return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT;
}

/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf
 * trampoline with the branch instruction from @ip to @new_addr.
If @old_addr 2052 * or @new_addr is NULL, the old or new instruction is NOP. 2053 * 2054 * When @ip is the bpf prog entry, a bpf trampoline is being attached or 2055 * detached. Since bpf trampoline and bpf prog are allocated separately with 2056 * vmalloc, the address distance may exceed 128MB, the maximum branch range. 2057 * So long jump should be handled. 2058 * 2059 * When a bpf prog is constructed, a plt pointing to empty trampoline 2060 * dummy_tramp is placed at the end: 2061 * 2062 * bpf_prog: 2063 * mov x9, lr 2064 * nop // patchsite 2065 * ... 2066 * ret 2067 * 2068 * plt: 2069 * ldr x10, target 2070 * br x10 2071 * target: 2072 * .quad dummy_tramp // plt target 2073 * 2074 * This is also the state when no trampoline is attached. 2075 * 2076 * When a short-jump bpf trampoline is attached, the patchsite is patched 2077 * to a bl instruction to the trampoline directly: 2078 * 2079 * bpf_prog: 2080 * mov x9, lr 2081 * bl <short-jump bpf trampoline address> // patchsite 2082 * ... 2083 * ret 2084 * 2085 * plt: 2086 * ldr x10, target 2087 * br x10 2088 * target: 2089 * .quad dummy_tramp // plt target 2090 * 2091 * When a long-jump bpf trampoline is attached, the plt target is filled with 2092 * the trampoline address and the patchsite is patched to a bl instruction to 2093 * the plt: 2094 * 2095 * bpf_prog: 2096 * mov x9, lr 2097 * bl plt // patchsite 2098 * ... 2099 * ret 2100 * 2101 * plt: 2102 * ldr x10, target 2103 * br x10 2104 * target: 2105 * .quad <long-jump bpf trampoline address> // plt target 2106 * 2107 * The dummy_tramp is used to prevent another CPU from jumping to unknown 2108 * locations during the patching process, making the patching process easier. 
 */
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
		       void *old_addr, void *new_addr)
{
	int ret;
	u32 old_insn;
	u32 new_insn;
	u32 replaced;
	struct bpf_plt *plt = NULL;
	unsigned long size = 0UL;
	unsigned long offset = ~0UL;
	enum aarch64_insn_branch_type branch_type;
	char namebuf[KSYM_NAME_LEN];
	void *image = NULL;
	u64 plt_target = 0ULL;
	bool poking_bpf_entry;

	if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
		/* Only poking bpf text is supported. Since kernel function
		 * entry is set up by ftrace, we rely on ftrace to poke kernel
		 * functions.
		 */
		return -ENOTSUPP;

	image = ip - offset;
	/* zero offset means we're poking bpf prog entry */
	poking_bpf_entry = (offset == 0UL);

	/* bpf prog entry, find plt and the real patchsite */
	if (poking_bpf_entry) {
		/* plt locates at the end of bpf prog */
		plt = image + size - PLT_TARGET_OFFSET;

		/* skip to the nop instruction in bpf prog entry:
		 * bti c // if BTI enabled
		 * mov x9, x30
		 * nop
		 */
		ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
	}

	/* long jump is only possible at bpf prog entry */
	if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
		    !poking_bpf_entry))
		return -EINVAL;

	if (poke_type == BPF_MOD_CALL)
		branch_type = AARCH64_INSN_BRANCH_LINK;
	else
		branch_type = AARCH64_INSN_BRANCH_NOLINK;

	/* encode the expected old instruction and the replacement */
	if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
		return -EFAULT;

	if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
		return -EFAULT;

	if (is_long_jump(ip, new_addr))
		plt_target = (u64)new_addr;
	else if (is_long_jump(ip, old_addr))
		/* if the old target is a long jump and the new target is not,
		 * restore the plt target to dummy_tramp, so there is always a
		 * legal and harmless address stored in plt target, and we'll
		 * never jump from plt to an unknown place.
		 */
		plt_target = (u64)&dummy_tramp;

	if (plt_target) {
		/* non-zero plt_target indicates we're patching a bpf prog,
		 * which is read only.
		 */
		if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1))
			return -EFAULT;
		WRITE_ONCE(plt->target, plt_target);
		set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1);
		/* since plt target points to either the new trampoline
		 * or dummy_tramp, even if another CPU reads the old plt
		 * target value before fetching the bl instruction to plt,
		 * it will be brought back by dummy_tramp, so no barrier is
		 * required here.
		 */
	}

	/* if the old target and the new target are both long jumps, no
	 * patching is required
	 */
	if (old_insn == new_insn)
		return 0;

	mutex_lock(&text_mutex);
	if (aarch64_insn_read(ip, &replaced)) {
		ret = -EFAULT;
		goto out;
	}

	/* refuse to patch if the site does not hold what we expect */
	if (replaced != old_insn) {
		ret = -EFAULT;
		goto out;
	}

	/* We call aarch64_insn_patch_text_nosync() to replace instruction
	 * atomically, so no other CPUs will fetch a half-new and half-old
	 * instruction. But there is chance that another CPU executes the
	 * old instruction after the patching operation finishes (e.g.,
	 * pipeline not flushed, or icache not synchronized yet).
	 *
	 * 1. when a new trampoline is attached, it is not a problem for
	 *    different CPUs to jump to different trampolines temporarily.
	 *
	 * 2. when an old trampoline is freed, we should wait for all other
	 *    CPUs to exit the trampoline and make sure the trampoline is no
	 *    longer reachable, since bpf_tramp_image_put() function already
	 *    uses percpu_ref and task-based rcu to do the sync, no need to call
	 *    the sync version here, see bpf_tramp_image_put() for details.
	 */
	ret = aarch64_insn_patch_text_nosync(ip, new_insn);
out:
	mutex_unlock(&text_mutex);

	return ret;
}