// SPDX-License-Identifier: GPL-2.0-only
/*
 * BPF JIT compiler for ARM64
 *
 * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
 */

#define pr_fmt(fmt) "bpf_jit: " fmt

#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/slab.h>

#include <asm/asm-extable.h>
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/patching.h>
#include <asm/set_memory.h>

#include "bpf_jit.h"

/* Extra pseudo-registers used internally by the JIT, beyond the BPF set */
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)

/*
 * Bail out of the current build function with -EINVAL if 'imm' does not
 * fit in a signed immediate field of 'bits' bits. Relies on a local 'i'
 * (BPF instruction index) being in scope at the expansion site.
 */
#define check_imm(bits, imm) do {				\
	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
			i, imm, imm);				\
		return -EINVAL;					\
	}							\
} while (0)
#define check_imm19(imm) check_imm(19, imm)
#define check_imm26(imm) check_imm(26, imm)

/* Map BPF registers to A64 registers */
static const int bpf2a64[] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = A64_R(7),
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = A64_R(0),
	[BPF_REG_2] = A64_R(1),
	[BPF_REG_3] = A64_R(2),
	[BPF_REG_4] = A64_R(3),
	[BPF_REG_5] = A64_R(4),
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = A64_R(19),
	[BPF_REG_7] = A64_R(20),
	[BPF_REG_8] = A64_R(21),
	[BPF_REG_9] = A64_R(22),
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = A64_R(25),
	/* temporary registers for BPF JIT */
	[TMP_REG_1] = A64_R(10),
	[TMP_REG_2] = A64_R(11),
	[TMP_REG_3] = A64_R(12),
	/* tail_call_cnt */
	[TCALL_CNT] = A64_R(26),
	/* temporary register for blinding constants */
	[BPF_REG_AX] = A64_R(9),
	[FP_BOTTOM] = A64_R(27),
};

/* Per-program JIT state, carried across the sizing and emission passes */
struct jit_ctx {
	const struct bpf_prog *prog;
	int idx;		/* index of the next A64 instruction slot */
	int epilogue_offset;	/* A64 index where the epilogue starts */
	int *offset;		/* per-BPF-insn A64 offset map */
	int exentry_idx;	/* next free exception-table entry */
	__le32 *image;		/* NULL during the first (sizing) pass */
	u32 stack_size;		/* rounded-up BPF stack depth */
	int fpb_offset;		/* offset of FP_BOTTOM below BPF_REG_FP */
};

/* In-image procedure linkage table used for long-jump trampolines */
struct bpf_plt {
	u32 insn_ldr; /* load target */
	u32 insn_br; /* branch to target */
	u64 target; /* target value */
};

#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)

/*
 * Append one A64 instruction. During the sizing pass (image == NULL) only
 * the instruction count is advanced; during emission the encoding is
 * stored little-endian into the image.
 */
static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
	if (ctx->image != NULL)
		ctx->image[ctx->idx] = cpu_to_le32(insn);

	ctx->idx++;
}

/*
 * Load a 32-bit immediate into 'reg' using at most two instructions:
 * a MOVN-based sequence for negative values, MOVZ(+MOVK) otherwise.
 */
static inline void emit_a64_mov_i(const int is64, const int reg,
				  const s32 val, struct jit_ctx *ctx)
{
	u16 hi = val >> 16;
	u16 lo = val & 0xffff;

	if (hi & 0x8000) {
		if (hi == 0xffff) {
			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
		} else {
			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
			if (lo != 0xffff)
				emit(A64_MOVK(is64, reg, lo, 0), ctx);
		}
	} else {
		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
		if (hi)
			emit(A64_MOVK(is64, reg, hi, 16), ctx);
	}
}

/*
 * Count the 16-bit chunks of 'val' that differ from the MOVN (inverse)
 * or MOVZ background pattern; used to pick the cheaper mov sequence.
 */
static int i64_i16_blocks(const u64 val, bool inverse)
{
	return (((val >> 0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
}

/*
 * Load a 64-bit immediate, starting with MOVN or MOVZ (whichever leaves
 * fewer chunks to patch) followed by MOVK for each differing 16-bit chunk.
 */
static inline void emit_a64_mov_i64(const int reg, const u64 val,
				    struct jit_ctx *ctx)
{
	u64 nrm_tmp = val, rev_tmp = ~val;
	bool inverse;
	int shift;

	/* values fitting in 32 bits take the short path */
	if (!(nrm_tmp >> 32))
		return emit_a64_mov_i(0, reg, (u32)val, ctx);

	inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
	/* start at the highest non-background 16-bit chunk */
	shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
					  (fls64(nrm_tmp) - 1)), 16), 0);
	if (inverse)
		emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
	else
		emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
	shift -= 16;
	while (shift >= 0) {
		if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
			emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
		shift -= 16;
	}
}

/* Emit a BTI instruction only when the kernel is built with BTI support */
static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
{
	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
		emit(insn, ctx);
}

/*
 * Kernel addresses in the vmalloc space use at most 48 bits, and the
 * remaining bits are guaranteed to be 0x1. So we can compose the address
 * with a fixed length movn/movk/movk sequence.
 */
static inline void emit_addr_mov_i64(const int reg, const u64 val,
				     struct jit_ctx *ctx)
{
	u64 tmp = val;
	int shift = 0;

	emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx);
	while (shift < 32) {
		tmp >>= 16;
		shift += 16;
		emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
	}
}

/* Call 'target' via BLR through TMP_REG_1 (clobbers that register) */
static inline void emit_call(u64 target, struct jit_ctx *ctx)
{
	u8 tmp = bpf2a64[TMP_REG_1];

	emit_addr_mov_i64(tmp, target, ctx);
	emit(A64_BLR(tmp), ctx);
}

/* Translate a BPF jump offset into an A64 instruction offset */
static inline int bpf2a64_offset(int bpf_insn, int off,
				 const struct jit_ctx *ctx)
{
	/* BPF JMP offset is relative to the next instruction */
	bpf_insn++;
	/*
	 * Whereas arm64 branch instructions encode the offset
	 * from the branch itself, so we must subtract 1 from the
	 * instruction offset.
	 */
	return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
}

/* Fill unused image space with trapping instructions */
static void jit_fill_hole(void *area, unsigned int size)
{
	__le32 *ptr;
	/* We are guaranteed to have aligned memory. */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
}

/* Branch distance (in instructions) from the current slot to the epilogue */
static inline int epilogue_offset(const struct jit_ctx *ctx)
{
	int to = ctx->epilogue_offset;
	int from = ctx->idx;

	return to - from;
}

static bool is_addsub_imm(u32 imm)
{
	/* Either imm12 or shifted imm12. */
	return !(imm & ~0xfff) || !(imm & ~0xfff000);
}

/*
 * There are 3 types of AArch64 LDR/STR (immediate) instruction:
 * Post-index, Pre-index, Unsigned offset.
 *
 * For BPF ldr/str, the "unsigned offset" type is sufficient.
 *
 * "Unsigned offset" type LDR(immediate) format:
 *
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 1|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * "Unsigned offset" type STR(immediate) format:
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 0|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * The offset is calculated from imm12 and scale in the following way:
 *
 * offset = (u64)imm12 << scale
 */
static bool is_lsi_offset(int offset, int scale)
{
	if (offset < 0)
		return false;

	if (offset > (0xFFF << scale))
		return false;

	if (offset & ((1 << scale) - 1))
		return false;

	return true;
}

/* generated prologue:
 *      bti c // if CONFIG_ARM64_BTI_KERNEL
 *      mov x9, lr
 *      nop  // POKE_OFFSET
 *      paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
 *      stp x29, lr, [sp, #-16]!
 *      mov x29, sp
 *      stp x19, x20, [sp, #-16]!
 *      stp x21, x22, [sp, #-16]!
 *      stp x25, x26, [sp, #-16]!
 *      stp x27, x28, [sp, #-16]!
 *      mov x25, sp
 *      mov tcc, #0
 *      // PROLOGUE_OFFSET
 */

#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)

/* Offset of nop instruction in bpf prog entry to be poked */
#define POKE_OFFSET (BTI_INSNS + 1)

/* Tail call offset to jump into */
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)

/*
 * Emit the function prologue: BTI/PAC landing, FP/LR and callee-saved
 * register saves, BPF frame-pointer setup, and (for the main program
 * when not converted from classic BPF) tail_call_cnt initialization.
 * Returns 0 on success, -1 if the emitted prologue length disagrees
 * with PROLOGUE_OFFSET.
 */
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
	const struct bpf_prog *prog = ctx->prog;
	const bool is_main_prog = prog->aux->func_idx == 0;
	const u8 r6 = bpf2a64[BPF_REG_6];
	const u8 r7 = bpf2a64[BPF_REG_7];
	const u8 r8 = bpf2a64[BPF_REG_8];
	const u8 r9 = bpf2a64[BPF_REG_9];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 tcc = bpf2a64[TCALL_CNT];
	const u8 fpb = bpf2a64[FP_BOTTOM];
	const int idx0 = ctx->idx;
	int cur_offset;

	/*
	 * BPF prog stack layout
	 *
	 *                         high
	 * original A64_SP =>   0:+-----+ BPF prologue
	 *                        |FP/LR|
	 * current A64_FP =>  -16:+-----+
	 *                        | ... | callee saved registers
	 * BPF fp register => -64:+-----+ <= (BPF_FP)
	 *                        |     |
	 *                        | ... | BPF prog stack
	 *                        |     |
	 *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
	 *                        |RSVD | padding
	 * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
	 *                        |     |
	 *                        | ... | Function call stack
	 *                        |     |
	 *                        +-----+
	 *                          low
	 *
	 */

	emit_bti(A64_BTI_C, ctx);

	/* x9 holds the return address for the attach trampoline protocol */
	emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
	/* nop reserved for patching (POKE_OFFSET) */
	emit(A64_NOP, ctx);

	/* Sign lr */
	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
		emit(A64_PACIASP, ctx);

	/* Save FP and LR registers to stay align with ARM64 AAPCS */
	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* Save callee-saved registers */
	emit(A64_PUSH(r6, r7, A64_SP), ctx);
	emit(A64_PUSH(r8, r9, A64_SP), ctx);
	emit(A64_PUSH(fp, tcc, A64_SP), ctx);
	emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);

	/* Set up BPF prog stack base register */
	emit(A64_MOV(1, fp, A64_SP), ctx);

	if (!ebpf_from_cbpf && is_main_prog) {
		/* Initialize tail_call_cnt */
		emit(A64_MOVZ(1, tcc, 0, 0), ctx);

		cur_offset = ctx->idx - idx0;
		if (cur_offset != PROLOGUE_OFFSET) {
			pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
				    cur_offset, PROLOGUE_OFFSET);
			return -1;
		}

		/* BTI landing pad for the tail call, done with a BR */
		emit_bti(A64_BTI_J, ctx);
	}

	emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx);

	/* Stack must be multiples of 16B */
	ctx->stack_size = round_up(prog->aux->stack_depth, 16);

	/* Set up function call stack */
	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
	return 0;
}

static int out_offset = -1; /* initialized on the first pass of build_body() */

/*
 * Emit the bpf_tail_call() sequence: bounds-check the index and the
 * tail-call count, load the target prog, then branch past its prologue.
 * Returns 0 on success, -1 if the "out" label offset is inconsistent
 * between passes.
 */
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
	const u8 r2 = bpf2a64[BPF_REG_2];
	const u8 r3 = bpf2a64[BPF_REG_3];

	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 prg = bpf2a64[TMP_REG_2];
	const u8 tcc = bpf2a64[TCALL_CNT];
	const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset))
	size_t off;

	/* if (index >= array->map.max_entries)
	 *     goto out;
	 */
	off = offsetof(struct bpf_array, map.max_entries);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_LDR32(tmp, r2, tmp), ctx);
	emit(A64_MOV(0, r3, r3), ctx);
	emit(A64_CMP(0, r3, tmp), ctx);
	emit(A64_B_(A64_COND_CS, jmp_offset), ctx);

	/*
	 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
	 *     goto out;
	 * tail_call_cnt++;
	 */
	emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
	emit(A64_CMP(1, tcc, tmp), ctx);
	emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
	emit(A64_ADD_I(1, tcc, tcc, 1), ctx);

	/* prog = array->ptrs[index];
	 * if (prog == NULL)
	 *     goto out;
	 */
	off = offsetof(struct bpf_array, ptrs);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_ADD(1, tmp, r2, tmp), ctx);
	emit(A64_LSL(1, prg, r3, 3), ctx);
	emit(A64_LDR64(prg, tmp, prg), ctx);
	emit(A64_CBZ(1, prg, jmp_offset), ctx);

	/* goto *(prog->bpf_func + prologue_offset); */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_LDR64(tmp, prg, tmp), ctx);
	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
	emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
	emit(A64_BR(tmp), ctx);

	/* out: */
	if (out_offset == -1)
		out_offset = cur_offset;
	if (cur_offset != out_offset) {
		pr_err_once("tail_call out_offset = %d, expected %d!\n",
			    cur_offset, out_offset);
		return -1;
	}
	return 0;
#undef cur_offset
#undef jmp_offset
}

#ifdef CONFIG_ARM64_LSE_ATOMICS
/*
 * Emit a BPF atomic operation using ARMv8.1 LSE instructions
 * (STADD/STCLR/..., LDADDAL/..., SWPAL, CASAL). Returns 0 on success,
 * -EINVAL for an unknown atomic opcode.
 */
static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	const s16 off = insn->off;
	u8 reg;

	if (!off) {
		reg = dst;
	} else {
		/* non-zero offset: form dst + off in a temporary */
		emit_a64_mov_i(1, tmp, off, ctx);
		emit(A64_ADD(1, tmp, tmp, dst), ctx);
		reg = tmp;
	}

	switch (insn->imm) {
	/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
	case BPF_ADD:
		emit(A64_STADD(isdw, reg, src), ctx);
		break;
	case BPF_AND:
		/* AND is implemented as "clear the inverted bits" */
		emit(A64_MVN(isdw, tmp2, src), ctx);
		emit(A64_STCLR(isdw, reg, tmp2), ctx);
		break;
	case BPF_OR:
		emit(A64_STSET(isdw, reg, src), ctx);
		break;
	case BPF_XOR:
		emit(A64_STEOR(isdw, reg, src), ctx);
		break;
	/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
	case BPF_ADD | BPF_FETCH:
		emit(A64_LDADDAL(isdw, src, reg, src), ctx);
		break;
	case BPF_AND | BPF_FETCH:
		emit(A64_MVN(isdw, tmp2, src), ctx);
		emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
		break;
	case BPF_OR | BPF_FETCH:
		emit(A64_LDSETAL(isdw, src, reg, src), ctx);
		break;
	case BPF_XOR | BPF_FETCH:
		emit(A64_LDEORAL(isdw, src, reg, src), ctx);
		break;
	/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
	case BPF_XCHG:
		emit(A64_SWPAL(isdw, src, reg, src), ctx);
		break;
	/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
	case BPF_CMPXCHG:
		emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
		break;
	default:
		pr_err_once("unknown atomic op code %02x\n", insn->imm);
		return -EINVAL;
	}

	return 0;
}
#else
/* Fallback stub when LSE atomics are not built in */
static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	return -EINVAL;
}
#endif

/*
 * Emit a BPF atomic operation as an LL/SC (load-exclusive/store-exclusive)
 * retry loop for CPUs without LSE atomics. Returns 0 on success, -EINVAL
 * for an unknown atomic opcode.
 */
static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 tmp3 = bpf2a64[TMP_REG_3];
	const int i = insn - ctx->prog->insnsi;
	const s32 imm = insn->imm;
	const s16 off = insn->off;
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	u8 reg;
	s32 jmp_offset;

	if (!off) {
		reg = dst;
	} else {
		/* non-zero offset: form dst + off in a temporary */
		emit_a64_mov_i(1, tmp, off, ctx);
		emit(A64_ADD(1, tmp, tmp, dst), ctx);
		reg = tmp;
	}

	if (imm == BPF_ADD || imm == BPF_AND ||
	    imm == BPF_OR || imm == BPF_XOR) {
		/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
		emit(A64_LDXR(isdw, tmp2, reg), ctx);
		if (imm == BPF_ADD)
			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_AND)
			emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_OR)
			emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
		else
			emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
		emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
		/* retry from the LDXR if the exclusive store failed */
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
	} else if (imm == (BPF_ADD | BPF_FETCH) ||
		   imm == (BPF_AND | BPF_FETCH) ||
		   imm == (BPF_OR | BPF_FETCH) ||
		   imm == (BPF_XOR | BPF_FETCH)) {
		/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
		const u8 ax = bpf2a64[BPF_REG_AX];

		emit(A64_MOV(isdw, ax, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		if (imm == (BPF_ADD | BPF_FETCH))
			emit(A64_ADD(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_AND | BPF_FETCH))
			emit(A64_AND(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_OR | BPF_FETCH))
			emit(A64_ORR(isdw, tmp2, src, ax), ctx);
		else
			emit(A64_EOR(isdw, tmp2, src, ax), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_XCHG) {
		/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
		emit(A64_MOV(isdw, tmp2, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		jmp_offset = -2;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH,
ctx);
	} else if (imm == BPF_CMPXCHG) {
		/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
		const u8 r0 = bpf2a64[BPF_REG_0];

		emit(A64_MOV(isdw, tmp2, r0), ctx);
		emit(A64_LDXR(isdw, r0, reg), ctx);
		emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
		/* skip the store if the loaded value differs from r0 */
		jmp_offset = 4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
		emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
		/* retry from the LDXR if the exclusive store failed */
		jmp_offset = -4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else {
		pr_err_once("unknown atomic op code %02x\n", imm);
		return -EINVAL;
	}

	return 0;
}

void dummy_tramp(void);

/*
 * Default PLT target: restores the caller protocol (return address in x9)
 * and returns, so an unpatched PLT behaves as a no-op trampoline.
 */
asm (
"	.pushsection .text, \"ax\", @progbits\n"
"	.global dummy_tramp\n"
"	.type dummy_tramp, %function\n"
"dummy_tramp:"
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
"	bti j\n" /* dummy_tramp is called via "br x10" */
#endif
"	mov x10, x30\n"
"	mov x30, x9\n"
"	ret x10\n"
"	.size dummy_tramp, .-dummy_tramp\n"
"	.popsection\n"
);

/* build a plt initialized like this:
 *
 * plt:
 *      ldr tmp, target
 *      br tmp
 * target:
 *      .quad dummy_tramp
 *
 * when a long jump trampoline is attached, target is filled with the
 * trampoline address, and when the trampoline is removed, target is
 * restored to dummy_tramp address.
 */
static void build_plt(struct jit_ctx *ctx)
{
	const u8 tmp = bpf2a64[TMP_REG_1];
	struct bpf_plt *plt = NULL;

	/* make sure target is 64-bit aligned */
	if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
		emit(A64_NOP, ctx);

	plt = (struct bpf_plt *)(ctx->image + ctx->idx);
	/* plt is called via bl, no BTI needed here */
	emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
	emit(A64_BR(tmp), ctx);

	if (ctx->image)
		plt->target = (u64)&dummy_tramp;
}

/*
 * Emit the function epilogue: tear down the BPF stack, restore the
 * callee-saved registers pushed by build_prologue (in reverse order),
 * move the BPF return value into x0, and return.
 */
static void build_epilogue(struct jit_ctx *ctx)
{
	const u8 r0 = bpf2a64[BPF_REG_0];
	const u8 r6 = bpf2a64[BPF_REG_6];
	const u8 r7 = bpf2a64[BPF_REG_7];
	const u8 r8 = bpf2a64[BPF_REG_8];
	const u8 r9 = bpf2a64[BPF_REG_9];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 fpb = bpf2a64[FP_BOTTOM];

	/* We're done with BPF stack */
	emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	/* Restore x27 and x28 */
	emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
	/* Restore fp (x25) and x26 */
	emit(A64_POP(fp, A64_R(26), A64_SP), ctx);

	/* Restore callee-saved register */
	emit(A64_POP(r8, r9, A64_SP), ctx);
	emit(A64_POP(r6, r7, A64_SP), ctx);

	/* Restore FP/LR registers */
	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);

	/* Set return value */
	emit(A64_MOV(1, A64_R(0), r0), ctx);

	/* Authenticate lr */
	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
		emit(A64_AUTIASP, ctx);

	emit(A64_RET(A64_LR), ctx);
}

/* Fixup word layout: low 27 bits = offset back to the faulting insn's
 * continuation, high 5 bits = destination register to zero.
 */
#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)

/*
 * Exception handler for JITed BPF probe loads: zero the destination
 * register and resume execution after the faulting instruction.
 */
bool ex_handler_bpf(const struct exception_table_entry *ex,
		    struct pt_regs *regs)
{
	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);

	regs->regs[dst_reg] = 0;
	regs->pc = (unsigned long)&ex->fixup - offset;
	return true;
}

/* For accesses to BTF pointers, add an entry to the exception table */
static int add_exception_handler(const struct bpf_insn *insn,
				 struct jit_ctx *ctx,
				 int dst_reg)
{
	off_t offset;
	unsigned long pc;
	struct exception_table_entry *ex;

	if (!ctx->image)
		/* First pass */
		return 0;

	if (BPF_MODE(insn->code) != BPF_PROBE_MEM)
		return 0;

	if (!ctx->prog->aux->extable ||
	    WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
		return -EINVAL;

	ex = &ctx->prog->aux->extable[ctx->exentry_idx];
	/* the instruction that may fault is the one just emitted */
	pc = (unsigned long)&ctx->image[ctx->idx - 1];

	offset = pc - (long)&ex->insn;
	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
		return -ERANGE;
	ex->insn = offset;

	/*
	 * Since the extable follows the program, the fixup offset is always
	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
	 * to keep things simple, and put the destination register in the upper
	 * bits. We don't need to worry about buildtime or runtime sort
	 * modifying the upper bits because the table is already sorted, and
	 * isn't part of the main exception table.
	 */
	offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
		return -ERANGE;

	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

	ex->type = EX_TYPE_BPF;

	ctx->exentry_idx++;
	return 0;
}

/* JITs an eBPF instruction.
 * Returns:
 * 0  - successfully JITed an 8-byte eBPF instruction.
 * >0 - successfully JITed a 16-byte eBPF instruction.
 * <0 - failed to JIT.
753 */ 754 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, 755 bool extra_pass) 756 { 757 const u8 code = insn->code; 758 const u8 dst = bpf2a64[insn->dst_reg]; 759 const u8 src = bpf2a64[insn->src_reg]; 760 const u8 tmp = bpf2a64[TMP_REG_1]; 761 const u8 tmp2 = bpf2a64[TMP_REG_2]; 762 const u8 fp = bpf2a64[BPF_REG_FP]; 763 const u8 fpb = bpf2a64[FP_BOTTOM]; 764 const s16 off = insn->off; 765 const s32 imm = insn->imm; 766 const int i = insn - ctx->prog->insnsi; 767 const bool is64 = BPF_CLASS(code) == BPF_ALU64 || 768 BPF_CLASS(code) == BPF_JMP; 769 u8 jmp_cond; 770 s32 jmp_offset; 771 u32 a64_insn; 772 u8 src_adj; 773 u8 dst_adj; 774 int off_adj; 775 int ret; 776 777 switch (code) { 778 /* dst = src */ 779 case BPF_ALU | BPF_MOV | BPF_X: 780 case BPF_ALU64 | BPF_MOV | BPF_X: 781 emit(A64_MOV(is64, dst, src), ctx); 782 break; 783 /* dst = dst OP src */ 784 case BPF_ALU | BPF_ADD | BPF_X: 785 case BPF_ALU64 | BPF_ADD | BPF_X: 786 emit(A64_ADD(is64, dst, dst, src), ctx); 787 break; 788 case BPF_ALU | BPF_SUB | BPF_X: 789 case BPF_ALU64 | BPF_SUB | BPF_X: 790 emit(A64_SUB(is64, dst, dst, src), ctx); 791 break; 792 case BPF_ALU | BPF_AND | BPF_X: 793 case BPF_ALU64 | BPF_AND | BPF_X: 794 emit(A64_AND(is64, dst, dst, src), ctx); 795 break; 796 case BPF_ALU | BPF_OR | BPF_X: 797 case BPF_ALU64 | BPF_OR | BPF_X: 798 emit(A64_ORR(is64, dst, dst, src), ctx); 799 break; 800 case BPF_ALU | BPF_XOR | BPF_X: 801 case BPF_ALU64 | BPF_XOR | BPF_X: 802 emit(A64_EOR(is64, dst, dst, src), ctx); 803 break; 804 case BPF_ALU | BPF_MUL | BPF_X: 805 case BPF_ALU64 | BPF_MUL | BPF_X: 806 emit(A64_MUL(is64, dst, dst, src), ctx); 807 break; 808 case BPF_ALU | BPF_DIV | BPF_X: 809 case BPF_ALU64 | BPF_DIV | BPF_X: 810 emit(A64_UDIV(is64, dst, dst, src), ctx); 811 break; 812 case BPF_ALU | BPF_MOD | BPF_X: 813 case BPF_ALU64 | BPF_MOD | BPF_X: 814 emit(A64_UDIV(is64, tmp, dst, src), ctx); 815 emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); 816 break; 817 case BPF_ALU | 
BPF_LSH | BPF_X: 818 case BPF_ALU64 | BPF_LSH | BPF_X: 819 emit(A64_LSLV(is64, dst, dst, src), ctx); 820 break; 821 case BPF_ALU | BPF_RSH | BPF_X: 822 case BPF_ALU64 | BPF_RSH | BPF_X: 823 emit(A64_LSRV(is64, dst, dst, src), ctx); 824 break; 825 case BPF_ALU | BPF_ARSH | BPF_X: 826 case BPF_ALU64 | BPF_ARSH | BPF_X: 827 emit(A64_ASRV(is64, dst, dst, src), ctx); 828 break; 829 /* dst = -dst */ 830 case BPF_ALU | BPF_NEG: 831 case BPF_ALU64 | BPF_NEG: 832 emit(A64_NEG(is64, dst, dst), ctx); 833 break; 834 /* dst = BSWAP##imm(dst) */ 835 case BPF_ALU | BPF_END | BPF_FROM_LE: 836 case BPF_ALU | BPF_END | BPF_FROM_BE: 837 #ifdef CONFIG_CPU_BIG_ENDIAN 838 if (BPF_SRC(code) == BPF_FROM_BE) 839 goto emit_bswap_uxt; 840 #else /* !CONFIG_CPU_BIG_ENDIAN */ 841 if (BPF_SRC(code) == BPF_FROM_LE) 842 goto emit_bswap_uxt; 843 #endif 844 switch (imm) { 845 case 16: 846 emit(A64_REV16(is64, dst, dst), ctx); 847 /* zero-extend 16 bits into 64 bits */ 848 emit(A64_UXTH(is64, dst, dst), ctx); 849 break; 850 case 32: 851 emit(A64_REV32(is64, dst, dst), ctx); 852 /* upper 32 bits already cleared */ 853 break; 854 case 64: 855 emit(A64_REV64(dst, dst), ctx); 856 break; 857 } 858 break; 859 emit_bswap_uxt: 860 switch (imm) { 861 case 16: 862 /* zero-extend 16 bits into 64 bits */ 863 emit(A64_UXTH(is64, dst, dst), ctx); 864 break; 865 case 32: 866 /* zero-extend 32 bits into 64 bits */ 867 emit(A64_UXTW(is64, dst, dst), ctx); 868 break; 869 case 64: 870 /* nop */ 871 break; 872 } 873 break; 874 /* dst = imm */ 875 case BPF_ALU | BPF_MOV | BPF_K: 876 case BPF_ALU64 | BPF_MOV | BPF_K: 877 emit_a64_mov_i(is64, dst, imm, ctx); 878 break; 879 /* dst = dst OP imm */ 880 case BPF_ALU | BPF_ADD | BPF_K: 881 case BPF_ALU64 | BPF_ADD | BPF_K: 882 if (is_addsub_imm(imm)) { 883 emit(A64_ADD_I(is64, dst, dst, imm), ctx); 884 } else if (is_addsub_imm(-imm)) { 885 emit(A64_SUB_I(is64, dst, dst, -imm), ctx); 886 } else { 887 emit_a64_mov_i(is64, tmp, imm, ctx); 888 emit(A64_ADD(is64, dst, dst, tmp), 
ctx); 889 } 890 break; 891 case BPF_ALU | BPF_SUB | BPF_K: 892 case BPF_ALU64 | BPF_SUB | BPF_K: 893 if (is_addsub_imm(imm)) { 894 emit(A64_SUB_I(is64, dst, dst, imm), ctx); 895 } else if (is_addsub_imm(-imm)) { 896 emit(A64_ADD_I(is64, dst, dst, -imm), ctx); 897 } else { 898 emit_a64_mov_i(is64, tmp, imm, ctx); 899 emit(A64_SUB(is64, dst, dst, tmp), ctx); 900 } 901 break; 902 case BPF_ALU | BPF_AND | BPF_K: 903 case BPF_ALU64 | BPF_AND | BPF_K: 904 a64_insn = A64_AND_I(is64, dst, dst, imm); 905 if (a64_insn != AARCH64_BREAK_FAULT) { 906 emit(a64_insn, ctx); 907 } else { 908 emit_a64_mov_i(is64, tmp, imm, ctx); 909 emit(A64_AND(is64, dst, dst, tmp), ctx); 910 } 911 break; 912 case BPF_ALU | BPF_OR | BPF_K: 913 case BPF_ALU64 | BPF_OR | BPF_K: 914 a64_insn = A64_ORR_I(is64, dst, dst, imm); 915 if (a64_insn != AARCH64_BREAK_FAULT) { 916 emit(a64_insn, ctx); 917 } else { 918 emit_a64_mov_i(is64, tmp, imm, ctx); 919 emit(A64_ORR(is64, dst, dst, tmp), ctx); 920 } 921 break; 922 case BPF_ALU | BPF_XOR | BPF_K: 923 case BPF_ALU64 | BPF_XOR | BPF_K: 924 a64_insn = A64_EOR_I(is64, dst, dst, imm); 925 if (a64_insn != AARCH64_BREAK_FAULT) { 926 emit(a64_insn, ctx); 927 } else { 928 emit_a64_mov_i(is64, tmp, imm, ctx); 929 emit(A64_EOR(is64, dst, dst, tmp), ctx); 930 } 931 break; 932 case BPF_ALU | BPF_MUL | BPF_K: 933 case BPF_ALU64 | BPF_MUL | BPF_K: 934 emit_a64_mov_i(is64, tmp, imm, ctx); 935 emit(A64_MUL(is64, dst, dst, tmp), ctx); 936 break; 937 case BPF_ALU | BPF_DIV | BPF_K: 938 case BPF_ALU64 | BPF_DIV | BPF_K: 939 emit_a64_mov_i(is64, tmp, imm, ctx); 940 emit(A64_UDIV(is64, dst, dst, tmp), ctx); 941 break; 942 case BPF_ALU | BPF_MOD | BPF_K: 943 case BPF_ALU64 | BPF_MOD | BPF_K: 944 emit_a64_mov_i(is64, tmp2, imm, ctx); 945 emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); 946 emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx); 947 break; 948 case BPF_ALU | BPF_LSH | BPF_K: 949 case BPF_ALU64 | BPF_LSH | BPF_K: 950 emit(A64_LSL(is64, dst, dst, imm), ctx); 951 break; 952 case 
BPF_ALU | BPF_RSH | BPF_K: 953 case BPF_ALU64 | BPF_RSH | BPF_K: 954 emit(A64_LSR(is64, dst, dst, imm), ctx); 955 break; 956 case BPF_ALU | BPF_ARSH | BPF_K: 957 case BPF_ALU64 | BPF_ARSH | BPF_K: 958 emit(A64_ASR(is64, dst, dst, imm), ctx); 959 break; 960 961 /* JUMP off */ 962 case BPF_JMP | BPF_JA: 963 jmp_offset = bpf2a64_offset(i, off, ctx); 964 check_imm26(jmp_offset); 965 emit(A64_B(jmp_offset), ctx); 966 break; 967 /* IF (dst COND src) JUMP off */ 968 case BPF_JMP | BPF_JEQ | BPF_X: 969 case BPF_JMP | BPF_JGT | BPF_X: 970 case BPF_JMP | BPF_JLT | BPF_X: 971 case BPF_JMP | BPF_JGE | BPF_X: 972 case BPF_JMP | BPF_JLE | BPF_X: 973 case BPF_JMP | BPF_JNE | BPF_X: 974 case BPF_JMP | BPF_JSGT | BPF_X: 975 case BPF_JMP | BPF_JSLT | BPF_X: 976 case BPF_JMP | BPF_JSGE | BPF_X: 977 case BPF_JMP | BPF_JSLE | BPF_X: 978 case BPF_JMP32 | BPF_JEQ | BPF_X: 979 case BPF_JMP32 | BPF_JGT | BPF_X: 980 case BPF_JMP32 | BPF_JLT | BPF_X: 981 case BPF_JMP32 | BPF_JGE | BPF_X: 982 case BPF_JMP32 | BPF_JLE | BPF_X: 983 case BPF_JMP32 | BPF_JNE | BPF_X: 984 case BPF_JMP32 | BPF_JSGT | BPF_X: 985 case BPF_JMP32 | BPF_JSLT | BPF_X: 986 case BPF_JMP32 | BPF_JSGE | BPF_X: 987 case BPF_JMP32 | BPF_JSLE | BPF_X: 988 emit(A64_CMP(is64, dst, src), ctx); 989 emit_cond_jmp: 990 jmp_offset = bpf2a64_offset(i, off, ctx); 991 check_imm19(jmp_offset); 992 switch (BPF_OP(code)) { 993 case BPF_JEQ: 994 jmp_cond = A64_COND_EQ; 995 break; 996 case BPF_JGT: 997 jmp_cond = A64_COND_HI; 998 break; 999 case BPF_JLT: 1000 jmp_cond = A64_COND_CC; 1001 break; 1002 case BPF_JGE: 1003 jmp_cond = A64_COND_CS; 1004 break; 1005 case BPF_JLE: 1006 jmp_cond = A64_COND_LS; 1007 break; 1008 case BPF_JSET: 1009 case BPF_JNE: 1010 jmp_cond = A64_COND_NE; 1011 break; 1012 case BPF_JSGT: 1013 jmp_cond = A64_COND_GT; 1014 break; 1015 case BPF_JSLT: 1016 jmp_cond = A64_COND_LT; 1017 break; 1018 case BPF_JSGE: 1019 jmp_cond = A64_COND_GE; 1020 break; 1021 case BPF_JSLE: 1022 jmp_cond = A64_COND_LE; 1023 break; 1024 
default: 1025 return -EFAULT; 1026 } 1027 emit(A64_B_(jmp_cond, jmp_offset), ctx); 1028 break; 1029 case BPF_JMP | BPF_JSET | BPF_X: 1030 case BPF_JMP32 | BPF_JSET | BPF_X: 1031 emit(A64_TST(is64, dst, src), ctx); 1032 goto emit_cond_jmp; 1033 /* IF (dst COND imm) JUMP off */ 1034 case BPF_JMP | BPF_JEQ | BPF_K: 1035 case BPF_JMP | BPF_JGT | BPF_K: 1036 case BPF_JMP | BPF_JLT | BPF_K: 1037 case BPF_JMP | BPF_JGE | BPF_K: 1038 case BPF_JMP | BPF_JLE | BPF_K: 1039 case BPF_JMP | BPF_JNE | BPF_K: 1040 case BPF_JMP | BPF_JSGT | BPF_K: 1041 case BPF_JMP | BPF_JSLT | BPF_K: 1042 case BPF_JMP | BPF_JSGE | BPF_K: 1043 case BPF_JMP | BPF_JSLE | BPF_K: 1044 case BPF_JMP32 | BPF_JEQ | BPF_K: 1045 case BPF_JMP32 | BPF_JGT | BPF_K: 1046 case BPF_JMP32 | BPF_JLT | BPF_K: 1047 case BPF_JMP32 | BPF_JGE | BPF_K: 1048 case BPF_JMP32 | BPF_JLE | BPF_K: 1049 case BPF_JMP32 | BPF_JNE | BPF_K: 1050 case BPF_JMP32 | BPF_JSGT | BPF_K: 1051 case BPF_JMP32 | BPF_JSLT | BPF_K: 1052 case BPF_JMP32 | BPF_JSGE | BPF_K: 1053 case BPF_JMP32 | BPF_JSLE | BPF_K: 1054 if (is_addsub_imm(imm)) { 1055 emit(A64_CMP_I(is64, dst, imm), ctx); 1056 } else if (is_addsub_imm(-imm)) { 1057 emit(A64_CMN_I(is64, dst, -imm), ctx); 1058 } else { 1059 emit_a64_mov_i(is64, tmp, imm, ctx); 1060 emit(A64_CMP(is64, dst, tmp), ctx); 1061 } 1062 goto emit_cond_jmp; 1063 case BPF_JMP | BPF_JSET | BPF_K: 1064 case BPF_JMP32 | BPF_JSET | BPF_K: 1065 a64_insn = A64_TST_I(is64, dst, imm); 1066 if (a64_insn != AARCH64_BREAK_FAULT) { 1067 emit(a64_insn, ctx); 1068 } else { 1069 emit_a64_mov_i(is64, tmp, imm, ctx); 1070 emit(A64_TST(is64, dst, tmp), ctx); 1071 } 1072 goto emit_cond_jmp; 1073 /* function call */ 1074 case BPF_JMP | BPF_CALL: 1075 { 1076 const u8 r0 = bpf2a64[BPF_REG_0]; 1077 bool func_addr_fixed; 1078 u64 func_addr; 1079 1080 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, 1081 &func_addr, &func_addr_fixed); 1082 if (ret < 0) 1083 return ret; 1084 emit_call(func_addr, ctx); 1085 emit(A64_MOV(1, r0, 
A64_R(0)), ctx); 1086 break; 1087 } 1088 /* tail call */ 1089 case BPF_JMP | BPF_TAIL_CALL: 1090 if (emit_bpf_tail_call(ctx)) 1091 return -EFAULT; 1092 break; 1093 /* function return */ 1094 case BPF_JMP | BPF_EXIT: 1095 /* Optimization: when last instruction is EXIT, 1096 simply fallthrough to epilogue. */ 1097 if (i == ctx->prog->len - 1) 1098 break; 1099 jmp_offset = epilogue_offset(ctx); 1100 check_imm26(jmp_offset); 1101 emit(A64_B(jmp_offset), ctx); 1102 break; 1103 1104 /* dst = imm64 */ 1105 case BPF_LD | BPF_IMM | BPF_DW: 1106 { 1107 const struct bpf_insn insn1 = insn[1]; 1108 u64 imm64; 1109 1110 imm64 = (u64)insn1.imm << 32 | (u32)imm; 1111 if (bpf_pseudo_func(insn)) 1112 emit_addr_mov_i64(dst, imm64, ctx); 1113 else 1114 emit_a64_mov_i64(dst, imm64, ctx); 1115 1116 return 1; 1117 } 1118 1119 /* LDX: dst = *(size *)(src + off) */ 1120 case BPF_LDX | BPF_MEM | BPF_W: 1121 case BPF_LDX | BPF_MEM | BPF_H: 1122 case BPF_LDX | BPF_MEM | BPF_B: 1123 case BPF_LDX | BPF_MEM | BPF_DW: 1124 case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 1125 case BPF_LDX | BPF_PROBE_MEM | BPF_W: 1126 case BPF_LDX | BPF_PROBE_MEM | BPF_H: 1127 case BPF_LDX | BPF_PROBE_MEM | BPF_B: 1128 if (ctx->fpb_offset > 0 && src == fp) { 1129 src_adj = fpb; 1130 off_adj = off + ctx->fpb_offset; 1131 } else { 1132 src_adj = src; 1133 off_adj = off; 1134 } 1135 switch (BPF_SIZE(code)) { 1136 case BPF_W: 1137 if (is_lsi_offset(off_adj, 2)) { 1138 emit(A64_LDR32I(dst, src_adj, off_adj), ctx); 1139 } else { 1140 emit_a64_mov_i(1, tmp, off, ctx); 1141 emit(A64_LDR32(dst, src, tmp), ctx); 1142 } 1143 break; 1144 case BPF_H: 1145 if (is_lsi_offset(off_adj, 1)) { 1146 emit(A64_LDRHI(dst, src_adj, off_adj), ctx); 1147 } else { 1148 emit_a64_mov_i(1, tmp, off, ctx); 1149 emit(A64_LDRH(dst, src, tmp), ctx); 1150 } 1151 break; 1152 case BPF_B: 1153 if (is_lsi_offset(off_adj, 0)) { 1154 emit(A64_LDRBI(dst, src_adj, off_adj), ctx); 1155 } else { 1156 emit_a64_mov_i(1, tmp, off, ctx); 1157 emit(A64_LDRB(dst, src, 
tmp), ctx); 1158 } 1159 break; 1160 case BPF_DW: 1161 if (is_lsi_offset(off_adj, 3)) { 1162 emit(A64_LDR64I(dst, src_adj, off_adj), ctx); 1163 } else { 1164 emit_a64_mov_i(1, tmp, off, ctx); 1165 emit(A64_LDR64(dst, src, tmp), ctx); 1166 } 1167 break; 1168 } 1169 1170 ret = add_exception_handler(insn, ctx, dst); 1171 if (ret) 1172 return ret; 1173 break; 1174 1175 /* speculation barrier */ 1176 case BPF_ST | BPF_NOSPEC: 1177 /* 1178 * Nothing required here. 1179 * 1180 * In case of arm64, we rely on the firmware mitigation of 1181 * Speculative Store Bypass as controlled via the ssbd kernel 1182 * parameter. Whenever the mitigation is enabled, it works 1183 * for all of the kernel code with no need to provide any 1184 * additional instructions. 1185 */ 1186 break; 1187 1188 /* ST: *(size *)(dst + off) = imm */ 1189 case BPF_ST | BPF_MEM | BPF_W: 1190 case BPF_ST | BPF_MEM | BPF_H: 1191 case BPF_ST | BPF_MEM | BPF_B: 1192 case BPF_ST | BPF_MEM | BPF_DW: 1193 if (ctx->fpb_offset > 0 && dst == fp) { 1194 dst_adj = fpb; 1195 off_adj = off + ctx->fpb_offset; 1196 } else { 1197 dst_adj = dst; 1198 off_adj = off; 1199 } 1200 /* Load imm to a register then store it */ 1201 emit_a64_mov_i(1, tmp, imm, ctx); 1202 switch (BPF_SIZE(code)) { 1203 case BPF_W: 1204 if (is_lsi_offset(off_adj, 2)) { 1205 emit(A64_STR32I(tmp, dst_adj, off_adj), ctx); 1206 } else { 1207 emit_a64_mov_i(1, tmp2, off, ctx); 1208 emit(A64_STR32(tmp, dst, tmp2), ctx); 1209 } 1210 break; 1211 case BPF_H: 1212 if (is_lsi_offset(off_adj, 1)) { 1213 emit(A64_STRHI(tmp, dst_adj, off_adj), ctx); 1214 } else { 1215 emit_a64_mov_i(1, tmp2, off, ctx); 1216 emit(A64_STRH(tmp, dst, tmp2), ctx); 1217 } 1218 break; 1219 case BPF_B: 1220 if (is_lsi_offset(off_adj, 0)) { 1221 emit(A64_STRBI(tmp, dst_adj, off_adj), ctx); 1222 } else { 1223 emit_a64_mov_i(1, tmp2, off, ctx); 1224 emit(A64_STRB(tmp, dst, tmp2), ctx); 1225 } 1226 break; 1227 case BPF_DW: 1228 if (is_lsi_offset(off_adj, 3)) { 1229 emit(A64_STR64I(tmp, 
dst_adj, off_adj), ctx); 1230 } else { 1231 emit_a64_mov_i(1, tmp2, off, ctx); 1232 emit(A64_STR64(tmp, dst, tmp2), ctx); 1233 } 1234 break; 1235 } 1236 break; 1237 1238 /* STX: *(size *)(dst + off) = src */ 1239 case BPF_STX | BPF_MEM | BPF_W: 1240 case BPF_STX | BPF_MEM | BPF_H: 1241 case BPF_STX | BPF_MEM | BPF_B: 1242 case BPF_STX | BPF_MEM | BPF_DW: 1243 if (ctx->fpb_offset > 0 && dst == fp) { 1244 dst_adj = fpb; 1245 off_adj = off + ctx->fpb_offset; 1246 } else { 1247 dst_adj = dst; 1248 off_adj = off; 1249 } 1250 switch (BPF_SIZE(code)) { 1251 case BPF_W: 1252 if (is_lsi_offset(off_adj, 2)) { 1253 emit(A64_STR32I(src, dst_adj, off_adj), ctx); 1254 } else { 1255 emit_a64_mov_i(1, tmp, off, ctx); 1256 emit(A64_STR32(src, dst, tmp), ctx); 1257 } 1258 break; 1259 case BPF_H: 1260 if (is_lsi_offset(off_adj, 1)) { 1261 emit(A64_STRHI(src, dst_adj, off_adj), ctx); 1262 } else { 1263 emit_a64_mov_i(1, tmp, off, ctx); 1264 emit(A64_STRH(src, dst, tmp), ctx); 1265 } 1266 break; 1267 case BPF_B: 1268 if (is_lsi_offset(off_adj, 0)) { 1269 emit(A64_STRBI(src, dst_adj, off_adj), ctx); 1270 } else { 1271 emit_a64_mov_i(1, tmp, off, ctx); 1272 emit(A64_STRB(src, dst, tmp), ctx); 1273 } 1274 break; 1275 case BPF_DW: 1276 if (is_lsi_offset(off_adj, 3)) { 1277 emit(A64_STR64I(src, dst_adj, off_adj), ctx); 1278 } else { 1279 emit_a64_mov_i(1, tmp, off, ctx); 1280 emit(A64_STR64(src, dst, tmp), ctx); 1281 } 1282 break; 1283 } 1284 break; 1285 1286 case BPF_STX | BPF_ATOMIC | BPF_W: 1287 case BPF_STX | BPF_ATOMIC | BPF_DW: 1288 if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) 1289 ret = emit_lse_atomic(insn, ctx); 1290 else 1291 ret = emit_ll_sc_atomic(insn, ctx); 1292 if (ret) 1293 return ret; 1294 break; 1295 1296 default: 1297 pr_err_once("unknown opcode %02x\n", code); 1298 return -EINVAL; 1299 } 1300 1301 return 0; 1302 } 1303 1304 /* 1305 * Return 0 if FP may change at runtime, otherwise find the minimum negative 1306 * offset to FP, converts it to positive number, and align down 
 * to 8 bytes.
 */
static int find_fpb_offset(struct bpf_prog *prog)
{
	int i;
	int offset = 0;

	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		const u8 class = BPF_CLASS(insn->code);
		const u8 mode = BPF_MODE(insn->code);
		const u8 src = insn->src_reg;
		const u8 dst = insn->dst_reg;
		const s32 imm = insn->imm;
		const s16 off = insn->off;

		switch (class) {
		case BPF_STX:
		case BPF_ST:
			/* fp holds atomic operation result */
			if (class == BPF_STX && mode == BPF_ATOMIC &&
			    ((imm == BPF_XCHG ||
			      imm == (BPF_FETCH | BPF_ADD) ||
			      imm == (BPF_FETCH | BPF_AND) ||
			      imm == (BPF_FETCH | BPF_XOR) ||
			      imm == (BPF_FETCH | BPF_OR)) &&
			     src == BPF_REG_FP))
				return 0;

			/* track the most negative store offset relative to fp */
			if (mode == BPF_MEM && dst == BPF_REG_FP &&
			    off < offset)
				offset = insn->off;
			break;

		case BPF_JMP32:
		case BPF_JMP:
			break;

		case BPF_LDX:
		case BPF_LD:
			/* fp holds load result */
			if (dst == BPF_REG_FP)
				return 0;

			/* track the most negative load offset relative to fp */
			if (class == BPF_LDX && mode == BPF_MEM &&
			    src == BPF_REG_FP && off < offset)
				offset = off;
			break;

		case BPF_ALU:
		case BPF_ALU64:
		default:
			/* fp holds ALU result */
			if (dst == BPF_REG_FP)
				return 0;
		}
	}

	if (offset < 0) {
		/*
		 * safely be converted to a positive 'int', since insn->off
		 * is 's16'
		 */
		offset = -offset;
		/* align down to 8 bytes */
		offset = ALIGN_DOWN(offset, 8);
	}

	return offset;
}

/*
 * Translate every BPF instruction of the program. On the first (fake) pass
 * (ctx->image == NULL), also record in ctx->offset[] the JITed offset at
 * which each BPF instruction starts, for BPF line info.
 */
static int build_body(struct jit_ctx *ctx, bool extra_pass)
{
	const struct bpf_prog *prog = ctx->prog;
	int i;

	/*
	 * - offset[0] offset of the end of prologue,
	 *   start of the 1st instruction.
	 * - offset[1] - offset of the end of 1st instruction,
	 *   start of the 2nd instruction
	 * [....]
	 * - offset[3] - offset of the end of 3rd instruction,
	 *   start of 4th instruction
	 */
	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		int ret;

		if (ctx->image == NULL)
			ctx->offset[i] = ctx->idx;
		ret = build_insn(insn, ctx, extra_pass);
		if (ret > 0) {
			/* ret > 0 means a double-word insn (ld_imm64) consumed
			 * two BPF instructions; record the offset of the
			 * second half as well.
			 */
			i++;
			if (ctx->image == NULL)
				ctx->offset[i] = ctx->idx;
			continue;
		}
		if (ret)
			return ret;
	}
	/*
	 * offset is allocated with prog->len + 1 so fill in
	 * the last element with the offset after the last
	 * instruction (end of program)
	 */
	if (ctx->image == NULL)
		ctx->offset[i] = ctx->idx;

	return 0;
}

/*
 * Reject the image if any emitted word is AARCH64_BREAK_FAULT, the sentinel
 * the instruction encoders return on failure.
 */
static int validate_code(struct jit_ctx *ctx)
{
	int i;

	for (i = 0; i < ctx->idx; i++) {
		u32 a64_insn = le32_to_cpu(ctx->image[i]);

		if (a64_insn == AARCH64_BREAK_FAULT)
			return -1;
	}
	return 0;
}

/*
 * Validate the finished image: no broken encodings, and the number of
 * exception-table entries emitted matches what the core expects.
 */
static int validate_ctx(struct jit_ctx *ctx)
{
	if (validate_code(ctx))
		return -1;

	if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
		return -1;

	return 0;
}

/* Make the freshly written instructions visible to the instruction stream. */
static inline void bpf_flush_icache(void *start, void *end)
{
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

/* Per-program JIT state kept across passes for multi-function programs. */
struct arm64_jit_data {
	struct bpf_binary_header *header;
	u8 *image;
	struct jit_ctx ctx;
};

/*
 * JIT entry point: translate @prog to arm64 code. Runs a fake sizing pass,
 * allocates the binary image (code + plt target + exception table), then the
 * real emission pass, and finally validates the result. Falls back to the
 * interpreter (returns orig_prog) on any failure.
 */
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	int image_size, prog_size, extable_size, extable_align, extable_offset;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	struct arm64_jit_data *jit_data;
	bool was_classic = bpf_prog_was_classic(prog);
	bool tmp_blinded = false;
	bool extra_pass = false;
	struct jit_ctx ctx;
	u8 *image_ptr;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/* If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	if (jit_data->ctx.offset) {
		/* saved state exists: this is the extra pass for a
		 * multi-function program; reuse the prior layout.
		 */
		ctx = jit_data->ctx;
		image_ptr = jit_data->image;
		header = jit_data->header;
		extra_pass = true;
		prog_size = sizeof(u32) * ctx.idx;
		goto skip_init_ctx;
	}
	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
	if (ctx.offset == NULL) {
		prog = orig_prog;
		goto out_off;
	}

	ctx.fpb_offset = find_fpb_offset(prog);

	/*
	 * 1. Initial fake pass to compute ctx->idx and ctx->offset.
	 *
	 * BPF line info needs ctx->offset[i] to be the offset of
	 * instruction[i] in jited image, so build prologue first.
	 */
	if (build_prologue(&ctx, was_classic)) {
		prog = orig_prog;
		goto out_off;
	}

	if (build_body(&ctx, extra_pass)) {
		prog = orig_prog;
		goto out_off;
	}

	ctx.epilogue_offset = ctx.idx;
	build_epilogue(&ctx);
	build_plt(&ctx);

	extable_align = __alignof__(struct exception_table_entry);
	extable_size = prog->aux->num_exentries *
		sizeof(struct exception_table_entry);

	/* Now we know the actual image size. */
	prog_size = sizeof(u32) * ctx.idx;
	/* also allocate space for plt target */
	extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
	image_size = extable_offset + extable_size;
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	if (header == NULL) {
		prog = orig_prog;
		goto out_off;
	}

	/* 2. Now, the actual pass. */

	ctx.image = (__le32 *)image_ptr;
	if (extable_size)
		prog->aux->extable = (void *)image_ptr + extable_offset;
skip_init_ctx:
	ctx.idx = 0;
	ctx.exentry_idx = 0;

	build_prologue(&ctx, was_classic);

	if (build_body(&ctx, extra_pass)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_off;
	}

	build_epilogue(&ctx);
	build_plt(&ctx);

	/* 3. Extra pass to validate JITed code. */
	if (validate_ctx(&ctx)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_off;
	}

	/* And we're done. */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);

	bpf_flush_icache(header, ctx.image + ctx.idx);

	if (!prog->is_func || extra_pass) {
		/* the two passes of a multi-function program must emit the
		 * same number of instructions, otherwise recorded jump
		 * offsets would be wrong.
		 */
		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
			pr_err_once("multi-func JIT bug %d != %d\n",
				    ctx.idx, jit_data->ctx.idx);
			bpf_jit_binary_free(header);
			prog->bpf_func = NULL;
			prog->jited = 0;
			prog->jited_len = 0;
			goto out_off;
		}
		bpf_jit_binary_lock_ro(header);
	} else {
		/* first pass of a subprog: stash state for the extra pass */
		jit_data->ctx = ctx;
		jit_data->image = image_ptr;
		jit_data->header = header;
	}
	prog->bpf_func = (void *)ctx.image;
	prog->jited = 1;
	prog->jited_len = prog_size;

	if (!prog->is_func || extra_pass) {
		int i;

		/* offset[prog->len] is the size of program */
		for (i = 0; i <= prog->len; i++)
			ctx.offset[i] *= AARCH64_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
out_off:
		kvfree(ctx.offset);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}

u64 bpf_jit_alloc_exec_limit(void)
{
	return VMALLOC_END - VMALLOC_START;
}

void *bpf_jit_alloc_exec(unsigned long size)
{
	/* Memory is intended to be executable, reset the pointer tag. */
	return kasan_reset_tag(vmalloc(size));
}

void bpf_jit_free_exec(void *addr)
{
	return vfree(addr);
}

/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}

/*
 * Emit a call to one bpf prog attached to the trampoline: store the link's
 * cookie into the run_ctx slot, call __bpf_prog_enter*(), conditionally run
 * the prog (skipped via a cbz patched in later when enter returns 0), and
 * call __bpf_prog_exit*().
 */
static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
			    int args_off, int retval_off, int run_ctx_off,
			    bool save_ret)
{
	__le32 *branch;
	u64 enter_prog;
	u64 exit_prog;
	struct bpf_prog *p = l->link.prog;
	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);

	if (p->aux->sleepable) {
		enter_prog = (u64)__bpf_prog_enter_sleepable;
		exit_prog = (u64)__bpf_prog_exit_sleepable;
	} else {
		enter_prog = (u64)__bpf_prog_enter;
		exit_prog = (u64)__bpf_prog_exit;
	}

	if (l->cookie == 0) {
		/* if cookie is zero, one instruction is enough to store it */
		emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
	} else {
		emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
		emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
		     ctx);
	}

	/* save p to callee saved register x19 to avoid loading p with mov_i64
	 * each time.
	 */
	emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);

	/* arg1: prog */
	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
	/* arg2: &run_ctx */
	emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx);

	emit_call(enter_prog, ctx);

	/* if (__bpf_prog_enter(prog) == 0)
	 *         goto skip_exec_of_prog;
	 */
	branch = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);

	/* save return value to callee saved register x20 */
	emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);

	/* arg1 for the prog: pointer to the saved args on the stack */
	emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
	if (!p->jited)
		emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);

	emit_call((const u64)p->bpf_func, ctx);

	if (save_ret)
		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);

	if (ctx->image) {
		/* patch the earlier nop into the skip branch now that the
		 * target offset is known.
		 */
		int offset = &ctx->image[ctx->idx] - branch;
		*branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset));
	}

	/* arg1: prog */
	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
	/* arg2: start time */
	emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx);
	/* arg3: &run_ctx */
	emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx);

	emit_call(exit_prog, ctx);
}

/*
 * Emit calls to all fmod_ret progs. After each call, a placeholder nop is
 * recorded in @branches; it is later patched to a cbnz that jumps to the
 * fexit path when the prog returned non-zero.
 */
static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
			       int args_off, int retval_off, int run_ctx_off,
			       __le32 **branches)
{
	int i;

	/* The first fmod_ret program will receive a garbage return value.
	 * Set this to 0 to avoid confusing the program.
	 */
	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
	for (i = 0; i < tl->nr_links; i++) {
		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
				run_ctx_off, true);
		/* if (*(u64 *)(sp + retval_off) != 0)
		 *         goto do_fexit;
		 */
		emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);
		/* Save the location of branch, and generate a nop.
		 * This nop will be replaced with a cbnz later.
		 */
		branches[i] = ctx->image + ctx->idx;
		emit(A64_NOP, ctx);
	}
}

/* Spill the first @nargs argument registers (x0..) to the stack at @args_off. */
static void save_args(struct jit_ctx *ctx, int args_off, int nargs)
{
	int i;

	for (i = 0; i < nargs; i++) {
		emit(A64_STR64I(i, A64_SP, args_off), ctx);
		args_off += 8;
	}
}

/* Reload the first @nargs argument registers (x0..) from the stack. */
static void restore_args(struct jit_ctx *ctx, int args_off, int nargs)
{
	int i;

	for (i = 0; i < nargs; i++) {
		emit(A64_LDR64I(i, A64_SP, args_off), ctx);
		args_off += 8;
	}
}

/* Based on the x86's implementation of arch_prepare_bpf_trampoline().
 *
 * bpf prog and function entry before bpf trampoline hooked:
 *   mov x9, lr
 *   nop
 *
 * bpf prog and function entry after bpf trampoline hooked:
 *   mov x9, lr
 *   bl  <bpf_trampoline or plt>
 *
 */
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
			      struct bpf_tramp_links *tlinks, void *orig_call,
			      int nargs, u32 flags)
{
	int i;
	int stack_size;
	int retaddr_off;
	int regs_off;
	int retval_off;
	int args_off;
	int nargs_off;
	int ip_off;
	int run_ctx_off;
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	bool save_ret;
	__le32 **branches = NULL;

	/* trampoline stack layout:
	 *                  [ parent ip         ]
	 *                  [ FP                ]
	 * SP + retaddr_off [ self ip           ]
	 *                  [ FP                ]
	 *
	 *                  [ padding           ] align SP to multiples of 16
	 *
	 *                  [ x20               ] callee saved reg x20
	 * SP + regs_off    [ x19               ] callee saved reg x19
	 *
	 * SP + retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
	 *                                        BPF_TRAMP_F_RET_FENTRY_RET
	 *
	 *                  [ argN              ]
	 *                  [ ...               ]
	 * SP + args_off    [ arg1              ]
	 *
	 * SP + nargs_off   [ args count        ]
	 *
	 * SP + ip_off      [ traced function   ] BPF_TRAMP_F_IP_ARG flag
	 *
	 * SP + run_ctx_off [ bpf_tramp_run_ctx ]
	 */

	stack_size = 0;
	run_ctx_off = stack_size;
	/* room for bpf_tramp_run_ctx */
	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);

	ip_off = stack_size;
	/* room for IP address argument */
	if (flags & BPF_TRAMP_F_IP_ARG)
		stack_size += 8;

	nargs_off = stack_size;
	/* room for args count */
	stack_size += 8;

	args_off = stack_size;
	/* room for args */
	stack_size += nargs * 8;

	/* room for return value */
	retval_off = stack_size;
	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
	if (save_ret)
		stack_size += 8;

	/* room for callee saved registers, currently x19 and x20 are used */
	regs_off = stack_size;
	stack_size += 16;

	/* round up to multiples of 16 to avoid SPAlignmentFault */
	stack_size = round_up(stack_size, 16);

	/* return address locates above FP */
	retaddr_off = stack_size + 8;

	/* bpf trampoline may be invoked by 3 instruction types:
	 * 1. bl, attached to bpf prog or kernel function via short jump
	 * 2. br, attached to bpf prog or kernel function via long jump
	 * 3. blr, working as a function pointer, used by struct_ops.
	 * So BTI_JC should be used here to support both br and blr.
	 */
	emit_bti(A64_BTI_JC, ctx);

	/* frame for parent function */
	emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* frame for patched function */
	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* allocate stack space */
	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);

	if (flags & BPF_TRAMP_F_IP_ARG) {
		/* save ip address of the traced function */
		emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx);
		emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
	}

	/* save args count */
	emit(A64_MOVZ(1, A64_R(10), nargs, 0), ctx);
	emit(A64_STR64I(A64_R(10), A64_SP, nargs_off), ctx);

	/* save args */
	save_args(ctx, args_off, nargs);

	/* save callee saved registers */
	emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
	emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
		emit_call((const u64)__bpf_tramp_enter, ctx);
	}

	for (i = 0; i < fentry->nr_links; i++)
		invoke_bpf_prog(ctx, fentry->links[i], args_off,
				retval_off, run_ctx_off,
				flags & BPF_TRAMP_F_RET_FENTRY_RET);

	if (fmod_ret->nr_links) {
		branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
				   GFP_KERNEL);
		if (!branches)
			return -ENOMEM;

		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
				   run_ctx_off, branches);
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		restore_args(ctx, args_off, nargs);
		/* call original func */
		emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
		emit(A64_BLR(A64_R(10)), ctx);
		/* store return value */
		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
		/* reserve a nop for bpf_tramp_image_put */
		im->ip_after_call = ctx->image + ctx->idx;
		emit(A64_NOP, ctx);
	}

	/* update the branches saved in invoke_bpf_mod_ret with cbnz */
	for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
		int offset = &ctx->image[ctx->idx] - branches[i];
		*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
	}

	for (i = 0; i < fexit->nr_links; i++)
		invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
				run_ctx_off, false);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		im->ip_epilogue = ctx->image + ctx->idx;
		emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
		emit_call((const u64)__bpf_tramp_exit, ctx);
	}

	if (flags & BPF_TRAMP_F_RESTORE_REGS)
		restore_args(ctx, args_off, nargs);

	/* restore callee saved register x19 and x20 */
	emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
	emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx);

	if (save_ret)
		emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx);

	/* reset SP  */
	emit(A64_MOV(1, A64_SP, A64_FP), ctx);

	/* pop frames  */
	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx);

	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
		/* skip patched function, return to parent */
		emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
		emit(A64_RET(A64_R(9)), ctx);
	} else {
		/* return to patched function */
		emit(A64_MOV(1, A64_R(10), A64_LR), ctx);
		emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
		emit(A64_RET(A64_R(10)), ctx);
	}

	if (ctx->image)
		bpf_flush_icache(ctx->image, ctx->image + ctx->idx);

	kfree(branches);

	/* number of instructions emitted (the sizing pass uses this) */
	return ctx->idx;
}

/*
 * Build a bpf trampoline into [@image, @image_end): a sizing pass with a
 * NULL image first, then the real emission pass. Returns the trampoline
 * size in bytes, or a negative errno.
 */
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
				void *image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *orig_call)
{
	int ret;
	int nargs = m->nr_args;
	int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
	struct jit_ctx ctx = {
		.image = NULL,
		.idx = 0,
	};

	/* the first 8 arguments are passed by registers */
	if (nargs > 8)
		return -ENOTSUPP;

	ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags);
	if (ret < 0)
		return ret;

	if (ret > max_insns)
		return -EFBIG;

	ctx.image = image;
	ctx.idx = 0;

	jit_fill_hole(image, (unsigned int)(image_end - image));
	ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags);

	if (ret > 0 && validate_code(&ctx) < 0)
		ret = -EINVAL;

	if (ret > 0)
		ret *= AARCH64_INSN_SIZE;

	return ret;
}

/*
 * True when branching from @ip to @target exceeds the +/-128MB range of an
 * immediate branch and must therefore go through the plt.
 */
static bool is_long_jump(void *ip, void *target)
{
	long offset;

	/* NULL target means this is a NOP */
	if (!target)
		return false;

	offset = (long)target - (long)ip;
	return offset < -SZ_128M || offset >= SZ_128M;
}

/*
 * Encode into *@insn either a nop (when @addr is NULL) or a branch of @type
 * from @ip to @addr, detouring via @plt when @addr is out of direct range.
 */
static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
			     void *addr, void *plt, u32 *insn)
{
	void *target;

	if (!addr) {
		*insn = aarch64_insn_gen_nop();
		return 0;
	}

	if (is_long_jump(ip, addr))
		target = plt;
	else
		target = addr;

	*insn = aarch64_insn_gen_branch_imm((unsigned long)ip,
					    (unsigned long)target,
					    type);

	return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT;
}

/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf
 * trampoline with the branch instruction from @ip to @new_addr. If @old_addr
 * or @new_addr is NULL, the old or new instruction is NOP.
 *
 * When @ip is the bpf prog entry, a bpf trampoline is being attached or
 * detached. Since bpf trampoline and bpf prog are allocated separately with
 * vmalloc, the address distance may exceed 128MB, the maximum branch range.
 * So long jump should be handled.
 *
 * When a bpf prog is constructed, a plt pointing to empty trampoline
 * dummy_tramp is placed at the end:
 *
 *      bpf_prog:
 *              mov x9, lr
 *              nop // patchsite
 *              ...
 *              ret
 *
 *      plt:
 *              ldr x10, target
 *              br x10
 *      target:
 *              .quad dummy_tramp // plt target
 *
 * This is also the state when no trampoline is attached.
 *
 * When a short-jump bpf trampoline is attached, the patchsite is patched
 * to a bl instruction to the trampoline directly:
 *
 *      bpf_prog:
 *              mov x9, lr
 *              bl <short-jump bpf trampoline address> // patchsite
 *              ...
 *              ret
 *
 *      plt:
 *              ldr x10, target
 *              br x10
 *      target:
 *              .quad dummy_tramp // plt target
 *
 * When a long-jump bpf trampoline is attached, the plt target is filled with
 * the trampoline address and the patchsite is patched to a bl instruction to
 * the plt:
 *
 *      bpf_prog:
 *              mov x9, lr
 *              bl plt // patchsite
 *              ...
 *              ret
 *
 *      plt:
 *              ldr x10, target
 *              br x10
 *      target:
 *              .quad <long-jump bpf trampoline address> // plt target
 *
 * The dummy_tramp is used to prevent another CPU from jumping to unknown
 * locations during the patching process, making the patching process easier.
 */
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
		       void *old_addr, void *new_addr)
{
	int ret;
	u32 old_insn;
	u32 new_insn;
	u32 replaced;
	struct bpf_plt *plt = NULL;
	unsigned long size = 0UL;
	unsigned long offset = ~0UL;
	enum aarch64_insn_branch_type branch_type;
	char namebuf[KSYM_NAME_LEN];
	void *image = NULL;
	u64 plt_target = 0ULL;
	bool poking_bpf_entry;

	if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
		/* Only poking bpf text is supported. Since kernel function
		 * entry is set up by ftrace, we rely on ftrace to poke kernel
		 * functions.
		 */
		return -ENOTSUPP;

	image = ip - offset;
	/* zero offset means we're poking bpf prog entry */
	poking_bpf_entry = (offset == 0UL);

	/* bpf prog entry, find plt and the real patchsite */
	if (poking_bpf_entry) {
		/* plt locates at the end of bpf prog */
		plt = image + size - PLT_TARGET_OFFSET;

		/* skip to the nop instruction in bpf prog entry:
		 * bti c // if BTI enabled
		 * mov x9, x30
		 * nop
		 */
		ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
	}

	/* long jump is only possible at bpf prog entry */
	if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
		    !poking_bpf_entry))
		return -EINVAL;

	if (poke_type == BPF_MOD_CALL)
		branch_type = AARCH64_INSN_BRANCH_LINK;
	else
		branch_type = AARCH64_INSN_BRANCH_NOLINK;

	if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
		return -EFAULT;

	if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
		return -EFAULT;

	if (is_long_jump(ip, new_addr))
		plt_target = (u64)new_addr;
	else if (is_long_jump(ip, old_addr))
		/* if the old target is a long jump and the new target is not,
		 * restore the plt target to dummy_tramp, so there is always a
		 * legal and harmless address stored in plt target, and we'll
		 * never jump from plt to an unknown place.
		 */
		plt_target = (u64)&dummy_tramp;

	if (plt_target) {
		/* non-zero plt_target indicates we're patching a bpf prog,
		 * which is read only.
		 */
		if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1))
			return -EFAULT;
		WRITE_ONCE(plt->target, plt_target);
		set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1);
		/* since plt target points to either the new trampoline
		 * or dummy_tramp, even if another CPU reads the old plt
		 * target value before fetching the bl instruction to plt,
		 * it will be brought back by dummy_tramp, so no barrier is
		 * required here.
		 */
	}

	/* if the old target and the new target are both long jumps, no
	 * patching is required
	 */
	if (old_insn == new_insn)
		return 0;

	mutex_lock(&text_mutex);
	if (aarch64_insn_read(ip, &replaced)) {
		ret = -EFAULT;
		goto out;
	}

	if (replaced != old_insn) {
		/* the instruction on site does not match what we expected to
		 * replace: someone else changed it, bail out.
		 */
		ret = -EFAULT;
		goto out;
	}

	/* We call aarch64_insn_patch_text_nosync() to replace instruction
	 * atomically, so no other CPUs will fetch a half-new and half-old
	 * instruction. But there is chance that another CPU executes the
	 * old instruction after the patching operation finishes (e.g.,
	 * pipeline not flushed, or icache not synchronized yet).
	 *
	 * 1. when a new trampoline is attached, it is not a problem for
	 *    different CPUs to jump to different trampolines temporarily.
	 *
	 * 2. when an old trampoline is freed, we should wait for all other
	 *    CPUs to exit the trampoline and make sure the trampoline is no
	 *    longer reachable, since bpf_tramp_image_put() function already
	 *    uses percpu_ref and task-based rcu to do the sync, no need to call
	 *    the sync version here, see bpf_tramp_image_put() for details.
	 */
	ret = aarch64_insn_patch_text_nosync(ip, new_insn);
out:
	mutex_unlock(&text_mutex);

	return ret;
}