1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * BPF JIT compiler for ARM64
4 *
5 * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
6 */
7
8 #define pr_fmt(fmt) "bpf_jit: " fmt
9
10 #include <linux/bitfield.h>
11 #include <linux/bpf.h>
12 #include <linux/filter.h>
13 #include <linux/memory.h>
14 #include <linux/printk.h>
15 #include <linux/slab.h>
16
17 #include <asm/asm-extable.h>
18 #include <asm/byteorder.h>
19 #include <asm/cacheflush.h>
20 #include <asm/debug-monitors.h>
21 #include <asm/insn.h>
22 #include <asm/patching.h>
23 #include <asm/set_memory.h>
24
25 #include "bpf_jit.h"
26
27 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
28 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
29 #define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
30 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
31 #define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
32
/*
 * Bail out of the current JIT pass (returning -EINVAL) when the signed
 * immediate @imm does not fit in @bits bits.  NOTE: expands a 'return'
 * and references the BPF instruction index 'i', so it may only be used
 * inside an instruction-building function with 'i' in scope.
 */
#define check_imm(bits, imm) do {				\
	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
			i, imm, imm);				\
		return -EINVAL;					\
	}							\
} while (0)
#define check_imm19(imm) check_imm(19, imm)
#define check_imm26(imm) check_imm(26, imm)
43
44 /* Map BPF registers to A64 registers */
/*
 * Map BPF registers to A64 registers.
 * Indexed by the BPF register number plus the JIT-internal pseudo
 * registers (TMP_REG_*, TCCNT_PTR, ARENA_VM_START) defined above.
 */
static const int bpf2a64[] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = A64_R(7),
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = A64_R(0),
	[BPF_REG_2] = A64_R(1),
	[BPF_REG_3] = A64_R(2),
	[BPF_REG_4] = A64_R(3),
	[BPF_REG_5] = A64_R(4),
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = A64_R(19),
	[BPF_REG_7] = A64_R(20),
	[BPF_REG_8] = A64_R(21),
	[BPF_REG_9] = A64_R(22),
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = A64_R(25),
	/* temporary registers for BPF JIT */
	[TMP_REG_1] = A64_R(10),
	[TMP_REG_2] = A64_R(11),
	[TMP_REG_3] = A64_R(12),
	/* tail_call_cnt_ptr (see prepare_bpf_tail_call_cnt()) */
	[TCCNT_PTR] = A64_R(26),
	/* temporary register for blinding constants */
	[BPF_REG_AX] = A64_R(9),
	/* callee saved register for kern_vm_start address */
	[ARENA_VM_START] = A64_R(28),
};
72
/* Per-program JIT state, threaded through every emit_* helper. */
struct jit_ctx {
	const struct bpf_prog *prog;	/* program being JITed */
	int idx;			/* index of next A64 instruction slot */
	int epilogue_offset;		/* A64 index of the shared epilogue */
	int *offset;			/* BPF insn index -> A64 insn index */
	int exentry_idx;		/* next free exception-table slot */
	int nr_used_callee_reg;		/* valid entries in used_callee_reg[] */
	u8 used_callee_reg[8]; /* r6~r9, fp, arena_vm_start */
	__le32 *image;			/* writable instruction buffer */
	__le32 *ro_image;		/* final (read-only) buffer; used for PC-relative math */
	u32 stack_size;			/* stack_depth rounded up to 16B */
	u64 user_vm_start;		/* arena user VA base (for cast_user) */
	u64 arena_vm_start;		/* arena kernel VA base */
	bool fp_used;			/* program references BPF_REG_FP */
	bool write;			/* allow stores into image (see emit()) */
};
89
/*
 * In-image PLT pad emitted after the program (see build_plt()).
 * 'target' is re-pointed when a long-jump trampoline is attached or
 * detached; the two instructions themselves stay fixed.
 */
struct bpf_plt {
	u32 insn_ldr; /* load target */
	u32 insn_br; /* branch to target */
	u64 target; /* target value */
};

#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
98
emit(const u32 insn,struct jit_ctx * ctx)99 static inline void emit(const u32 insn, struct jit_ctx *ctx)
100 {
101 if (ctx->image != NULL && ctx->write)
102 ctx->image[ctx->idx] = cpu_to_le32(insn);
103
104 ctx->idx++;
105 }
106
/*
 * Materialize a 32-bit immediate in @reg using at most two instructions
 * (MOVN/MOVZ for the base pattern, MOVK to patch the other half).
 */
static inline void emit_a64_mov_i(const int is64, const int reg,
				  const s32 val, struct jit_ctx *ctx)
{
	u16 hi = val >> 16;
	u16 lo = val & 0xffff;

	if (hi & 0x8000) {
		/* Negative value: start from an all-ones pattern via MOVN */
		if (hi == 0xffff) {
			/* Upper half is all ones: a single MOVN suffices */
			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
		} else {
			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
			/* MOVN left the low half as 0xffff; patch if needed */
			if (lo != 0xffff)
				emit(A64_MOVK(is64, reg, lo, 0), ctx);
		}
	} else {
		/* Non-negative: MOVZ the low half, patch the high half */
		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
		if (hi)
			emit(A64_MOVK(is64, reg, hi, 16), ctx);
	}
}
127
/*
 * Count the 16-bit chunks of @val that differ from the background
 * pattern (all-ones when @inverse, all-zeros otherwise), i.e. how many
 * MOVK patches a MOVN/MOVZ-based sequence would need.
 */
static int i64_i16_blocks(const u64 val, bool inverse)
{
	const u16 skip = inverse ? 0xffff : 0x0000;
	int shift, n = 0;

	for (shift = 0; shift < 64; shift += 16)
		n += ((val >> shift) & 0xffff) != skip;

	return n;
}
135
/*
 * Materialize an arbitrary 64-bit immediate in @reg with the shortest
 * MOVN/MOVZ + MOVK sequence.
 */
static inline void emit_a64_mov_i64(const int reg, const u64 val,
				    struct jit_ctx *ctx)
{
	u64 nrm_tmp = val, rev_tmp = ~val;
	bool inverse;
	int shift;

	/* Fits in 32 bits: reuse the cheaper 32-bit path */
	if (!(nrm_tmp >> 32))
		return emit_a64_mov_i(0, reg, (u32)val, ctx);

	/*
	 * Start from whichever of MOVN (all-ones background) or MOVZ
	 * (all-zeros background) leaves fewer chunks to patch with MOVK.
	 */
	inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
	/* Highest 16-bit chunk containing a significant bit */
	shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
				(fls64(nrm_tmp) - 1)), 16), 0);
	if (inverse)
		emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
	else
		emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
	shift -= 16;
	/* Patch every lower chunk that differs from the background pattern */
	while (shift >= 0) {
		if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
			emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
		shift -= 16;
	}
}
160
/* Emit a BTI landing pad only when the kernel is built with BTI support */
static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
{
	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
		emit(insn, ctx);
}
166
167 /*
168 * Kernel addresses in the vmalloc space use at most 48 bits, and the
169 * remaining bits are guaranteed to be 0x1. So we can compose the address
170 * with a fixed length movn/movk/movk sequence.
171 */
emit_addr_mov_i64(const int reg,const u64 val,struct jit_ctx * ctx)172 static inline void emit_addr_mov_i64(const int reg, const u64 val,
173 struct jit_ctx *ctx)
174 {
175 u64 tmp = val;
176 int shift = 0;
177
178 emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx);
179 while (shift < 32) {
180 tmp >>= 16;
181 shift += 16;
182 emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
183 }
184 }
185
should_emit_indirect_call(long target,const struct jit_ctx * ctx)186 static bool should_emit_indirect_call(long target, const struct jit_ctx *ctx)
187 {
188 long offset;
189
190 /* when ctx->ro_image is not allocated or the target is unknown,
191 * emit indirect call
192 */
193 if (!ctx->ro_image || !target)
194 return true;
195
196 offset = target - (long)&ctx->ro_image[ctx->idx];
197 return offset < -SZ_128M || offset >= SZ_128M;
198 }
199
/* Emit a PC-relative BL to @target (caller guarantees it is in range) */
static void emit_direct_call(u64 target, struct jit_ctx *ctx)
{
	unsigned long pc = (unsigned long)&ctx->ro_image[ctx->idx];

	emit(aarch64_insn_gen_branch_imm(pc, target, AARCH64_INSN_BRANCH_LINK),
	     ctx);
}
209
/* Load @target into a scratch register and branch-with-link through it */
static void emit_indirect_call(u64 target, struct jit_ctx *ctx)
{
	const u8 tmp = bpf2a64[TMP_REG_1];

	emit_addr_mov_i64(tmp, target, ctx);
	emit(A64_BLR(tmp), ctx);
}
218
/* Call @target, preferring a direct BL when the displacement allows it */
static void emit_call(u64 target, struct jit_ctx *ctx)
{
	if (!should_emit_indirect_call((long)target, ctx))
		emit_direct_call(target, ctx);
	else
		emit_indirect_call(target, ctx);
}
226
bpf2a64_offset(int bpf_insn,int off,const struct jit_ctx * ctx)227 static inline int bpf2a64_offset(int bpf_insn, int off,
228 const struct jit_ctx *ctx)
229 {
230 /* BPF JMP offset is relative to the next instruction */
231 bpf_insn++;
232 /*
233 * Whereas arm64 branch instructions encode the offset
234 * from the branch itself, so we must subtract 1 from the
235 * instruction offset.
236 */
237 return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
238 }
239
jit_fill_hole(void * area,unsigned int size)240 static void jit_fill_hole(void *area, unsigned int size)
241 {
242 __le32 *ptr;
243 /* We are guaranteed to have aligned memory. */
244 for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
245 *ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
246 }
247
/* Overwrite @len bytes at @dst with break instructions */
int bpf_arch_text_invalidate(void *dst, size_t len)
{
	return aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len) ? 0 : -EINVAL;
}
255
epilogue_offset(const struct jit_ctx * ctx)256 static inline int epilogue_offset(const struct jit_ctx *ctx)
257 {
258 int to = ctx->epilogue_offset;
259 int from = ctx->idx;
260
261 return to - from;
262 }
263
is_addsub_imm(u32 imm)264 static bool is_addsub_imm(u32 imm)
265 {
266 /* Either imm12 or shifted imm12. */
267 return !(imm & ~0xfff) || !(imm & ~0xfff000);
268 }
269
270 /*
271 * There are 3 types of AArch64 LDR/STR (immediate) instruction:
272 * Post-index, Pre-index, Unsigned offset.
273 *
274 * For BPF ldr/str, the "unsigned offset" type is sufficient.
275 *
276 * "Unsigned offset" type LDR(immediate) format:
277 *
278 * 3 2 1 0
279 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
280 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
281 * |x x|1 1 1 0 0 1 0 1| imm12 | Rn | Rt |
282 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
283 * scale
284 *
285 * "Unsigned offset" type STR(immediate) format:
286 * 3 2 1 0
287 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
288 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
289 * |x x|1 1 1 0 0 1 0 0| imm12 | Rn | Rt |
290 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
291 * scale
292 *
293 * The offset is calculated from imm12 and scale in the following way:
294 *
295 * offset = (u64)imm12 << scale
296 */
/*
 * Check whether @offset is encodable as an "unsigned offset" load/store
 * immediate: offset = imm12 << scale, so it must be non-negative,
 * aligned to 1 << scale and no larger than 0xFFF << scale.
 */
static bool is_lsi_offset(int offset, int scale)
{
	return offset >= 0 &&
	       offset <= (0xFFF << scale) &&
	       !(offset & ((1 << scale) - 1));
}
310
311 /* generated main prog prologue:
312 * bti c // if CONFIG_ARM64_BTI_KERNEL
313 * mov x9, lr
314 * nop // POKE_OFFSET
315 * paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
316 * stp x29, lr, [sp, #-16]!
317 * mov x29, sp
318 * stp xzr, x26, [sp, #-16]!
319 * mov x26, sp
320 * // PROLOGUE_OFFSET
321 * // save callee-saved registers
322 */
/*
 * Set up TCCNT_PTR so the whole call chain shares one tail-call counter.
 */
static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx)
{
	const bool is_main_prog = !bpf_is_subprog(ctx->prog);
	const u8 ptr = bpf2a64[TCCNT_PTR];

	if (is_main_prog) {
		/*
		 * Initialize tail_call_cnt: push a zeroed slot and keep a
		 * pointer to it in TCCNT_PTR.
		 */
		emit(A64_PUSH(A64_ZR, ptr, A64_SP), ctx);
		emit(A64_MOV(1, ptr, A64_SP), ctx);
	} else
		/* Subprog: keep the caller's pointer; push twice for 16B alignment */
		emit(A64_PUSH(ptr, ptr, A64_SP), ctx);
}
335
find_used_callee_regs(struct jit_ctx * ctx)336 static void find_used_callee_regs(struct jit_ctx *ctx)
337 {
338 int i;
339 const struct bpf_prog *prog = ctx->prog;
340 const struct bpf_insn *insn = &prog->insnsi[0];
341 int reg_used = 0;
342
343 for (i = 0; i < prog->len; i++, insn++) {
344 if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
345 reg_used |= 1;
346
347 if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
348 reg_used |= 2;
349
350 if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8)
351 reg_used |= 4;
352
353 if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9)
354 reg_used |= 8;
355
356 if (insn->dst_reg == BPF_REG_FP || insn->src_reg == BPF_REG_FP) {
357 ctx->fp_used = true;
358 reg_used |= 16;
359 }
360 }
361
362 i = 0;
363 if (reg_used & 1)
364 ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_6];
365
366 if (reg_used & 2)
367 ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_7];
368
369 if (reg_used & 4)
370 ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_8];
371
372 if (reg_used & 8)
373 ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
374
375 if (reg_used & 16)
376 ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
377
378 if (ctx->arena_vm_start)
379 ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
380
381 ctx->nr_used_callee_reg = i;
382 }
383
/* Save callee-saved registers */
static void push_callee_regs(struct jit_ctx *ctx)
{
	int reg1, reg2, i;

	/*
	 * Program acting as exception boundary should save all ARM64
	 * Callee-saved registers as the exception callback needs to recover
	 * all ARM64 Callee-saved registers in its epilogue.
	 */
	if (ctx->prog->aux->exception_boundary) {
		emit(A64_PUSH(A64_R(19), A64_R(20), A64_SP), ctx);
		emit(A64_PUSH(A64_R(21), A64_R(22), A64_SP), ctx);
		emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
		emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
		emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
	} else {
		find_used_callee_regs(ctx);
		/* Push the used registers pairwise (each push is one STP) */
		for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
			reg1 = ctx->used_callee_reg[i];
			reg2 = ctx->used_callee_reg[i + 1];
			emit(A64_PUSH(reg1, reg2, A64_SP), ctx);
		}
		/* Odd count: pair the leftover register with xzr */
		if (i < ctx->nr_used_callee_reg) {
			reg1 = ctx->used_callee_reg[i];
			/* keep SP 16-byte aligned */
			emit(A64_PUSH(reg1, A64_ZR, A64_SP), ctx);
		}
	}
}
414
/* Restore callee-saved registers (exact mirror of push_callee_regs()) */
static void pop_callee_regs(struct jit_ctx *ctx)
{
	struct bpf_prog_aux *aux = ctx->prog->aux;
	int reg1, reg2, i;

	/*
	 * Program acting as exception boundary pushes R23 and R24 in addition
	 * to BPF callee-saved registers. Exception callback uses the boundary
	 * program's stack frame, so recover these extra registers in the above
	 * two cases.
	 */
	if (aux->exception_boundary || aux->exception_cb) {
		emit(A64_POP(A64_R(27), A64_R(28), A64_SP), ctx);
		emit(A64_POP(A64_R(25), A64_R(26), A64_SP), ctx);
		emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx);
		emit(A64_POP(A64_R(21), A64_R(22), A64_SP), ctx);
		emit(A64_POP(A64_R(19), A64_R(20), A64_SP), ctx);
	} else {
		i = ctx->nr_used_callee_reg - 1;
		/* Odd count: the last register was paired with xzr on push */
		if (ctx->nr_used_callee_reg % 2 != 0) {
			reg1 = ctx->used_callee_reg[i];
			emit(A64_POP(reg1, A64_ZR, A64_SP), ctx);
			i--;
		}
		/* Pop the remaining pairs in reverse push order */
		while (i > 0) {
			reg1 = ctx->used_callee_reg[i - 1];
			reg2 = ctx->used_callee_reg[i];
			emit(A64_POP(reg1, reg2, A64_SP), ctx);
			i -= 2;
		}
	}
}
448
449 #define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
450 #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
451
452 /* Offset of nop instruction in bpf prog entry to be poked */
453 #define POKE_OFFSET (BTI_INSNS + 1)
454
455 /* Tail call offset to jump into */
456 #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 4)
457
/*
 * Emit the program prologue.  Returns 0 on success, -1 if the emitted
 * instruction count no longer matches PROLOGUE_OFFSET (which tail calls
 * depend on — see emit_bpf_tail_call()).
 */
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
	const struct bpf_prog *prog = ctx->prog;
	const bool is_main_prog = !bpf_is_subprog(prog);
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const int idx0 = ctx->idx;
	int cur_offset;

	/*
	 * BPF prog stack layout
	 *
	 *                         high
	 * original A64_SP =>   0:+-----+ BPF prologue
	 *                        |FP/LR|
	 * current A64_FP =>  -16:+-----+
	 *                        | ... | callee saved registers
	 * BPF fp register => -64:+-----+ <= (BPF_FP)
	 *                        |     |
	 *                        | ... | BPF prog stack
	 *                        |     |
	 *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
	 *                        |RSVD | padding
	 * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
	 *                        |     |
	 *                        | ... | Function call stack
	 *                        |     |
	 *                        +-----+
	 *                          low
	 *
	 */

	/* bpf function may be invoked by 3 instruction types:
	 * 1. bl, attached via freplace to bpf prog via short jump
	 * 2. br, attached via freplace to bpf prog via long jump
	 * 3. blr, working as a function pointer, used by emit_call.
	 * So BTI_JC should used here to support both br and blr.
	 */
	emit_bti(A64_BTI_JC, ctx);

	/* Keep the caller's LR in x9; dummy_tramp returns through it */
	emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
	/* nop at POKE_OFFSET, patched when a trampoline is attached */
	emit(A64_NOP, ctx);

	if (!prog->aux->exception_cb) {
		/* Sign lr */
		if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
			emit(A64_PACIASP, ctx);

		/* Save FP and LR registers to stay align with ARM64 AAPCS */
		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
		emit(A64_MOV(1, A64_FP, A64_SP), ctx);

		/* Set up the tail-call counter slot/pointer (TCCNT_PTR) */
		prepare_bpf_tail_call_cnt(ctx);

		if (!ebpf_from_cbpf && is_main_prog) {
			/*
			 * Tail calls enter at PROLOGUE_OFFSET, skipping all
			 * of the above; verify the count actually matches.
			 */
			cur_offset = ctx->idx - idx0;
			if (cur_offset != PROLOGUE_OFFSET) {
				pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
					    cur_offset, PROLOGUE_OFFSET);
				return -1;
			}
			/* BTI landing pad for the tail call, done with a BR */
			emit_bti(A64_BTI_J, ctx);
		}
		push_callee_regs(ctx);
	} else {
		/*
		 * Exception callback receives FP of Main Program as third
		 * parameter
		 */
		emit(A64_MOV(1, A64_FP, A64_R(2)), ctx);
		/*
		 * Main Program already pushed the frame record and the
		 * callee-saved registers. The exception callback will not push
		 * anything and re-use the main program's stack.
		 *
		 * 12 registers are on the stack
		 */
		emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
	}

	if (ctx->fp_used)
		/* Set up BPF prog stack base register */
		emit(A64_MOV(1, fp, A64_SP), ctx);

	/* Stack must be multiples of 16B */
	ctx->stack_size = round_up(prog->aux->stack_depth, 16);

	/* Set up function call stack */
	if (ctx->stack_size)
		emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	if (ctx->arena_vm_start)
		/* Cache the arena kernel base in a callee-saved register */
		emit_a64_mov_i64(arena_vm_base, ctx->arena_vm_start, ctx);

	return 0;
}
555
/*
 * Emit the body of a BPF tail call: bounds/limit/NULL checks, then a
 * direct branch into the target program just past its prologue.
 */
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
	const u8 r2 = bpf2a64[BPF_REG_2];	/* array */
	const u8 r3 = bpf2a64[BPF_REG_3];	/* index */

	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 prg = bpf2a64[TMP_REG_2];
	const u8 tcc = bpf2a64[TMP_REG_3];
	const u8 ptr = bpf2a64[TCCNT_PTR];
	size_t off;
	/*
	 * Forward-branch slots: emitted as NOPs during the sizing pass and
	 * patched into real branches at the bottom once ctx->image (and
	 * thus the branch distances) is known.
	 */
	__le32 *branch1 = NULL;
	__le32 *branch2 = NULL;
	__le32 *branch3 = NULL;

	/* if (index >= array->map.max_entries)
	 * goto out;
	 */
	off = offsetof(struct bpf_array, map.max_entries);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_LDR32(tmp, r2, tmp), ctx);
	/* 32-bit self-move zero-extends the index */
	emit(A64_MOV(0, r3, r3), ctx);
	emit(A64_CMP(0, r3, tmp), ctx);
	branch1 = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);

	/*
	 * if ((*tail_call_cnt_ptr) >= MAX_TAIL_CALL_CNT)
	 * goto out;
	 */
	emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
	emit(A64_LDR64I(tcc, ptr, 0), ctx);
	emit(A64_CMP(1, tcc, tmp), ctx);
	branch2 = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);

	/* (*tail_call_cnt_ptr)++; */
	emit(A64_ADD_I(1, tcc, tcc, 1), ctx);

	/* prog = array->ptrs[index];
	 * if (prog == NULL)
	 * goto out;
	 */
	off = offsetof(struct bpf_array, ptrs);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_ADD(1, tmp, r2, tmp), ctx);
	/* index * sizeof(void *) */
	emit(A64_LSL(1, prg, r3, 3), ctx);
	emit(A64_LDR64(prg, tmp, prg), ctx);
	branch3 = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);

	/* Update tail_call_cnt if the slot is populated. */
	emit(A64_STR64I(tcc, ptr, 0), ctx);

	/* restore SP */
	if (ctx->stack_size)
		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	pop_callee_regs(ctx);

	/*
	 * goto *(prog->bpf_func + prologue_offset);
	 * Entering past PROLOGUE_OFFSET reuses the current frame record
	 * and tail-call counter instead of creating new ones.
	 */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_LDR64(tmp, prg, tmp), ctx);
	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
	emit(A64_BR(tmp), ctx);

	/* Patch the NOP slots; COND_CS == unsigned "higher or same" */
	if (ctx->image) {
		off = &ctx->image[ctx->idx] - branch1;
		*branch1 = cpu_to_le32(A64_B_(A64_COND_CS, off));

		off = &ctx->image[ctx->idx] - branch2;
		*branch2 = cpu_to_le32(A64_B_(A64_COND_CS, off));

		off = &ctx->image[ctx->idx] - branch3;
		*branch3 = cpu_to_le32(A64_CBZ(1, prg, off));
	}

	return 0;
}
636
637 #ifdef CONFIG_ARM64_LSE_ATOMICS
/* Emit a BPF_ATOMIC operation using ARMv8.1 LSE instructions */
static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
	const s16 off = insn->off;
	u8 reg = dst;	/* effective address register */

	if (off || arena) {
		if (off) {
			/* reg = dst + off */
			emit_a64_mov_i(1, tmp, off, ctx);
			emit(A64_ADD(1, tmp, tmp, dst), ctx);
			reg = tmp;
		}
		if (arena) {
			/* rebase arena offset onto the kernel mapping */
			emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
			reg = tmp;
		}
	}

	switch (insn->imm) {
	/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
	case BPF_ADD:
		emit(A64_STADD(isdw, reg, src), ctx);
		break;
	case BPF_AND:
		/* LSE provides "store clear bits": AND x == STCLR ~x */
		emit(A64_MVN(isdw, tmp2, src), ctx);
		emit(A64_STCLR(isdw, reg, tmp2), ctx);
		break;
	case BPF_OR:
		emit(A64_STSET(isdw, reg, src), ctx);
		break;
	case BPF_XOR:
		emit(A64_STEOR(isdw, reg, src), ctx);
		break;
	/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
	case BPF_ADD | BPF_FETCH:
		emit(A64_LDADDAL(isdw, src, reg, src), ctx);
		break;
	case BPF_AND | BPF_FETCH:
		emit(A64_MVN(isdw, tmp2, src), ctx);
		emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
		break;
	case BPF_OR | BPF_FETCH:
		emit(A64_LDSETAL(isdw, src, reg, src), ctx);
		break;
	case BPF_XOR | BPF_FETCH:
		emit(A64_LDEORAL(isdw, src, reg, src), ctx);
		break;
	/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
	case BPF_XCHG:
		emit(A64_SWPAL(isdw, src, reg, src), ctx);
		break;
	/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
	case BPF_CMPXCHG:
		emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
		break;
	default:
		pr_err_once("unknown atomic op code %02x\n", insn->imm);
		return -EINVAL;
	}

	return 0;
}
707 #else
/* LSE atomics not built in: callers must fall back to emit_ll_sc_atomic() */
static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	return -EINVAL;
}
712 #endif
713
/*
 * Emit a BPF_ATOMIC operation as a load/store-exclusive (LL/SC) retry
 * loop.  The hard-coded jmp_offset values below count emitted
 * instructions, so keep them in sync with any change to the sequences.
 */
static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 tmp3 = bpf2a64[TMP_REG_3];
	const int i = insn - ctx->prog->insnsi;
	const s32 imm = insn->imm;
	const s16 off = insn->off;
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	u8 reg;
	s32 jmp_offset;

	if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
		/* ll_sc based atomics don't support unsafe pointers yet. */
		pr_err_once("unknown atomic opcode %02x\n", code);
		return -EINVAL;
	}

	if (!off) {
		reg = dst;
	} else {
		/* reg = dst + off */
		emit_a64_mov_i(1, tmp, off, ctx);
		emit(A64_ADD(1, tmp, tmp, dst), ctx);
		reg = tmp;
	}

	if (imm == BPF_ADD || imm == BPF_AND ||
	    imm == BPF_OR || imm == BPF_XOR) {
		/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
		emit(A64_LDXR(isdw, tmp2, reg), ctx);
		if (imm == BPF_ADD)
			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_AND)
			emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_OR)
			emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
		else
			emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
		emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
		/* retry from the LDXR if the exclusive store failed */
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
	} else if (imm == (BPF_ADD | BPF_FETCH) ||
		   imm == (BPF_AND | BPF_FETCH) ||
		   imm == (BPF_OR | BPF_FETCH) ||
		   imm == (BPF_XOR | BPF_FETCH)) {
		/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
		const u8 ax = bpf2a64[BPF_REG_AX];

		/* save the operand: src is overwritten with the old value */
		emit(A64_MOV(isdw, ax, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		if (imm == (BPF_ADD | BPF_FETCH))
			emit(A64_ADD(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_AND | BPF_FETCH))
			emit(A64_AND(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_OR | BPF_FETCH))
			emit(A64_ORR(isdw, tmp2, src, ax), ctx);
		else
			emit(A64_EOR(isdw, tmp2, src, ax), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		/* retry from the LDXR if the exclusive store failed */
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_XCHG) {
		/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
		emit(A64_MOV(isdw, tmp2, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		/* retry from the LDXR if the exclusive store failed */
		jmp_offset = -2;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_CMPXCHG) {
		/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
		const u8 r0 = bpf2a64[BPF_REG_0];

		emit(A64_MOV(isdw, tmp2, r0), ctx);
		emit(A64_LDXR(isdw, r0, reg), ctx);
		emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
		/* old value != expected: skip the store (and the retry) */
		jmp_offset = 4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
		emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
		/* retry from the LDXR if the exclusive store failed */
		jmp_offset = -4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else {
		pr_err_once("unknown atomic op code %02x\n", imm);
		return -EINVAL;
	}

	return 0;
}
812
void dummy_tramp(void);

/*
 * Default PLT target (see build_plt()): behaves as if no trampoline were
 * attached — restores LR from x9 (stashed there by the BPF prologue) and
 * returns straight to the original caller via x10.
 */
asm (
"	.pushsection .text, \"ax\", @progbits\n"
"	.global dummy_tramp\n"
"	.type dummy_tramp, %function\n"
"dummy_tramp:"
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
"	bti j\n" /* dummy_tramp is called via "br x10" */
#endif
"	mov x10, x30\n"
"	mov x30, x9\n"
"	ret x10\n"
"	.size dummy_tramp, .-dummy_tramp\n"
"	.popsection\n"
);
829
830 /* build a plt initialized like this:
831 *
832 * plt:
833 * ldr tmp, target
834 * br tmp
835 * target:
836 * .quad dummy_tramp
837 *
838 * when a long jump trampoline is attached, target is filled with the
839 * trampoline address, and when the trampoline is removed, target is
840 * restored to dummy_tramp address.
841 */
static void build_plt(struct jit_ctx *ctx)
{
	const u8 tmp = bpf2a64[TMP_REG_1];
	struct bpf_plt *plt = NULL;

	/* make sure target is 64-bit aligned */
	if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
		emit(A64_NOP, ctx);

	plt = (struct bpf_plt *)(ctx->image + ctx->idx);
	/* plt is called via bl, no BTI needed here */
	/* ldr literal: target sits 2 instructions past the ldr itself */
	emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
	emit(A64_BR(tmp), ctx);

	/* point at dummy_tramp until a real trampoline is attached */
	if (ctx->image)
		plt->target = (u64)&dummy_tramp;
}
859
/* Emit the epilogue: unwind the prologue in exact reverse order and return */
static void build_epilogue(struct jit_ctx *ctx)
{
	const u8 r0 = bpf2a64[BPF_REG_0];
	const u8 ptr = bpf2a64[TCCNT_PTR];

	/* We're done with BPF stack */
	if (ctx->stack_size)
		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	pop_callee_regs(ctx);

	/* Pop the tail-call counter slot (see prepare_bpf_tail_call_cnt()) */
	emit(A64_POP(A64_ZR, ptr, A64_SP), ctx);

	/* Restore FP/LR registers */
	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);

	/* Set return value */
	emit(A64_MOV(1, A64_R(0), r0), ctx);

	/* Authenticate lr */
	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
		emit(A64_AUTIASP, ctx);

	emit(A64_RET(A64_LR), ctx);
}
885
886 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
887 #define BPF_FIXUP_REG_MASK GENMASK(31, 27)
888 #define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */
889
ex_handler_bpf(const struct exception_table_entry * ex,struct pt_regs * regs)890 bool ex_handler_bpf(const struct exception_table_entry *ex,
891 struct pt_regs *regs)
892 {
893 off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
894 int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
895
896 if (dst_reg != DONT_CLEAR)
897 regs->regs[dst_reg] = 0;
898 regs->pc = (unsigned long)&ex->fixup - offset;
899 return true;
900 }
901
902 /* For accesses to BTF pointers, add an entry to the exception table */
/* For accesses to BTF pointers, add an entry to the exception table */
static int add_exception_handler(const struct bpf_insn *insn,
				 struct jit_ctx *ctx,
				 int dst_reg)
{
	off_t ins_offset;
	off_t fixup_offset;
	unsigned long pc;
	struct exception_table_entry *ex;

	if (!ctx->image)
		/* First pass */
		return 0;

	/* Only the PROBE_* access modes may fault and need a fixup */
	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
	    BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
	    BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
		return 0;

	if (!ctx->prog->aux->extable ||
	    WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
		return -EINVAL;

	ex = &ctx->prog->aux->extable[ctx->exentry_idx];
	/* final address of the just-emitted (potentially faulting) insn */
	pc = (unsigned long)&ctx->ro_image[ctx->idx - 1];

	/*
	 * This is the relative offset of the instruction that may fault from
	 * the exception table itself. This will be written to the exception
	 * table and if this instruction faults, the destination register will
	 * be set to '0' and the execution will jump to the next instruction.
	 */
	ins_offset = pc - (long)&ex->insn;
	if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
		return -ERANGE;

	/*
	 * Since the extable follows the program, the fixup offset is always
	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
	 * to keep things simple, and put the destination register in the upper
	 * bits. We don't need to worry about buildtime or runtime sort
	 * modifying the upper bits because the table is already sorted, and
	 * isn't part of the main exception table.
	 *
	 * The fixup_offset is set to the next instruction from the instruction
	 * that may fault. The execution will jump to this after handling the
	 * fault.
	 */
	fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
		return -ERANGE;

	/*
	 * The offsets above have been calculated using the RO buffer but we
	 * need to use the R/W buffer for writes.
	 * switch ex to rw buffer for writing.
	 */
	ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);

	ex->insn = ins_offset;

	/* Only loads clobber a register; stores keep all registers intact */
	if (BPF_CLASS(insn->code) != BPF_LDX)
		dst_reg = DONT_CLEAR;

	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

	ex->type = EX_TYPE_BPF;

	ctx->exentry_idx++;
	return 0;
}
975
976 /* JITs an eBPF instruction.
977 * Returns:
978 * 0 - successfully JITed an 8-byte eBPF instruction.
979 * >0 - successfully JITed a 16-byte eBPF instruction.
980 * <0 - failed to JIT.
981 */
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
		      bool extra_pass)
{
	const u8 code = insn->code;
	u8 dst = bpf2a64[insn->dst_reg];
	u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	/* index of this insn within the program, used for jump offsets */
	const int i = insn - ctx->prog->insnsi;
	const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
			  BPF_CLASS(code) == BPF_JMP;
	u8 jmp_cond;
	s32 jmp_offset;
	u32 a64_insn;
	u8 src_adj;
	u8 dst_adj;
	int off_adj;
	int ret;
	bool sign_extend;

	switch (code) {
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		if (insn_is_cast_user(insn)) {
			/*
			 * Rebuild a full 64-bit user arena pointer from the
			 * 32-bit offset in src; if the low 32 bits are zero
			 * (NULL), the CBZ skips the ORR so dst stays 0.
			 */
			emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits
			emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx);
			emit(A64_LSL(1, dst, dst, 32), ctx);
			emit(A64_CBZ(1, tmp, 2), ctx);
			emit(A64_ORR(1, tmp, dst, tmp), ctx);
			emit(A64_MOV(1, dst, tmp), ctx);
			break;
		} else if (insn_is_mov_percpu_addr(insn)) {
			if (dst != src)
				emit(A64_MOV(1, dst, src), ctx);
			/* per-CPU offset: TPIDR_EL2 with VHE, else TPIDR_EL1 */
			if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
				emit(A64_MRS_TPIDR_EL2(tmp), ctx);
			else
				emit(A64_MRS_TPIDR_EL1(tmp), ctx);
			emit(A64_ADD(1, dst, dst, tmp), ctx);
			break;
		}
		/* non-zero off selects a sign-extending move */
		switch (insn->off) {
		case 0:
			emit(A64_MOV(is64, dst, src), ctx);
			break;
		case 8:
			emit(A64_SXTB(is64, dst, src), ctx);
			break;
		case 16:
			emit(A64_SXTH(is64, dst, src), ctx);
			break;
		case 32:
			emit(A64_SXTW(is64, dst, src), ctx);
			break;
		}
		break;
	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit(A64_ADD(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		emit(A64_SUB(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit(A64_AND(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit(A64_ORR(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit(A64_EOR(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit(A64_MUL(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_X:
		/* off != 0 requests signed division */
		if (!off)
			emit(A64_UDIV(is64, dst, dst, src), ctx);
		else
			emit(A64_SDIV(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		if (!off)
			emit(A64_UDIV(is64, tmp, dst, src), ctx);
		else
			emit(A64_SDIV(is64, tmp, dst, src), ctx);
		/* dst = dst - (dst / src) * src */
		emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit(A64_LSLV(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit(A64_LSRV(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit(A64_ASRV(is64, dst, dst, src), ctx);
		break;
	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		emit(A64_NEG(is64, dst, dst), ctx);
		break;
	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
	case BPF_ALU | BPF_END | BPF_FROM_BE:
	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
#ifdef CONFIG_CPU_BIG_ENDIAN
	if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE)
		goto emit_bswap_uxt;
#else /* !CONFIG_CPU_BIG_ENDIAN */
	if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE)
		goto emit_bswap_uxt;
#endif
		switch (imm) {
		case 16:
			emit(A64_REV16(is64, dst, dst), ctx);
			/* zero-extend 16 bits into 64 bits */
			emit(A64_UXTH(is64, dst, dst), ctx);
			break;
		case 32:
			emit(A64_REV32(0, dst, dst), ctx);
			/* upper 32 bits already cleared */
			break;
		case 64:
			emit(A64_REV64(dst, dst), ctx);
			break;
		}
		break;
emit_bswap_uxt:
		/* swap to native endianness is a no-op: just zero-extend */
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit(A64_UXTH(is64, dst, dst), ctx);
			break;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			emit(A64_UXTW(is64, dst, dst), ctx);
			break;
		case 64:
			/* nop */
			break;
		}
		break;
	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		emit_a64_mov_i(is64, dst, imm, ctx);
		break;
	/* dst = dst OP imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		/* prefer the immediate form; fall back to tmp register */
		if (is_addsub_imm(imm)) {
			emit(A64_ADD_I(is64, dst, dst, imm), ctx);
		} else if (is_addsub_imm(-imm)) {
			emit(A64_SUB_I(is64, dst, dst, -imm), ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_ADD(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		if (is_addsub_imm(imm)) {
			emit(A64_SUB_I(is64, dst, dst, imm), ctx);
		} else if (is_addsub_imm(-imm)) {
			emit(A64_ADD_I(is64, dst, dst, -imm), ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_SUB(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		/*
		 * Use the logical-immediate encoding when imm fits it;
		 * AARCH64_BREAK_FAULT from A64_*_I means it does not.
		 */
		a64_insn = A64_AND_I(is64, dst, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_AND(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		a64_insn = A64_ORR_I(is64, dst, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_ORR(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		a64_insn = A64_EOR_I(is64, dst, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_EOR(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		emit_a64_mov_i(is64, tmp, imm, ctx);
		emit(A64_MUL(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		emit_a64_mov_i(is64, tmp, imm, ctx);
		if (!off)
			emit(A64_UDIV(is64, dst, dst, tmp), ctx);
		else
			emit(A64_SDIV(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		emit_a64_mov_i(is64, tmp2, imm, ctx);
		if (!off)
			emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
		else
			emit(A64_SDIV(is64, tmp, dst, tmp2), ctx);
		/* dst = dst - (dst / imm) * imm */
		emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit(A64_LSL(is64, dst, dst, imm), ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
		emit(A64_LSR(is64, dst, dst, imm), ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit(A64_ASR(is64, dst, dst, imm), ctx);
		break;

	/* JUMP off */
	case BPF_JMP | BPF_JA:
	case BPF_JMP32 | BPF_JA:
		/* JMP32 JA carries the branch target in imm, not off */
		if (BPF_CLASS(code) == BPF_JMP)
			jmp_offset = bpf2a64_offset(i, off, ctx);
		else
			jmp_offset = bpf2a64_offset(i, imm, ctx);
		check_imm26(jmp_offset);
		emit(A64_B(jmp_offset), ctx);
		break;
	/* IF (dst COND src) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
		emit(A64_CMP(is64, dst, src), ctx);
emit_cond_jmp:
		/* shared tail: map the BPF condition to an A64 condition code */
		jmp_offset = bpf2a64_offset(i, off, ctx);
		check_imm19(jmp_offset);
		switch (BPF_OP(code)) {
		case BPF_JEQ:
			jmp_cond = A64_COND_EQ;
			break;
		case BPF_JGT:
			jmp_cond = A64_COND_HI;
			break;
		case BPF_JLT:
			jmp_cond = A64_COND_CC;
			break;
		case BPF_JGE:
			jmp_cond = A64_COND_CS;
			break;
		case BPF_JLE:
			jmp_cond = A64_COND_LS;
			break;
		case BPF_JSET:
		case BPF_JNE:
			jmp_cond = A64_COND_NE;
			break;
		case BPF_JSGT:
			jmp_cond = A64_COND_GT;
			break;
		case BPF_JSLT:
			jmp_cond = A64_COND_LT;
			break;
		case BPF_JSGE:
			jmp_cond = A64_COND_GE;
			break;
		case BPF_JSLE:
			jmp_cond = A64_COND_LE;
			break;
		default:
			return -EFAULT;
		}
		emit(A64_B_(jmp_cond, jmp_offset), ctx);
		break;
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
		emit(A64_TST(is64, dst, src), ctx);
		goto emit_cond_jmp;
	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		if (is_addsub_imm(imm)) {
			emit(A64_CMP_I(is64, dst, imm), ctx);
		} else if (is_addsub_imm(-imm)) {
			emit(A64_CMN_I(is64, dst, -imm), ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_CMP(is64, dst, tmp), ctx);
		}
		goto emit_cond_jmp;
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		a64_insn = A64_TST_I(is64, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_TST(is64, dst, tmp), ctx);
		}
		goto emit_cond_jmp;
	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		const u8 r0 = bpf2a64[BPF_REG_0];
		bool func_addr_fixed;
		u64 func_addr;
		u32 cpu_offset;

		/* Implement helper call to bpf_get_smp_processor_id() inline */
		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {
			cpu_offset = offsetof(struct thread_info, cpu);

			emit(A64_MRS_SP_EL0(tmp), ctx);
			if (is_lsi_offset(cpu_offset, 2)) {
				emit(A64_LDR32I(r0, tmp, cpu_offset), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, cpu_offset, ctx);
				emit(A64_LDR32(r0, tmp, tmp2), ctx);
			}
			break;
		}

		/* Implement helper call to bpf_get_current_task/_btf() inline */
		if (insn->src_reg == 0 && (insn->imm == BPF_FUNC_get_current_task ||
					   insn->imm == BPF_FUNC_get_current_task_btf)) {
			emit(A64_MRS_SP_EL0(r0), ctx);
			break;
		}

		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
					    &func_addr, &func_addr_fixed);
		if (ret < 0)
			return ret;
		emit_call(func_addr, ctx);
		/* move the AAPCS return register into BPF R0 (x7) */
		emit(A64_MOV(1, r0, A64_R(0)), ctx);
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		if (emit_bpf_tail_call(ctx))
			return -EFAULT;
		break;
	/* function return */
	case BPF_JMP | BPF_EXIT:
		/* Optimization: when last instruction is EXIT,
		   simply fallthrough to epilogue. */
		if (i == ctx->prog->len - 1)
			break;
		jmp_offset = epilogue_offset(ctx);
		check_imm26(jmp_offset);
		emit(A64_B(jmp_offset), ctx);
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		const struct bpf_insn insn1 = insn[1];
		u64 imm64;

		imm64 = (u64)insn1.imm << 32 | (u32)imm;
		if (bpf_pseudo_func(insn))
			emit_addr_mov_i64(dst, imm64, ctx);
		else
			emit_a64_mov_i64(dst, imm64, ctx);

		/* 16-byte instruction: tell the caller to skip insn[1] */
		return 1;
	}

	/* LDX: dst = (u64)*(unsigned size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
	/* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */
	case BPF_LDX | BPF_MEMSX | BPF_B:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_W:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
		/* arena access: add the arena kernel base to form the address */
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx);
			src = tmp2;
		}
		/* FP-relative accesses go through SP with the stack size folded in */
		if (src == fp) {
			src_adj = A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			src_adj = src;
			off_adj = off;
		}
		sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX ||
				BPF_MODE(insn->code) == BPF_PROBE_MEMSX);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off_adj, 2)) {
				if (sign_extend)
					emit(A64_LDRSWI(dst, src_adj, off_adj), ctx);
				else
					emit(A64_LDR32I(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				if (sign_extend)
					emit(A64_LDRSW(dst, src, tmp), ctx);
				else
					emit(A64_LDR32(dst, src, tmp), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off_adj, 1)) {
				if (sign_extend)
					emit(A64_LDRSHI(dst, src_adj, off_adj), ctx);
				else
					emit(A64_LDRHI(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				if (sign_extend)
					emit(A64_LDRSH(dst, src, tmp), ctx);
				else
					emit(A64_LDRH(dst, src, tmp), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off_adj, 0)) {
				if (sign_extend)
					emit(A64_LDRSBI(dst, src_adj, off_adj), ctx);
				else
					emit(A64_LDRBI(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				if (sign_extend)
					emit(A64_LDRSB(dst, src, tmp), ctx);
				else
					emit(A64_LDRB(dst, src, tmp), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off_adj, 3)) {
				emit(A64_LDR64I(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_LDR64(dst, src, tmp), ctx);
			}
			break;
		}

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	/* speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		/*
		 * Nothing required here.
		 *
		 * In case of arm64, we rely on the firmware mitigation of
		 * Speculative Store Bypass as controlled via the ssbd kernel
		 * parameter. Whenever the mitigation is enabled, it works
		 * for all of the kernel code with no need to provide any
		 * additional instructions.
		 */
		break;

	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_DW:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx);
			dst = tmp2;
		}
		if (dst == fp) {
			dst_adj = A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
			off_adj = off;
		}
		/* Load imm to a register then store it */
		emit_a64_mov_i(1, tmp, imm, ctx);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off_adj, 2)) {
				emit(A64_STR32I(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STR32(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off_adj, 1)) {
				emit(A64_STRHI(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STRH(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off_adj, 0)) {
				emit(A64_STRBI(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STRB(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off_adj, 3)) {
				emit(A64_STR64I(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STR64(tmp, dst, tmp2), ctx);
			}
			break;
		}

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_DW:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx);
			dst = tmp2;
		}
		if (dst == fp) {
			dst_adj = A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
			off_adj = off;
		}
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off_adj, 2)) {
				emit(A64_STR32I(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STR32(src, dst, tmp), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off_adj, 1)) {
				emit(A64_STRHI(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STRH(src, dst, tmp), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off_adj, 0)) {
				emit(A64_STRBI(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STRB(src, dst, tmp), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off_adj, 3)) {
				emit(A64_STR64I(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STR64(src, dst, tmp), ctx);
			}
			break;
		}

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
		/* prefer LSE atomics; fall back to LL/SC loops without them */
		if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
			ret = emit_lse_atomic(insn, ctx);
		else
			ret = emit_ll_sc_atomic(insn, ctx);
		if (ret)
			return ret;

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	default:
		pr_err_once("unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}
1667
static int build_body(struct jit_ctx *ctx, bool extra_pass)
{
	const struct bpf_prog *prog = ctx->prog;
	int i;

	/*
	 * - offset[0] offset of the end of prologue,
	 *   start of the 1st instruction.
	 * - offset[1] - offset of the end of 1st instruction,
	 *   start of the 2nd instruction
	 * [....]
	 * - offset[3] - offset of the end of 3rd instruction,
	 *   start of 4th instruction
	 */
	for (i = 0; i < prog->len; i++) {
		int ret;

		ctx->offset[i] = ctx->idx;
		ret = build_insn(&prog->insnsi[i], ctx, extra_pass);
		if (ret < 0)
			return ret;
		if (ret > 0) {
			/* 16-byte insn: record the second half's offset too */
			i++;
			ctx->offset[i] = ctx->idx;
		}
	}

	/*
	 * offset is allocated with prog->len + 1 so fill in
	 * the last element with the offset after the last
	 * instruction (end of program)
	 */
	ctx->offset[i] = ctx->idx;

	return 0;
}
1705
validate_code(struct jit_ctx * ctx)1706 static int validate_code(struct jit_ctx *ctx)
1707 {
1708 int i;
1709
1710 for (i = 0; i < ctx->idx; i++) {
1711 u32 a64_insn = le32_to_cpu(ctx->image[i]);
1712
1713 if (a64_insn == AARCH64_BREAK_FAULT)
1714 return -1;
1715 }
1716 return 0;
1717 }
1718
validate_ctx(struct jit_ctx * ctx)1719 static int validate_ctx(struct jit_ctx *ctx)
1720 {
1721 if (validate_code(ctx))
1722 return -1;
1723
1724 if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
1725 return -1;
1726
1727 return 0;
1728 }
1729
/* Sync the instruction cache with the freshly written code in [start, end). */
static inline void bpf_flush_icache(void *start, void *end)
{
	flush_icache_range((unsigned long)start, (unsigned long)end);
}
1734
/*
 * JIT state cached in prog->aux->jit_data between the initial pass and the
 * extra pass used for multi-function (subprog) programs.
 */
struct arm64_jit_data {
	struct bpf_binary_header *header;	/* RW (scratch) image header */
	u8 *ro_image;				/* start of code in the RX image */
	struct bpf_binary_header *ro_header;	/* RX (final) image header */
	struct jit_ctx ctx;			/* saved JIT context */
};
1741
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	int image_size, prog_size, extable_size, extable_align, extable_offset;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	struct bpf_binary_header *ro_header;
	struct arm64_jit_data *jit_data;
	bool was_classic = bpf_prog_was_classic(prog);
	bool tmp_blinded = false;
	bool extra_pass = false;
	struct jit_ctx ctx;
	u8 *image_ptr;
	u8 *ro_image_ptr;
	int body_idx;
	int exentry_idx;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/* If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	if (jit_data->ctx.offset) {
		/* Extra pass for subprogs: reuse the state saved earlier. */
		ctx = jit_data->ctx;
		ro_image_ptr = jit_data->ro_image;
		ro_header = jit_data->ro_header;
		header = jit_data->header;
		image_ptr = (void *)header + ((void *)ro_image_ptr
						 - (void *)ro_header);
		extra_pass = true;
		prog_size = sizeof(u32) * ctx.idx;
		goto skip_init_ctx;
	}
	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
	if (ctx.offset == NULL) {
		prog = orig_prog;
		goto out_off;
	}

	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);

	/* Pass 1: Estimate the maximum image size.
	 *
	 * BPF line info needs ctx->offset[i] to be the offset of
	 * instruction[i] in jited image, so build prologue first.
	 */
	if (build_prologue(&ctx, was_classic)) {
		prog = orig_prog;
		goto out_off;
	}

	if (build_body(&ctx, extra_pass)) {
		prog = orig_prog;
		goto out_off;
	}

	ctx.epilogue_offset = ctx.idx;
	build_epilogue(&ctx);
	build_plt(&ctx);

	extable_align = __alignof__(struct exception_table_entry);
	extable_size = prog->aux->num_exentries *
		sizeof(struct exception_table_entry);

	/* Now we know the maximum image size. */
	prog_size = sizeof(u32) * ctx.idx;
	/* also allocate space for plt target */
	extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
	image_size = extable_offset + extable_size;
	ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
					      sizeof(u32), &header, &image_ptr,
					      jit_fill_hole);
	if (!ro_header) {
		prog = orig_prog;
		goto out_off;
	}

	/* Pass 2: Determine jited position and result for each instruction */

	/*
	 * Use the image(RW) for writing the JITed instructions. But also save
	 * the ro_image(RX) for calculating the offsets in the image. The RW
	 * image will be later copied to the RX image from where the program
	 * will run. The bpf_jit_binary_pack_finalize() will do this copy in the
	 * final step.
	 */
	ctx.image = (__le32 *)image_ptr;
	ctx.ro_image = (__le32 *)ro_image_ptr;
	if (extable_size)
		prog->aux->extable = (void *)ro_image_ptr + extable_offset;
skip_init_ctx:
	ctx.idx = 0;
	ctx.exentry_idx = 0;
	ctx.write = true;

	build_prologue(&ctx, was_classic);

	/* Record exentry_idx and body_idx before first build_body */
	exentry_idx = ctx.exentry_idx;
	body_idx = ctx.idx;
	/* Dont write body instructions to memory for now */
	ctx.write = false;

	if (build_body(&ctx, extra_pass)) {
		prog = orig_prog;
		goto out_free_hdr;
	}

	ctx.epilogue_offset = ctx.idx;
	ctx.exentry_idx = exentry_idx;
	ctx.idx = body_idx;
	ctx.write = true;

	/* Pass 3: Adjust jump offset and write final image */
	if (build_body(&ctx, extra_pass) ||
		WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset)) {
		prog = orig_prog;
		goto out_free_hdr;
	}

	build_epilogue(&ctx);
	build_plt(&ctx);

	/* Extra pass to validate JITed code. */
	if (validate_ctx(&ctx)) {
		prog = orig_prog;
		goto out_free_hdr;
	}

	/* update the real prog size */
	prog_size = sizeof(u32) * ctx.idx;

	/* And we're done. */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);

	if (!prog->is_func || extra_pass) {
		/* The jited image may shrink since the jited result for
		 * BPF_CALL to subprog may be changed from indirect call
		 * to direct call.
		 */
		if (extra_pass && ctx.idx > jit_data->ctx.idx) {
			pr_err_once("multi-func JIT bug %d > %d\n",
				    ctx.idx, jit_data->ctx.idx);
			prog->bpf_func = NULL;
			prog->jited = 0;
			prog->jited_len = 0;
			goto out_free_hdr;
		}
		if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
			/* ro_header has been freed */
			ro_header = NULL;
			prog = orig_prog;
			goto out_off;
		}
		/*
		 * The instructions have now been copied to the ROX region from
		 * where they will execute. Now the data cache has to be cleaned to
		 * the PoU and the I-cache has to be invalidated for the VAs.
		 */
		bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx);
	} else {
		/* Subprog first pass: stash state for the extra pass. */
		jit_data->ctx = ctx;
		jit_data->ro_image = ro_image_ptr;
		jit_data->header = header;
		jit_data->ro_header = ro_header;
	}

	prog->bpf_func = (void *)ctx.ro_image;
	prog->jited = 1;
	prog->jited_len = prog_size;

	if (!prog->is_func || extra_pass) {
		int i;

		/* offset[prog->len] is the size of program */
		for (i = 0; i <= prog->len; i++)
			ctx.offset[i] *= AARCH64_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
out_off:
		kvfree(ctx.offset);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;

out_free_hdr:
	if (header) {
		bpf_arch_text_copy(&ro_header->size, &header->size,
				   sizeof(header->size));
		bpf_jit_binary_pack_free(ro_header, header);
	}
	goto out_off;
}
1961
/* This JIT can emit calls to kernel functions (kfuncs). */
bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}
1966
/*
 * Copy @len bytes of instructions from @src into the (read-only) text at
 * @dst. Returns @dst on success, ERR_PTR(-EINVAL) if patching failed.
 */
void *bpf_arch_text_copy(void *dst, void *src, size_t len)
{
	return aarch64_insn_copy(dst, src, len) ? dst : ERR_PTR(-EINVAL);
}
1973
/* Upper bound on total JIT image allocations: the whole vmalloc area. */
u64 bpf_jit_alloc_exec_limit(void)
{
	return VMALLOC_END - VMALLOC_START;
}
1978
1979 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	/* Tail calls from subprograms are handled by emit_bpf_tail_call(). */
	return true;
}
1984
/*
 * Emit the trampoline code that runs one attached BPF program @l:
 * store the cookie, call __bpf_prog_enter, run the program (skipped if
 * enter returned 0), optionally save its return value, then call
 * __bpf_prog_exit. Stack offsets are relative to SP.
 */
static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
			    int args_off, int retval_off, int run_ctx_off,
			    bool save_ret)
{
	__le32 *branch;
	u64 enter_prog;
	u64 exit_prog;
	struct bpf_prog *p = l->link.prog;
	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);

	enter_prog = (u64)bpf_trampoline_enter(p);
	exit_prog = (u64)bpf_trampoline_exit(p);

	if (l->cookie == 0) {
		/* if cookie is zero, one instruction is enough to store it */
		emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
	} else {
		emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
		emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
		     ctx);
	}

	/* save p to callee saved register x19 to avoid loading p with mov_i64
	 * each time.
	 */
	emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);

	/* arg1: prog */
	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
	/* arg2: &run_ctx */
	emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx);

	emit_call(enter_prog, ctx);

	/* save return value to callee saved register x20 */
	emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);

	/* if (__bpf_prog_enter(prog) == 0)
	 *         goto skip_exec_of_prog;
	 */
	branch = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);	/* placeholder, patched to CBZ below */

	emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
	if (!p->jited)
		emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);

	emit_call((const u64)p->bpf_func, ctx);

	if (save_ret)
		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);

	if (ctx->image) {
		/* now that the skip target is known, patch the branch */
		int offset = &ctx->image[ctx->idx] - branch;
		*branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset));
	}

	/* arg1: prog */
	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
	/* arg2: start time */
	emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx);
	/* arg3: &run_ctx */
	emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx);

	emit_call(exit_prog, ctx);
}
2051
/*
 * Emit the fmod_ret chain: run each link, and after each one test the
 * saved return value — a non-zero value branches to do_fexit. The branch
 * locations are collected in @branches for later patching by the caller.
 */
static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
			       int args_off, int retval_off, int run_ctx_off,
			       __le32 **branches)
{
	int i;

	/* The first fmod_ret program will receive a garbage return value.
	 * Set this to 0 to avoid confusing the program.
	 */
	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
	for (i = 0; i < tl->nr_links; i++) {
		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
				run_ctx_off, true);
		/* if (*(u64 *)(sp + retval_off) !=  0)
		 *         goto do_fexit;
		 */
		emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);
		/* Save the location of branch, and generate a nop.
		 * This nop will be replaced with a cbnz later.
		 */
		branches[i] = ctx->image + ctx->idx;
		emit(A64_NOP, ctx);
	}
}
2076
save_args(struct jit_ctx * ctx,int args_off,int nregs)2077 static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
2078 {
2079 int i;
2080
2081 for (i = 0; i < nregs; i++) {
2082 emit(A64_STR64I(i, A64_SP, args_off), ctx);
2083 args_off += 8;
2084 }
2085 }
2086
restore_args(struct jit_ctx * ctx,int args_off,int nregs)2087 static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
2088 {
2089 int i;
2090
2091 for (i = 0; i < nregs; i++) {
2092 emit(A64_LDR64I(i, A64_SP, args_off), ctx);
2093 args_off += 8;
2094 }
2095 }
2096
/* Based on the x86's implementation of arch_prepare_bpf_trampoline().
 *
 * bpf prog and function entry before bpf trampoline hooked:
 * mov x9, lr
 * nop
 *
 * bpf prog and function entry after bpf trampoline hooked:
 * mov x9, lr
 * bl <bpf_trampoline or plt>
 *
 * Runs in two passes: a sizing pass with ctx->image == NULL, where only
 * instruction counts are accumulated, and an emission pass with ctx->image
 * set.  Returns the total instruction count (ctx->idx) on success or a
 * negative errno.
 */
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
			      struct bpf_tramp_links *tlinks, void *func_addr,
			      int nregs, u32 flags)
{
	int i;
	int stack_size;
	int retaddr_off;
	int regs_off;
	int retval_off;
	int args_off;
	int nregs_off;
	int ip_off;
	int run_ctx_off;
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	bool save_ret;
	__le32 **branches = NULL;

	/* trampoline stack layout:
	 *                  [ parent ip         ]
	 *                  [ FP                ]
	 * SP + retaddr_off [ self ip           ]
	 *                  [ FP                ]
	 *
	 *                  [ padding           ] align SP to multiples of 16
	 *
	 *                  [ x20               ] callee saved reg x20
	 * SP + regs_off    [ x19               ] callee saved reg x19
	 *
	 * SP + retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
	 *                                        BPF_TRAMP_F_RET_FENTRY_RET
	 *
	 *                  [ arg reg N         ]
	 *                  [ ...               ]
	 * SP + args_off    [ arg reg 1         ]
	 *
	 * SP + nregs_off   [ arg regs count    ]
	 *
	 * SP + ip_off      [ traced function   ] BPF_TRAMP_F_IP_ARG flag
	 *
	 * SP + run_ctx_off [ bpf_tramp_run_ctx ]
	 */

	stack_size = 0;
	run_ctx_off = stack_size;
	/* room for bpf_tramp_run_ctx */
	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);

	ip_off = stack_size;
	/* room for IP address argument */
	if (flags & BPF_TRAMP_F_IP_ARG)
		stack_size += 8;

	nregs_off = stack_size;
	/* room for args count */
	stack_size += 8;

	args_off = stack_size;
	/* room for args */
	stack_size += nregs * 8;

	/* room for return value */
	retval_off = stack_size;
	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
	if (save_ret)
		stack_size += 8;

	/* room for callee saved registers, currently x19 and x20 are used */
	regs_off = stack_size;
	stack_size += 16;

	/* round up to multiples of 16 to avoid SPAlignmentFault */
	stack_size = round_up(stack_size, 16);

	/* return address locates above FP */
	retaddr_off = stack_size + 8;

	/* bpf trampoline may be invoked by 3 instruction types:
	 * 1. bl, attached to bpf prog or kernel function via short jump
	 * 2. br, attached to bpf prog or kernel function via long jump
	 * 3. blr, working as a function pointer, used by struct_ops.
	 * So BTI_JC should be used here to support both br and blr.
	 */
	emit_bti(A64_BTI_JC, ctx);

	/* frame for parent function */
	emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* frame for patched function */
	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* allocate stack space */
	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);

	if (flags & BPF_TRAMP_F_IP_ARG) {
		/* save ip address of the traced function */
		emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx);
		emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
	}

	/* save arg regs count */
	emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
	emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);

	/* save arg regs */
	save_args(ctx, args_off, nregs);

	/* save callee saved registers */
	emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
	emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/* for the first pass, assume the worst case */
		if (!ctx->image)
			ctx->idx += 4;
		else
			emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
		emit_call((const u64)__bpf_tramp_enter, ctx);
	}

	for (i = 0; i < fentry->nr_links; i++)
		invoke_bpf_prog(ctx, fentry->links[i], args_off,
				retval_off, run_ctx_off,
				flags & BPF_TRAMP_F_RET_FENTRY_RET);

	if (fmod_ret->nr_links) {
		branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
				   GFP_KERNEL);
		if (!branches)
			return -ENOMEM;

		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
				   run_ctx_off, branches);
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		restore_args(ctx, args_off, nregs);
		/* call original func: load the saved return address of the
		 * patched frame and "return" into it, with LR pointing at
		 * the instruction after the ret below so the original
		 * function comes back into this trampoline.
		 */
		emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
		emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
		emit(A64_RET(A64_R(10)), ctx);
		/* store return value */
		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
		/* reserve a nop for bpf_tramp_image_put */
		im->ip_after_call = ctx->ro_image + ctx->idx;
		emit(A64_NOP, ctx);
	}

	/* update the branches saved in invoke_bpf_mod_ret with cbnz on x10,
	 * the register invoke_bpf_mod_ret reloaded the return value into;
	 * only done in the emission pass
	 */
	for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
		int offset = &ctx->image[ctx->idx] - branches[i];
		*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
	}

	for (i = 0; i < fexit->nr_links; i++)
		invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
				run_ctx_off, false);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		im->ip_epilogue = ctx->ro_image + ctx->idx;
		/* for the first pass, assume the worst case */
		if (!ctx->image)
			ctx->idx += 4;
		else
			emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
		emit_call((const u64)__bpf_tramp_exit, ctx);
	}

	if (flags & BPF_TRAMP_F_RESTORE_REGS)
		restore_args(ctx, args_off, nregs);

	/* restore callee saved register x19 and x20 */
	emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
	emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx);

	if (save_ret)
		emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx);

	/* reset SP */
	emit(A64_MOV(1, A64_SP, A64_FP), ctx);

	/* pop frames */
	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx);

	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
		/* skip patched function, return to parent */
		emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
		emit(A64_RET(A64_R(9)), ctx);
	} else {
		/* return to patched function */
		emit(A64_MOV(1, A64_R(10), A64_LR), ctx);
		emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
		emit(A64_RET(A64_R(10)), ctx);
	}

	kfree(branches);

	return ctx->idx;
}
2311
btf_func_model_nregs(const struct btf_func_model * m)2312 static int btf_func_model_nregs(const struct btf_func_model *m)
2313 {
2314 int nregs = m->nr_args;
2315 int i;
2316
2317 /* extra registers needed for struct argument */
2318 for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
2319 /* The arg_size is at most 16 bytes, enforced by the verifier. */
2320 if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
2321 nregs += (m->arg_size[i] + 7) / 8 - 1;
2322 }
2323
2324 return nregs;
2325 }
2326
/* Compute the trampoline size in bytes for the given model/flags by running
 * prepare_trampoline() in its sizing pass (ctx.image == NULL, so nothing is
 * emitted, only counted).  Returns the byte size or a negative errno.
 *
 * Fix: the old `return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;` tail was
 * dead code — negative values already returned above — so the scaling is
 * now unconditional.
 */
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
			     struct bpf_tramp_links *tlinks, void *func_addr)
{
	struct jit_ctx ctx = {
		.image = NULL,
		.idx = 0,
	};
	struct bpf_tramp_image im;
	int nregs, ret;

	nregs = btf_func_model_nregs(m);
	/* the first 8 registers are used for arguments */
	if (nregs > 8)
		return -ENOTSUPP;

	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
	if (ret < 0)
		return ret;

	/* prepare_trampoline() returns an instruction count */
	return ret * AARCH64_INSN_SIZE;
}
2348
/* Allocate trampoline image memory from the shared BPF prog pack,
 * initializing unused space with jit_fill_hole.
 */
void *arch_alloc_bpf_trampoline(unsigned int size)
{
	return bpf_prog_pack_alloc(size, jit_fill_hole);
}
2353
/* Return trampoline image memory to the BPF prog pack allocator. */
void arch_free_bpf_trampoline(void *image, unsigned int size)
{
	bpf_prog_pack_free(image, size);
}
2358
/* Nothing to do: NOTE(review) the image comes from the prog pack (see
 * arch_alloc_bpf_trampoline above), which presumably manages permissions
 * itself — confirm against bpf_prog_pack_alloc.
 */
int arch_protect_bpf_trampoline(void *image, unsigned int size)
{
	return 0;
}
2363
/* Generate the trampoline for @func_addr into a writable scratch buffer,
 * validate it, then copy it into the read-only region
 * [ro_image, ro_image_end) and flush the icache.
 *
 * Returns the emitted size in bytes on success or a negative errno.
 *
 * Fixes: the nregs check used to run after kvmalloc() and return without
 * freeing, leaking the scratch image on -ENOTSUPP; it is now performed
 * before any allocation.  jit_fill_hole() also reuses the precomputed
 * @size instead of recomputing the pointer difference.
 */
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
				void *ro_image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *func_addr)
{
	int ret, nregs;
	void *image, *tmp;
	u32 size = ro_image_end - ro_image;

	nregs = btf_func_model_nregs(m);
	/* the first 8 registers are used for arguments; reject before
	 * allocating so the error path cannot leak the scratch image
	 */
	if (nregs > 8)
		return -ENOTSUPP;

	/* image doesn't need to be in module memory range, so we can
	 * use kvmalloc.
	 */
	image = kvmalloc(size, GFP_KERNEL);
	if (!image)
		return -ENOMEM;

	struct jit_ctx ctx = {
		.image = image,
		.ro_image = ro_image,
		.idx = 0,
		.write = true,
	};

	/* initialize the whole scratch buffer before emitting */
	jit_fill_hole(image, size);
	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);

	if (ret > 0 && validate_code(&ctx) < 0) {
		ret = -EINVAL;
		goto out;
	}

	if (ret > 0)
		ret *= AARCH64_INSN_SIZE;

	tmp = bpf_arch_text_copy(ro_image, image, size);
	if (IS_ERR(tmp)) {
		ret = PTR_ERR(tmp);
		goto out;
	}

	bpf_flush_icache(ro_image, ro_image + size);
out:
	kvfree(image);
	return ret;
}
2414
is_long_jump(void * ip,void * target)2415 static bool is_long_jump(void *ip, void *target)
2416 {
2417 long offset;
2418
2419 /* NULL target means this is a NOP */
2420 if (!target)
2421 return false;
2422
2423 offset = (long)target - (long)ip;
2424 return offset < -SZ_128M || offset >= SZ_128M;
2425 }
2426
/* Encode the patchsite instruction at @ip: a NOP when @addr is NULL,
 * otherwise a direct branch of @type to @addr — routed through @plt when
 * @addr is out of direct-branch range.  Returns 0 on success, -EFAULT when
 * the branch cannot be encoded.
 */
static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
			     void *addr, void *plt, u32 *insn)
{
	void *target;

	if (!addr) {
		*insn = aarch64_insn_gen_nop();
		return 0;
	}

	/* out-of-range destinations are reached indirectly via the plt */
	target = is_long_jump(ip, addr) ? plt : addr;

	*insn = aarch64_insn_gen_branch_imm((unsigned long)ip,
					    (unsigned long)target,
					    type);

	return *insn == AARCH64_BREAK_FAULT ? -EFAULT : 0;
}
2448
/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf
 * trampoline with the branch instruction from @ip to @new_addr. If @old_addr
 * or @new_addr is NULL, the old or new instruction is NOP.
 *
 * When @ip is the bpf prog entry, a bpf trampoline is being attached or
 * detached. Since bpf trampoline and bpf prog are allocated separately with
 * vmalloc, the address distance may exceed 128MB, the maximum branch range.
 * So long jump should be handled.
 *
 * When a bpf prog is constructed, a plt pointing to empty trampoline
 * dummy_tramp is placed at the end:
 *
 *      bpf_prog:
 *              mov x9, lr
 *              nop // patchsite
 *              ...
 *              ret
 *
 *      plt:
 *              ldr x10, target
 *              br x10
 *      target:
 *              .quad dummy_tramp // plt target
 *
 * This is also the state when no trampoline is attached.
 *
 * When a short-jump bpf trampoline is attached, the patchsite is patched
 * to a bl instruction to the trampoline directly:
 *
 *      bpf_prog:
 *              mov x9, lr
 *              bl <short-jump bpf trampoline address> // patchsite
 *              ...
 *              ret
 *
 *      plt:
 *              ldr x10, target
 *              br x10
 *      target:
 *              .quad dummy_tramp // plt target
 *
 * When a long-jump bpf trampoline is attached, the plt target is filled with
 * the trampoline address and the patchsite is patched to a bl instruction to
 * the plt:
 *
 *      bpf_prog:
 *              mov x9, lr
 *              bl plt // patchsite
 *              ...
 *              ret
 *
 *      plt:
 *              ldr x10, target
 *              br x10
 *      target:
 *              .quad <long-jump bpf trampoline address> // plt target
 *
 * The dummy_tramp is used to prevent another CPU from jumping to unknown
 * locations during the patching process, making the patching process easier.
 */
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
		       void *old_addr, void *new_addr)
{
	int ret;
	u32 old_insn;
	u32 new_insn;
	u32 replaced;
	struct bpf_plt *plt = NULL;
	unsigned long size = 0UL;
	unsigned long offset = ~0UL;
	enum aarch64_insn_branch_type branch_type;
	char namebuf[KSYM_NAME_LEN];
	void *image = NULL;
	u64 plt_target = 0ULL;
	bool poking_bpf_entry;

	if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
		/* Only poking bpf text is supported. Since kernel function
		 * entry is set up by ftrace, we rely on ftrace to poke kernel
		 * functions.
		 */
		return -ENOTSUPP;

	image = ip - offset;
	/* zero offset means we're poking bpf prog entry */
	poking_bpf_entry = (offset == 0UL);

	/* bpf prog entry, find plt and the real patchsite */
	if (poking_bpf_entry) {
		/* plt locates at the end of bpf prog */
		plt = image + size - PLT_TARGET_OFFSET;

		/* skip to the nop instruction in bpf prog entry:
		 * bti c // if BTI enabled
		 * mov x9, x30
		 * nop
		 */
		ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
	}

	/* long jump is only possible at bpf prog entry */
	if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
		    !poking_bpf_entry))
		return -EINVAL;

	if (poke_type == BPF_MOD_CALL)
		branch_type = AARCH64_INSN_BRANCH_LINK;
	else
		branch_type = AARCH64_INSN_BRANCH_NOLINK;

	if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
		return -EFAULT;

	if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
		return -EFAULT;

	if (is_long_jump(ip, new_addr))
		plt_target = (u64)new_addr;
	else if (is_long_jump(ip, old_addr))
		/* if the old target is a long jump and the new target is not,
		 * restore the plt target to dummy_tramp, so there is always a
		 * legal and harmless address stored in plt target, and we'll
		 * never jump from plt to an unknown place.
		 */
		plt_target = (u64)&dummy_tramp;

	if (plt_target) {
		/* non-zero plt_target indicates we're patching a bpf prog,
		 * which is read only.
		 */
		if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1))
			return -EFAULT;
		WRITE_ONCE(plt->target, plt_target);
		set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1);
		/* since plt target points to either the new trampoline
		 * or dummy_tramp, even if another CPU reads the old plt
		 * target value before fetching the bl instruction to plt,
		 * it will be brought back by dummy_tramp, so no barrier is
		 * required here.
		 */
	}

	/* if the old target and the new target are both long jumps, no
	 * patching is required
	 */
	if (old_insn == new_insn)
		return 0;

	mutex_lock(&text_mutex);
	/* verify the patchsite still holds exactly the instruction we expect
	 * before overwriting it
	 */
	if (aarch64_insn_read(ip, &replaced)) {
		ret = -EFAULT;
		goto out;
	}

	if (replaced != old_insn) {
		ret = -EFAULT;
		goto out;
	}

	/* We call aarch64_insn_patch_text_nosync() to replace instruction
	 * atomically, so no other CPUs will fetch a half-new and half-old
	 * instruction. But there is chance that another CPU executes the
	 * old instruction after the patching operation finishes (e.g.,
	 * pipeline not flushed, or icache not synchronized yet).
	 *
	 * 1. when a new trampoline is attached, it is not a problem for
	 *    different CPUs to jump to different trampolines temporarily.
	 *
	 * 2. when an old trampoline is freed, we should wait for all other
	 *    CPUs to exit the trampoline and make sure the trampoline is no
	 *    longer reachable, since bpf_tramp_image_put() function already
	 *    uses percpu_ref and task-based rcu to do the sync, no need to call
	 *    the sync version here, see bpf_tramp_image_put() for details.
	 */
	ret = aarch64_insn_patch_text_nosync(ip, new_insn);
out:
	mutex_unlock(&text_mutex);

	return ret;
}
2629
/* Tell the BPF core this JIT supports pointer exchange (BPF_XCHG). */
bool bpf_jit_supports_ptr_xchg(void)
{
	return true;
}
2634
/* Tell the BPF core this JIT supports bpf_throw()-style exceptions. */
bool bpf_jit_supports_exceptions(void)
{
	/* We unwind through both kernel frames starting from within bpf_throw
	 * call and BPF frames. Therefore we require FP unwinder to be enabled
	 * to walk kernel frames and reach BPF frames in the stack trace.
	 * ARM64 kernel is always compiled with CONFIG_FRAME_POINTER=y
	 */
	return true;
}
2644
/* Tell the BPF core this JIT supports arena memory. */
bool bpf_jit_supports_arena(void)
{
	return true;
}
2649
/* Per-instruction capability check.  All instructions are supported outside
 * an arena; arena atomic word/dword stores additionally require the CPU's
 * LSE atomics.
 */
bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
	u8 code = insn->code;

	if (!in_arena)
		return true;

	if (code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
	    code == (BPF_STX | BPF_ATOMIC | BPF_DW))
		return cpus_have_cap(ARM64_HAS_LSE_ATOMICS);

	return true;
}
2662
/* Tell the BPF core this JIT supports the per-cpu address instruction. */
bool bpf_jit_supports_percpu_insn(void)
{
	return true;
}
2667
bpf_jit_inlines_helper_call(s32 imm)2668 bool bpf_jit_inlines_helper_call(s32 imm)
2669 {
2670 switch (imm) {
2671 case BPF_FUNC_get_smp_processor_id:
2672 case BPF_FUNC_get_current_task:
2673 case BPF_FUNC_get_current_task_btf:
2674 return true;
2675 default:
2676 return false;
2677 }
2678 }
2679
/* Free a JITed program and its pack-allocated image.  Overrides the weak
 * generic bpf_jit_free() because the image lives in the shared prog pack
 * and its header must be finalized before it can be released.
 */
void bpf_jit_free(struct bpf_prog *prog)
{
	if (prog->jited) {
		struct arm64_jit_data *jit_data = prog->aux->jit_data;
		struct bpf_binary_header *hdr;

		/*
		 * If we fail the final pass of JIT (from jit_subprogs),
		 * the program may not be finalized yet. Call finalize here
		 * before freeing it.
		 */
		if (jit_data) {
			/* copy the header size into the read-only image so
			 * bpf_jit_binary_pack_hdr() below sees a valid header
			 */
			bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size,
					   sizeof(jit_data->header->size));
			kfree(jit_data);
		}
		hdr = bpf_jit_binary_pack_hdr(prog);
		bpf_jit_binary_pack_free(hdr, NULL);
		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
	}

	bpf_prog_unlock_free(prog);
}
2703