xref: /linux/arch/arm64/net/bpf_jit_comp.c (revision d9ef13f72711f2dad64cd4445472ded98fb6c954)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * BPF JIT compiler for ARM64
4  *
5  * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
6  */
7 
8 #define pr_fmt(fmt) "bpf_jit: " fmt
9 
10 #include <linux/arm-smccc.h>
11 #include <linux/bitfield.h>
12 #include <linux/bpf.h>
13 #include <linux/cfi.h>
14 #include <linux/filter.h>
15 #include <linux/memory.h>
16 #include <linux/printk.h>
17 #include <linux/slab.h>
18 
19 #include <asm/asm-extable.h>
20 #include <asm/byteorder.h>
21 #include <asm/cpufeature.h>
22 #include <asm/debug-monitors.h>
23 #include <asm/insn.h>
24 #include <asm/text-patching.h>
25 #include <asm/set_memory.h>
26 
27 #include "bpf_jit.h"
28 
29 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
30 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
31 #define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
32 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
33 #define PRIVATE_SP (MAX_BPF_JIT_REG + 4)
34 #define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
35 
36 #define check_imm(bits, imm) do {				\
37 	if ((((imm) > 0) && ((imm) >> ((bits) - 1))) ||		\
38 	    (((imm) < 0) && (~(imm) >> ((bits) - 1)))) {	\
39 		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
40 			i, imm, imm);				\
41 		return -EINVAL;					\
42 	}							\
43 } while (0)
44 #define check_imm19(imm) check_imm(19, imm)
45 #define check_imm26(imm) check_imm(26, imm)
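
/*
 * For example, check_imm19() accepts values in [-2^18, 2^18 - 1] and
 * check_imm26() accepts values in [-2^25, 2^25 - 1], i.e. the signed
 * instruction-count offsets encodable by A64 conditional and unconditional
 * branches respectively.
 */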
46 
47 /* Map BPF registers to A64 registers */
48 static const int bpf2a64[] = {
49 	/* return value from in-kernel function, and exit value from eBPF */
50 	[BPF_REG_0] = A64_R(7),
51 	/* arguments from eBPF program to in-kernel function */
52 	[BPF_REG_1] = A64_R(0),
53 	[BPF_REG_2] = A64_R(1),
54 	[BPF_REG_3] = A64_R(2),
55 	[BPF_REG_4] = A64_R(3),
56 	[BPF_REG_5] = A64_R(4),
57 	/* callee saved registers that in-kernel function will preserve */
58 	[BPF_REG_6] = A64_R(19),
59 	[BPF_REG_7] = A64_R(20),
60 	[BPF_REG_8] = A64_R(21),
61 	[BPF_REG_9] = A64_R(22),
62 	/* read-only frame pointer to access stack */
63 	[BPF_REG_FP] = A64_R(25),
64 	/* temporary registers for BPF JIT */
65 	[TMP_REG_1] = A64_R(10),
66 	[TMP_REG_2] = A64_R(11),
67 	[TMP_REG_3] = A64_R(12),
68 	/* tail_call_cnt_ptr */
69 	[TCCNT_PTR] = A64_R(26),
70 	/* temporary register for blinding constants */
71 	[BPF_REG_AX] = A64_R(9),
72 	/* callee saved register for private stack pointer */
73 	[PRIVATE_SP] = A64_R(27),
74 	/* callee saved register for kern_vm_start address */
75 	[ARENA_VM_START] = A64_R(28),
76 };
77 
78 struct jit_ctx {
79 	const struct bpf_prog *prog;
80 	int idx;
81 	int epilogue_offset;
82 	int *offset;
83 	int exentry_idx;
84 	int nr_used_callee_reg;
85 	u8 used_callee_reg[8]; /* r6~r9, fp, priv_sp, arena_vm_start */
86 	__le32 *image;
87 	__le32 *ro_image;
88 	u32 stack_size;
89 	u64 user_vm_start;
90 	u64 arena_vm_start;
91 	bool fp_used;
92 	bool priv_sp_used;
93 	bool write;
94 };
95 
96 struct bpf_plt {
97 	u32 insn_ldr; /* load target */
98 	u32 insn_br;  /* branch to target */
99 	u64 target;   /* target value */
100 };
101 
102 #define PLT_TARGET_SIZE   sizeof_field(struct bpf_plt, target)
103 #define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
104 
105 /* Guard size/poison value used to detect private stack overflow/underflow */
106 #define PRIV_STACK_GUARD_SZ    16
107 #define PRIV_STACK_GUARD_VAL   0xEB9F12345678eb9fULL
108 
109 static inline void emit(const u32 insn, struct jit_ctx *ctx)
110 {
111 	if (ctx->image != NULL && ctx->write)
112 		ctx->image[ctx->idx] = cpu_to_le32(insn);
113 
114 	ctx->idx++;
115 }
116 
117 static inline void emit_u32_data(const u32 data, struct jit_ctx *ctx)
118 {
119 	if (ctx->image != NULL && ctx->write)
120 		ctx->image[ctx->idx] = (__force __le32)data;
121 
122 	ctx->idx++;
123 }
124 
125 static inline void emit_a64_mov_i(const int is64, const int reg,
126 				  const s32 val, struct jit_ctx *ctx)
127 {
128 	u16 hi = val >> 16;
129 	u16 lo = val & 0xffff;
130 
131 	if (hi & 0x8000) {
132 		if (hi == 0xffff) {
133 			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
134 		} else {
135 			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
136 			if (lo != 0xffff)
137 				emit(A64_MOVK(is64, reg, lo, 0), ctx);
138 		}
139 	} else {
140 		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
141 		if (hi)
142 			emit(A64_MOVK(is64, reg, hi, 16), ctx);
143 	}
144 }
145 
146 static int i64_i16_blocks(const u64 val, bool inverse)
147 {
148 	return (((val >>  0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
149 	       (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
150 	       (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
151 	       (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
152 }
153 
154 static inline void emit_a64_mov_i64(const int reg, const u64 val,
155 				    struct jit_ctx *ctx)
156 {
157 	u64 nrm_tmp = val, rev_tmp = ~val;
158 	bool inverse;
159 	int shift;
160 
161 	if (!(nrm_tmp >> 32))
162 		return emit_a64_mov_i(0, reg, (u32)val, ctx);
163 
164 	inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
165 	shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
166 					  (fls64(nrm_tmp) - 1)), 16), 0);
167 	if (inverse)
168 		emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
169 	else
170 		emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
171 	shift -= 16;
172 	while (shift >= 0) {
173 		if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
174 			emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
175 		shift -= 16;
176 	}
177 }
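
/*
 * For example, emit_a64_mov_i64(reg, 0xffffffffffff0000ULL, ctx) picks the
 * inverted form and emits a single MOVN, whereas the non-inverted form would
 * take a MOVZ plus two MOVKs.
 */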
178 
179 static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
180 {
181 	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
182 		emit(insn, ctx);
183 }
184 
185 static inline void emit_kcfi(u32 hash, struct jit_ctx *ctx)
186 {
187 	if (IS_ENABLED(CONFIG_CFI))
188 		emit_u32_data(hash, ctx);
189 }
190 
191 /*
192  * Kernel addresses in the vmalloc space use at most 48 bits, and the
193  * remaining upper bits are guaranteed to be all ones. So we can compose the address
194  * with a fixed length movn/movk/movk sequence.
195  */
196 static inline void emit_addr_mov_i64(const int reg, const u64 val,
197 				     struct jit_ctx *ctx)
198 {
199 	u64 tmp = val;
200 	int shift = 0;
201 
202 	emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx);
203 	while (shift < 32) {
204 		tmp >>= 16;
205 		shift += 16;
206 		emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
207 	}
208 }
209 
210 static bool should_emit_indirect_call(long target, const struct jit_ctx *ctx)
211 {
212 	long offset;
213 
214 	/* when ctx->ro_image is not allocated or the target is unknown,
215 	 * emit indirect call
216 	 */
217 	if (!ctx->ro_image || !target)
218 		return true;
219 
220 	offset = target - (long)&ctx->ro_image[ctx->idx];
221 	return offset < -SZ_128M || offset >= SZ_128M;
222 }
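
/*
 * BL encodes a signed 26-bit instruction offset, i.e. a +/-128MB range from
 * the call site, hence the SZ_128M check above.
 */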
223 
224 static void emit_direct_call(u64 target, struct jit_ctx *ctx)
225 {
226 	u32 insn;
227 	unsigned long pc;
228 
229 	pc = (unsigned long)&ctx->ro_image[ctx->idx];
230 	insn = aarch64_insn_gen_branch_imm(pc, target, AARCH64_INSN_BRANCH_LINK);
231 	emit(insn, ctx);
232 }
233 
234 static void emit_indirect_call(u64 target, struct jit_ctx *ctx)
235 {
236 	u8 tmp;
237 
238 	tmp = bpf2a64[TMP_REG_1];
239 	emit_addr_mov_i64(tmp, target, ctx);
240 	emit(A64_BLR(tmp), ctx);
241 }
242 
243 static void emit_call(u64 target, struct jit_ctx *ctx)
244 {
245 	if (should_emit_indirect_call((long)target, ctx))
246 		emit_indirect_call(target, ctx);
247 	else
248 		emit_direct_call(target, ctx);
249 }
250 
251 static inline int bpf2a64_offset(int bpf_insn, int off,
252 				 const struct jit_ctx *ctx)
253 {
254 	/* BPF JMP offset is relative to the next instruction */
255 	bpf_insn++;
256 	/*
257 	 * arm64 branch instructions, however, encode the offset
258 	 * from the branch itself, so we must subtract 1 from the
259 	 * instruction offset.
260 	 */
261 	return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
262 }
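
/*
 * For example, a BPF jump at index i with off = 3 targets BPF instruction
 * i + 4 (BPF offsets are relative to the next instruction). The A64 branch
 * is the last instruction emitted for insn i, i.e. at ctx->offset[i + 1] - 1,
 * so the emitted offset is ctx->offset[i + 4] - (ctx->offset[i + 1] - 1).
 */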
263 
264 static void jit_fill_hole(void *area, unsigned int size)
265 {
266 	__le32 *ptr;
267 	/* We are guaranteed to have aligned memory. */
268 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
269 		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
270 }
271 
272 int bpf_arch_text_invalidate(void *dst, size_t len)
273 {
274 	if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len))
275 		return -EINVAL;
276 
277 	return 0;
278 }
279 
280 static inline int epilogue_offset(const struct jit_ctx *ctx)
281 {
282 	int to = ctx->epilogue_offset;
283 	int from = ctx->idx;
284 
285 	return to - from;
286 }
287 
288 static bool is_addsub_imm(u32 imm)
289 {
290 	/* Either imm12 or shifted imm12. */
291 	return !(imm & ~0xfff) || !(imm & ~0xfff000);
292 }
293 
294 static inline void emit_a64_add_i(const bool is64, const int dst, const int src,
295 				  const int tmp, const s32 imm, struct jit_ctx *ctx)
296 {
297 	if (is_addsub_imm(imm)) {
298 		emit(A64_ADD_I(is64, dst, src, imm), ctx);
299 	} else if (is_addsub_imm(-(u32)imm)) {
300 		emit(A64_SUB_I(is64, dst, src, -imm), ctx);
301 	} else {
302 		emit_a64_mov_i(is64, tmp, imm, ctx);
303 		emit(A64_ADD(is64, dst, src, tmp), ctx);
304 	}
305 }
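
/*
 * For example, imm = 4096 (0x1000) is encodable as a shifted imm12 and
 * becomes a single ADD, imm = -16 becomes a single SUB, while imm = 4097
 * fits neither form and falls back to a mov into tmp followed by an ADD.
 */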
306 
307 /*
308  * There are 3 types of AArch64 LDR/STR (immediate) instruction:
309  * Post-index, Pre-index, Unsigned offset.
310  *
311  * For BPF ldr/str, the "unsigned offset" type is sufficient.
312  *
313  * "Unsigned offset" type LDR(immediate) format:
314  *
315  *    3                   2                   1                   0
316  *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
317  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
318  * |x x|1 1 1 0 0 1 0 1|         imm12         |    Rn   |    Rt   |
319  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
320  * scale
321  *
322  * "Unsigned offset" type STR(immediate) format:
323  *    3                   2                   1                   0
324  *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
325  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
326  * |x x|1 1 1 0 0 1 0 0|         imm12         |    Rn   |    Rt   |
327  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
328  * scale
329  *
330  * The offset is calculated from imm12 and scale in the following way:
331  *
332  * offset = (u64)imm12 << scale
333  */
334 static bool is_lsi_offset(int offset, int scale)
335 {
336 	if (offset < 0)
337 		return false;
338 
339 	if (offset > (0xFFF << scale))
340 		return false;
341 
342 	if (offset & ((1 << scale) - 1))
343 		return false;
344 
345 	return true;
346 }
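
/*
 * For example, with scale = 3 (a BPF_DW access) any offset in [0, 32760]
 * that is a multiple of 8 can be encoded directly in the instruction; other
 * offsets must first be moved into a temporary register.
 */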
347 
348 /* generated main prog prologue:
349  *      bti c // if CONFIG_ARM64_BTI_KERNEL
350  *      mov x9, lr
351  *      nop  // POKE_OFFSET
352  *      paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
353  *      stp x29, lr, [sp, #-16]!
354  *      mov x29, sp
355  *      stp xzr, x26, [sp, #-16]!
356  *      mov x26, sp
357  *      // PROLOGUE_OFFSET
358  *	// save callee-saved registers
359  */
360 static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx)
361 {
362 	const bool is_main_prog = !bpf_is_subprog(ctx->prog);
363 	const u8 ptr = bpf2a64[TCCNT_PTR];
364 
365 	if (is_main_prog) {
366 		/* Initialize tail_call_cnt. */
367 		emit(A64_PUSH(A64_ZR, ptr, A64_SP), ctx);
368 		emit(A64_MOV(1, ptr, A64_SP), ctx);
369 	} else
370 		emit(A64_PUSH(ptr, ptr, A64_SP), ctx);
371 }
372 
373 static void find_used_callee_regs(struct jit_ctx *ctx)
374 {
375 	int i;
376 	const struct bpf_prog *prog = ctx->prog;
377 	const struct bpf_insn *insn = &prog->insnsi[0];
378 	int reg_used = 0;
379 
380 	for (i = 0; i < prog->len; i++, insn++) {
381 		if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
382 			reg_used |= 1;
383 
384 		if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
385 			reg_used |= 2;
386 
387 		if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8)
388 			reg_used |= 4;
389 
390 		if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9)
391 			reg_used |= 8;
392 
393 		if (insn->dst_reg == BPF_REG_FP || insn->src_reg == BPF_REG_FP) {
394 			ctx->fp_used = true;
395 			reg_used |= 16;
396 		}
397 	}
398 
399 	i = 0;
400 	if (reg_used & 1)
401 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_6];
402 
403 	if (reg_used & 2)
404 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_7];
405 
406 	if (reg_used & 4)
407 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_8];
408 
409 	if (reg_used & 8)
410 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
411 
412 	if (reg_used & 16) {
413 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
414 		if (ctx->priv_sp_used)
415 			ctx->used_callee_reg[i++] = bpf2a64[PRIVATE_SP];
416 	}
417 
418 	if (ctx->arena_vm_start)
419 		ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
420 
421 	ctx->nr_used_callee_reg = i;
422 }
423 
424 /* Save callee-saved registers */
425 static void push_callee_regs(struct jit_ctx *ctx)
426 {
427 	int reg1, reg2, i;
428 
429 	/*
430 	 * A program acting as an exception boundary must save all ARM64
431 	 * callee-saved registers, as the exception callback needs to restore
432 	 * all of them in its epilogue.
433 	 */
434 	if (ctx->prog->aux->exception_boundary) {
435 		emit(A64_PUSH(A64_R(19), A64_R(20), A64_SP), ctx);
436 		emit(A64_PUSH(A64_R(21), A64_R(22), A64_SP), ctx);
437 		emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
438 		emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
439 		emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
440 		ctx->fp_used = true;
441 	} else {
442 		find_used_callee_regs(ctx);
443 		for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
444 			reg1 = ctx->used_callee_reg[i];
445 			reg2 = ctx->used_callee_reg[i + 1];
446 			emit(A64_PUSH(reg1, reg2, A64_SP), ctx);
447 		}
448 		if (i < ctx->nr_used_callee_reg) {
449 			reg1 = ctx->used_callee_reg[i];
450 			/* keep SP 16-byte aligned */
451 			emit(A64_PUSH(reg1, A64_ZR, A64_SP), ctx);
452 		}
453 	}
454 }
455 
456 /* Restore callee-saved registers */
457 static void pop_callee_regs(struct jit_ctx *ctx)
458 {
459 	struct bpf_prog_aux *aux = ctx->prog->aux;
460 	int reg1, reg2, i;
461 
462 	/*
463 	 * A program acting as an exception boundary pushes R23 and R24 in
464 	 * addition to the BPF callee-saved registers. The exception callback
465 	 * reuses the boundary program's stack frame, so restore these extra
466 	 * registers in both of these cases.
467 	 */
468 	if (aux->exception_boundary || aux->exception_cb) {
469 		emit(A64_POP(A64_R(27), A64_R(28), A64_SP), ctx);
470 		emit(A64_POP(A64_R(25), A64_R(26), A64_SP), ctx);
471 		emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx);
472 		emit(A64_POP(A64_R(21), A64_R(22), A64_SP), ctx);
473 		emit(A64_POP(A64_R(19), A64_R(20), A64_SP), ctx);
474 	} else {
475 		i = ctx->nr_used_callee_reg - 1;
476 		if (ctx->nr_used_callee_reg % 2 != 0) {
477 			reg1 = ctx->used_callee_reg[i];
478 			emit(A64_POP(reg1, A64_ZR, A64_SP), ctx);
479 			i--;
480 		}
481 		while (i > 0) {
482 			reg1 = ctx->used_callee_reg[i - 1];
483 			reg2 = ctx->used_callee_reg[i];
484 			emit(A64_POP(reg1, reg2, A64_SP), ctx);
485 			i -= 2;
486 		}
487 	}
488 }
489 
490 static void emit_percpu_ptr(const u8 dst_reg, void __percpu *ptr,
491 			    struct jit_ctx *ctx)
492 {
493 	const u8 tmp = bpf2a64[TMP_REG_1];
494 
495 	emit_a64_mov_i64(dst_reg, (__force const u64)ptr, ctx);
496 	if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
497 		emit(A64_MRS_TPIDR_EL2(tmp), ctx);
498 	else
499 		emit(A64_MRS_TPIDR_EL1(tmp), ctx);
500 	emit(A64_ADD(1, dst_reg, dst_reg, tmp), ctx);
501 }
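
/*
 * TPIDR_EL1 (or TPIDR_EL2 when running with VHE) holds this CPU's per-CPU
 * offset, so the register computed above is equivalent to this_cpu_ptr(ptr).
 */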
502 
503 #define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
504 #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
505 
506 /* Offset of nop instruction in bpf prog entry to be poked */
507 #define POKE_OFFSET (BTI_INSNS + 1)
508 
509 /* Tail call offset to jump into */
510 #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 4)
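/*
 * PROLOGUE_OFFSET counts the instructions emitted before the tail-call entry
 * point: the optional BTI, "mov x9, lr" plus the poke nop (2), the optional
 * PACIASP, and the two stp/mov pairs that set up the frame record and the
 * tail_call_cnt pointer (4).
 */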
511 
512 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
513 {
514 	const struct bpf_prog *prog = ctx->prog;
515 	const bool is_main_prog = !bpf_is_subprog(prog);
516 	const u8 fp = bpf2a64[BPF_REG_FP];
517 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
518 	const u8 priv_sp = bpf2a64[PRIVATE_SP];
519 	void __percpu *priv_stack_ptr;
520 	int cur_offset;
521 
522 	/*
523 	 * BPF prog stack layout
524 	 *
525 	 *                         high
526 	 * original A64_SP =>   0:+-----+ BPF prologue
527 	 *                        |FP/LR|
528 	 * current A64_FP =>  -16:+-----+
529 	 *                        | ... | callee saved registers
530 	 * BPF fp register => -64:+-----+ <= (BPF_FP)
531 	 *                        |     |
532 	 *                        | ... | BPF prog stack
533 	 *                        |     |
534 	 *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
535 	 *                        |RSVD | padding
536 	 * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
537 	 *                        |     |
538 	 *                        | ... | Function call stack
539 	 *                        |     |
540 	 *                        +-----+
541 	 *                          low
542 	 *
543 	 */
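	/*
	 * Note: the -64 above is only an example; the actual distance between
	 * the frame record and BPF_FP depends on how many callee-saved
	 * registers the program pushes.
	 */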
544 
545 	emit_kcfi(is_main_prog ? cfi_bpf_hash : cfi_bpf_subprog_hash, ctx);
546 	const int idx0 = ctx->idx;
547 
548 	/* bpf function may be invoked by 3 instruction types:
549 	 * 1. bl, attached via freplace to bpf prog via short jump
550 	 * 2. br, attached via freplace to bpf prog via long jump
551 	 * 3. blr, working as a function pointer, used by emit_call.
552 	 * So BTI_JC should be used here to support both br and blr.
553 	 */
554 	emit_bti(A64_BTI_JC, ctx);
555 
556 	emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
557 	emit(A64_NOP, ctx);
558 
559 	if (!prog->aux->exception_cb) {
560 		/* Sign lr */
561 		if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
562 			emit(A64_PACIASP, ctx);
563 
564 		/* Save FP and LR registers to stay aligned with the ARM64 AAPCS */
565 		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
566 		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
567 
568 		prepare_bpf_tail_call_cnt(ctx);
569 
570 		if (!ebpf_from_cbpf && is_main_prog) {
571 			cur_offset = ctx->idx - idx0;
572 			if (cur_offset != PROLOGUE_OFFSET) {
573 				pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
574 						cur_offset, PROLOGUE_OFFSET);
575 				return -1;
576 			}
577 			/* BTI landing pad for the tail call, done with a BR */
578 			emit_bti(A64_BTI_J, ctx);
579 		}
580 		push_callee_regs(ctx);
581 	} else {
582 		/*
583 		 * Exception callback receives FP of Main Program as third
584 		 * parameter
585 		 */
586 		emit(A64_MOV(1, A64_FP, A64_R(2)), ctx);
587 		/*
588 		 * Main Program already pushed the frame record and the
589 		 * callee-saved registers. The exception callback will not push
590 		 * anything and re-use the main program's stack.
591 		 *
592 		 * 12 registers are on the stack
593 		 */
594 		emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
595 	}
596 	/* Stack size must be a multiple of 16 bytes */
597 	/* Stack must be multiples of 16B */
598 	ctx->stack_size = round_up(prog->aux->stack_depth, 16);
599 
600 	if (ctx->fp_used) {
601 		if (ctx->priv_sp_used) {
602 			/* Set up private stack pointer */
603 			priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
604 			emit_percpu_ptr(priv_sp, priv_stack_ptr, ctx);
605 			emit(A64_ADD_I(1, fp, priv_sp, ctx->stack_size), ctx);
606 		} else {
607 			/* Set up BPF prog stack base register */
608 			emit(A64_MOV(1, fp, A64_SP), ctx);
609 		}
610 	}
611 
612 	/* Set up function call stack */
613 	if (ctx->stack_size && !ctx->priv_sp_used)
614 		emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
615 
616 	if (ctx->arena_vm_start)
617 		emit_a64_mov_i64(arena_vm_base, ctx->arena_vm_start, ctx);
618 
619 	return 0;
620 }
621 
622 static int emit_bpf_tail_call(struct jit_ctx *ctx)
623 {
624 	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
625 	const u8 r2 = bpf2a64[BPF_REG_2];
626 	const u8 r3 = bpf2a64[BPF_REG_3];
627 
628 	const u8 tmp = bpf2a64[TMP_REG_1];
629 	const u8 prg = bpf2a64[TMP_REG_2];
630 	const u8 tcc = bpf2a64[TMP_REG_3];
631 	const u8 ptr = bpf2a64[TCCNT_PTR];
632 	size_t off;
633 	__le32 *branch1 = NULL;
634 	__le32 *branch2 = NULL;
635 	__le32 *branch3 = NULL;
636 
637 	/* if (index >= array->map.max_entries)
638 	 *     goto out;
639 	 */
640 	off = offsetof(struct bpf_array, map.max_entries);
641 	emit_a64_mov_i64(tmp, off, ctx);
642 	emit(A64_LDR32(tmp, r2, tmp), ctx);
643 	emit(A64_MOV(0, r3, r3), ctx);
644 	emit(A64_CMP(0, r3, tmp), ctx);
645 	branch1 = ctx->image + ctx->idx;
646 	emit(A64_NOP, ctx);
647 
648 	/*
649 	 * if ((*tail_call_cnt_ptr) >= MAX_TAIL_CALL_CNT)
650 	 *     goto out;
651 	 */
652 	emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
653 	emit(A64_LDR64I(tcc, ptr, 0), ctx);
654 	emit(A64_CMP(1, tcc, tmp), ctx);
655 	branch2 = ctx->image + ctx->idx;
656 	emit(A64_NOP, ctx);
657 
658 	/* (*tail_call_cnt_ptr)++; */
659 	emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
660 
661 	/* prog = array->ptrs[index];
662 	 * if (prog == NULL)
663 	 *     goto out;
664 	 */
665 	off = offsetof(struct bpf_array, ptrs);
666 	emit_a64_mov_i64(tmp, off, ctx);
667 	emit(A64_ADD(1, tmp, r2, tmp), ctx);
668 	emit(A64_LSL(1, prg, r3, 3), ctx);
669 	emit(A64_LDR64(prg, tmp, prg), ctx);
670 	branch3 = ctx->image + ctx->idx;
671 	emit(A64_NOP, ctx);
672 
673 	/* Update tail_call_cnt if the slot is populated. */
674 	emit(A64_STR64I(tcc, ptr, 0), ctx);
675 
676 	/* restore SP */
677 	if (ctx->stack_size && !ctx->priv_sp_used)
678 		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
679 
680 	pop_callee_regs(ctx);
681 
682 	/* goto *(prog->bpf_func + prologue_offset); */
683 	off = offsetof(struct bpf_prog, bpf_func);
684 	emit_a64_mov_i64(tmp, off, ctx);
685 	emit(A64_LDR64(tmp, prg, tmp), ctx);
686 	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
687 	emit(A64_BR(tmp), ctx);
688 
689 	if (ctx->image) {
690 		off = &ctx->image[ctx->idx] - branch1;
691 		*branch1 = cpu_to_le32(A64_B_(A64_COND_CS, off));
692 
693 		off = &ctx->image[ctx->idx] - branch2;
694 		*branch2 = cpu_to_le32(A64_B_(A64_COND_CS, off));
695 
696 		off = &ctx->image[ctx->idx] - branch3;
697 		*branch3 = cpu_to_le32(A64_CBZ(1, prg, off));
698 	}
699 
700 	return 0;
701 }
702 
703 static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx)
704 {
705 	const s32 imm = insn->imm;
706 	const s16 off = insn->off;
707 	const u8 code = insn->code;
708 	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
709 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
710 	const u8 dst = bpf2a64[insn->dst_reg];
711 	const u8 src = bpf2a64[insn->src_reg];
712 	const u8 tmp = bpf2a64[TMP_REG_1];
713 	u8 reg;
714 
715 	switch (imm) {
716 	case BPF_LOAD_ACQ:
717 		reg = src;
718 		break;
719 	case BPF_STORE_REL:
720 		reg = dst;
721 		break;
722 	default:
723 		pr_err_once("unknown atomic load/store op code %02x\n", imm);
724 		return -EINVAL;
725 	}
726 
727 	if (off) {
728 		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
729 		reg = tmp;
730 	}
731 	if (arena) {
732 		emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
733 		reg = tmp;
734 	}
735 
736 	switch (imm) {
737 	case BPF_LOAD_ACQ:
738 		switch (BPF_SIZE(code)) {
739 		case BPF_B:
740 			emit(A64_LDARB(dst, reg), ctx);
741 			break;
742 		case BPF_H:
743 			emit(A64_LDARH(dst, reg), ctx);
744 			break;
745 		case BPF_W:
746 			emit(A64_LDAR32(dst, reg), ctx);
747 			break;
748 		case BPF_DW:
749 			emit(A64_LDAR64(dst, reg), ctx);
750 			break;
751 		}
752 		break;
753 	case BPF_STORE_REL:
754 		switch (BPF_SIZE(code)) {
755 		case BPF_B:
756 			emit(A64_STLRB(src, reg), ctx);
757 			break;
758 		case BPF_H:
759 			emit(A64_STLRH(src, reg), ctx);
760 			break;
761 		case BPF_W:
762 			emit(A64_STLR32(src, reg), ctx);
763 			break;
764 		case BPF_DW:
765 			emit(A64_STLR64(src, reg), ctx);
766 			break;
767 		}
768 		break;
769 	default:
770 		pr_err_once("unexpected atomic load/store op code %02x\n",
771 			    imm);
772 		return -EINVAL;
773 	}
774 
775 	return 0;
776 }
777 
778 static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
779 {
780 	const u8 code = insn->code;
781 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
782 	const u8 dst = bpf2a64[insn->dst_reg];
783 	const u8 src = bpf2a64[insn->src_reg];
784 	const u8 tmp = bpf2a64[TMP_REG_1];
785 	const u8 tmp2 = bpf2a64[TMP_REG_2];
786 	const bool isdw = BPF_SIZE(code) == BPF_DW;
787 	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
788 	const s16 off = insn->off;
789 	u8 reg = dst;
790 
791 	if (off) {
792 		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
793 		reg = tmp;
794 	}
795 	if (arena) {
796 		emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
797 		reg = tmp;
798 	}
799 
800 	switch (insn->imm) {
801 	/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
802 	case BPF_ADD:
803 		emit(A64_STADD(isdw, reg, src), ctx);
804 		break;
805 	case BPF_AND:
806 		emit(A64_MVN(isdw, tmp2, src), ctx);
807 		emit(A64_STCLR(isdw, reg, tmp2), ctx);
808 		break;
809 	case BPF_OR:
810 		emit(A64_STSET(isdw, reg, src), ctx);
811 		break;
812 	case BPF_XOR:
813 		emit(A64_STEOR(isdw, reg, src), ctx);
814 		break;
815 	/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
816 	case BPF_ADD | BPF_FETCH:
817 		emit(A64_LDADDAL(isdw, src, reg, src), ctx);
818 		break;
819 	case BPF_AND | BPF_FETCH:
820 		emit(A64_MVN(isdw, tmp2, src), ctx);
821 		emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
822 		break;
823 	case BPF_OR | BPF_FETCH:
824 		emit(A64_LDSETAL(isdw, src, reg, src), ctx);
825 		break;
826 	case BPF_XOR | BPF_FETCH:
827 		emit(A64_LDEORAL(isdw, src, reg, src), ctx);
828 		break;
829 	/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
830 	case BPF_XCHG:
831 		emit(A64_SWPAL(isdw, src, reg, src), ctx);
832 		break;
833 	/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
834 	case BPF_CMPXCHG:
835 		emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
836 		break;
837 	default:
838 		pr_err_once("unknown atomic op code %02x\n", insn->imm);
839 		return -EINVAL;
840 	}
841 
842 	return 0;
843 }
844 
845 static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
846 {
847 	const u8 code = insn->code;
848 	const u8 dst = bpf2a64[insn->dst_reg];
849 	const u8 src = bpf2a64[insn->src_reg];
850 	const u8 tmp = bpf2a64[TMP_REG_1];
851 	const u8 tmp2 = bpf2a64[TMP_REG_2];
852 	const u8 tmp3 = bpf2a64[TMP_REG_3];
853 	const int i = insn - ctx->prog->insnsi;
854 	const s32 imm = insn->imm;
855 	const s16 off = insn->off;
856 	const bool isdw = BPF_SIZE(code) == BPF_DW;
857 	u8 reg = dst;
858 	s32 jmp_offset;
859 
860 	if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
861 		/* ll_sc based atomics don't support unsafe pointers yet. */
862 		pr_err_once("unknown atomic opcode %02x\n", code);
863 		return -EINVAL;
864 	}
865 
866 	if (off) {
867 		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
868 		reg = tmp;
869 	}
870 
871 	if (imm == BPF_ADD || imm == BPF_AND ||
872 	    imm == BPF_OR || imm == BPF_XOR) {
873 		/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
874 		emit(A64_LDXR(isdw, tmp2, reg), ctx);
875 		if (imm == BPF_ADD)
876 			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
877 		else if (imm == BPF_AND)
878 			emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
879 		else if (imm == BPF_OR)
880 			emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
881 		else
882 			emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
883 		emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
884 		jmp_offset = -3;
885 		check_imm19(jmp_offset);
886 		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
887 	} else if (imm == (BPF_ADD | BPF_FETCH) ||
888 		   imm == (BPF_AND | BPF_FETCH) ||
889 		   imm == (BPF_OR | BPF_FETCH) ||
890 		   imm == (BPF_XOR | BPF_FETCH)) {
891 		/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
892 		const u8 ax = bpf2a64[BPF_REG_AX];
893 
894 		emit(A64_MOV(isdw, ax, src), ctx);
895 		emit(A64_LDXR(isdw, src, reg), ctx);
896 		if (imm == (BPF_ADD | BPF_FETCH))
897 			emit(A64_ADD(isdw, tmp2, src, ax), ctx);
898 		else if (imm == (BPF_AND | BPF_FETCH))
899 			emit(A64_AND(isdw, tmp2, src, ax), ctx);
900 		else if (imm == (BPF_OR | BPF_FETCH))
901 			emit(A64_ORR(isdw, tmp2, src, ax), ctx);
902 		else
903 			emit(A64_EOR(isdw, tmp2, src, ax), ctx);
904 		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
905 		jmp_offset = -3;
906 		check_imm19(jmp_offset);
907 		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
908 		emit(A64_DMB_ISH, ctx);
909 	} else if (imm == BPF_XCHG) {
910 		/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
911 		emit(A64_MOV(isdw, tmp2, src), ctx);
912 		emit(A64_LDXR(isdw, src, reg), ctx);
913 		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
914 		jmp_offset = -2;
915 		check_imm19(jmp_offset);
916 		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
917 		emit(A64_DMB_ISH, ctx);
918 	} else if (imm == BPF_CMPXCHG) {
919 		/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
920 		const u8 r0 = bpf2a64[BPF_REG_0];
921 
922 		emit(A64_MOV(isdw, tmp2, r0), ctx);
923 		emit(A64_LDXR(isdw, r0, reg), ctx);
924 		emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
925 		jmp_offset = 4;
926 		check_imm19(jmp_offset);
927 		emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
928 		emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
929 		jmp_offset = -4;
930 		check_imm19(jmp_offset);
931 		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
932 		emit(A64_DMB_ISH, ctx);
933 	} else {
934 		pr_err_once("unknown atomic op code %02x\n", imm);
935 		return -EINVAL;
936 	}
937 
938 	return 0;
939 }
940 
941 void dummy_tramp(void);
942 
943 asm (
944 "	.pushsection .text, \"ax\", @progbits\n"
945 "	.global dummy_tramp\n"
946 "	.type dummy_tramp, %function\n"
947 "dummy_tramp:"
948 #if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
949 "	bti j\n" /* dummy_tramp is called via "br x10" */
950 #endif
951 "	mov x10, x30\n"
952 "	mov x30, x9\n"
953 "	ret x10\n"
954 "	.size dummy_tramp, .-dummy_tramp\n"
955 "	.popsection\n"
956 );
957 
958 /* build a plt initialized like this:
959  *
960  * plt:
961  *      ldr tmp, target
962  *      br tmp
963  * target:
964  *      .quad dummy_tramp
965  *
966  * when a long jump trampoline is attached, target is filled with the
967  * trampoline address, and when the trampoline is removed, target is
968  * restored to dummy_tramp address.
969  */
970 static void build_plt(struct jit_ctx *ctx)
971 {
972 	const u8 tmp = bpf2a64[TMP_REG_1];
973 	struct bpf_plt *plt = NULL;
974 
975 	/* make sure target is 64-bit aligned */
976 	if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
977 		emit(A64_NOP, ctx);
978 
979 	plt = (struct bpf_plt *)(ctx->image + ctx->idx);
980 	/* plt is called via bl, no BTI needed here */
981 	emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
982 	emit(A64_BR(tmp), ctx);
983 
984 	if (ctx->image)
985 		plt->target = (u64)&dummy_tramp;
986 }
987 
988 /* Clobbers BPF registers 1-4, aka x0-x3 */
989 static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
990 {
991 	const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */
992 	u8 k = get_spectre_bhb_loop_value();
993 
994 	if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
995 	    cpu_mitigations_off() || __nospectre_bhb ||
996 	    arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
997 		return;
998 
999 	if (ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
1000 		return;
1001 
1002 	if (supports_clearbhb(SCOPE_SYSTEM)) {
1003 		emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx);
1004 		return;
1005 	}
1006 
1007 	if (k) {
1008 		emit_a64_mov_i64(r1, k, ctx);
1009 		emit(A64_B(1), ctx);
1010 		emit(A64_SUBS_I(true, r1, r1, 1), ctx);
1011 		emit(A64_B_(A64_COND_NE, -2), ctx);
1012 		emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx);
1013 		emit(aarch64_insn_get_isb_value(), ctx);
1014 	}
1015 
1016 	if (is_spectre_bhb_fw_mitigated()) {
1017 		emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR,
1018 			       ARM_SMCCC_ARCH_WORKAROUND_3), ctx);
1019 		switch (arm_smccc_1_1_get_conduit()) {
1020 		case SMCCC_CONDUIT_HVC:
1021 			emit(aarch64_insn_get_hvc_value(), ctx);
1022 			break;
1023 		case SMCCC_CONDUIT_SMC:
1024 			emit(aarch64_insn_get_smc_value(), ctx);
1025 			break;
1026 		default:
1027 			pr_err_once("Firmware mitigation enabled with unknown conduit\n");
1028 		}
1029 	}
1030 }
1031 
1032 static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
1033 {
1034 	const u8 r0 = bpf2a64[BPF_REG_0];
1035 	const u8 ptr = bpf2a64[TCCNT_PTR];
1036 
1037 	/* We're done with BPF stack */
1038 	if (ctx->stack_size && !ctx->priv_sp_used)
1039 		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
1040 
1041 	pop_callee_regs(ctx);
1042 
1043 	emit(A64_POP(A64_ZR, ptr, A64_SP), ctx);
1044 
1045 	if (was_classic)
1046 		build_bhb_mitigation(ctx);
1047 
1048 	/* Restore FP/LR registers */
1049 	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
1050 
1051 	/* Move the return value from bpf:r0 (aka x7) to x0 */
1052 	emit(A64_MOV(1, A64_R(0), r0), ctx);
1053 
1054 	/* Authenticate lr */
1055 	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
1056 		emit(A64_AUTIASP, ctx);
1057 
1058 	emit(A64_RET(A64_LR), ctx);
1059 }
1060 
1061 /*
1062  * Metadata encoding for exception handling in JITed code.
1063  *
1064  * Format of `fixup` field in `struct exception_table_entry`:
1065  *
1066  * Bit layout of `fixup` (32-bit):
1067  *
1068  * +-----------+--------+-----------+-----------+----------+
1069  * |   31-27   | 26-22  |     21    |   20-16   |   15-0   |
1070  * |           |        |           |           |          |
1071  * | FIXUP_REG | Unused | ARENA_ACC | ARENA_REG |  OFFSET  |
1072  * +-----------+--------+-----------+-----------+----------+
1073  *
1074  * - OFFSET (16 bits): Offset used to compute address for Load/Store instruction.
1075  * - ARENA_REG (5 bits): Register that is used to calculate the address for load/store when
1076  *                       accessing the arena region.
1077  * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
1078  * - FIXUP_REG (5 bits): Destination register for the load instruction (cleared on fault) or set to
1079  *                       DONT_CLEAR if it is a store instruction.
1080  */
1081 
1082 #define BPF_FIXUP_OFFSET_MASK      GENMASK(15, 0)
1083 #define BPF_FIXUP_ARENA_REG_MASK   GENMASK(20, 16)
1084 #define BPF_ARENA_ACCESS           BIT(21)
1085 #define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
1086 #define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */
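
/*
 * Illustrative example: for a faulting arena load (BPF_LDX | BPF_PROBE_MEM32)
 * with dst_reg = BPF_REG_0 (x7), the 32-bit arena address in BPF_REG_1 (x0)
 * and off = 16, the fixup word carries FIXUP_REG = 7, ARENA_ACC = 1,
 * ARENA_REG = 0 and OFFSET = 16.
 */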
1087 
1088 bool ex_handler_bpf(const struct exception_table_entry *ex,
1089 		    struct pt_regs *regs)
1090 {
1091 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
1092 	s16 off = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
1093 	int arena_reg = FIELD_GET(BPF_FIXUP_ARENA_REG_MASK, ex->fixup);
1094 	bool is_arena = !!(ex->fixup & BPF_ARENA_ACCESS);
1095 	bool is_write = (dst_reg == DONT_CLEAR);
1096 	unsigned long addr;
1097 
1098 	if (is_arena) {
1099 		addr = regs->regs[arena_reg] + off;
1100 		bpf_prog_report_arena_violation(is_write, addr, regs->pc);
1101 	}
1102 
1103 	if (dst_reg != DONT_CLEAR)
1104 		regs->regs[dst_reg] = 0;
1105 	/* Skip the faulting instruction */
1106 	regs->pc += AARCH64_INSN_SIZE;
1107 
1108 	return true;
1109 }
1110 
1111 /* For accesses to BTF pointers, add an entry to the exception table */
1112 static int add_exception_handler(const struct bpf_insn *insn,
1113 				 struct jit_ctx *ctx,
1114 				 int dst_reg)
1115 {
1116 	off_t ins_offset;
1117 	s16 off = insn->off;
1118 	bool is_arena;
1119 	int arena_reg;
1120 	unsigned long pc;
1121 	struct exception_table_entry *ex;
1122 
1123 	if (!ctx->image)
1124 		/* First pass */
1125 		return 0;
1126 
1127 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
1128 	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
1129 	    BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
1130 	    BPF_MODE(insn->code) != BPF_PROBE_MEM32SX &&
1131 	    BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
1132 		return 0;
1133 
1134 	is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) ||
1135 		   (BPF_MODE(insn->code) == BPF_PROBE_MEM32SX) ||
1136 		   (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC);
1137 
1138 	if (!ctx->prog->aux->extable ||
1139 	    WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
1140 		return -EINVAL;
1141 
1142 	ex = &ctx->prog->aux->extable[ctx->exentry_idx];
1143 	pc = (unsigned long)&ctx->ro_image[ctx->idx - 1];
1144 
1145 	/*
1146 	 * This is the relative offset of the instruction that may fault from
1147 	 * the exception table itself. This will be written to the exception
1148 	 * table and if this instruction faults, the destination register will
1149 	 * be set to '0' and the execution will jump to the next instruction.
1150 	 */
1151 	ins_offset = pc - (long)&ex->insn;
1152 	if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
1153 		return -ERANGE;
1154 
1155 	/*
1156 	 * The offsets above have been calculated using the RO image, but the
1157 	 * entry must be written through the R/W image, so switch ex to the
1158 	 * R/W buffer before writing.
1159 	 */
1160 	ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);
1161 
1162 	ex->insn = ins_offset;
1163 
1164 	if (BPF_CLASS(insn->code) != BPF_LDX)
1165 		dst_reg = DONT_CLEAR;
1166 
1167 	ex->fixup = FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
1168 
1169 	if (is_arena) {
1170 		ex->fixup |= BPF_ARENA_ACCESS;
1171 		/*
1172 		 * insn->src_reg/dst_reg holds the address in the arena region with upper 32-bits
1173 		 * being zero because of a preceding addr_space_cast(r<n>, 0x0, 0x1) instruction.
1174 		 * This address is adjusted with the addition of arena_vm_start (see the
1175 		 * implementation of BPF_PROBE_MEM32 and BPF_PROBE_ATOMIC) before being used for the
1176 		 * memory access. Pass the reg holding the unmodified 32-bit address to
1177 		 * ex_handler_bpf.
1178 		 */
1179 		if (BPF_CLASS(insn->code) == BPF_LDX)
1180 			arena_reg = bpf2a64[insn->src_reg];
1181 		else
1182 			arena_reg = bpf2a64[insn->dst_reg];
1183 
1184 		ex->fixup |=  FIELD_PREP(BPF_FIXUP_OFFSET_MASK, off) |
1185 			      FIELD_PREP(BPF_FIXUP_ARENA_REG_MASK, arena_reg);
1186 	}
1187 
1188 	ex->type = EX_TYPE_BPF;
1189 
1190 	ctx->exentry_idx++;
1191 	return 0;
1192 }
1193 
1194 /* JITs an eBPF instruction.
1195  * Returns:
1196  * 0  - successfully JITed an 8-byte eBPF instruction.
1197  * >0 - successfully JITed a 16-byte eBPF instruction.
1198  * <0 - failed to JIT.
1199  */
1200 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
1201 		      bool extra_pass)
1202 {
1203 	const u8 code = insn->code;
1204 	u8 dst = bpf2a64[insn->dst_reg];
1205 	u8 src = bpf2a64[insn->src_reg];
1206 	const u8 tmp = bpf2a64[TMP_REG_1];
1207 	const u8 tmp2 = bpf2a64[TMP_REG_2];
1208 	const u8 tmp3 = bpf2a64[TMP_REG_3];
1209 	const u8 fp = bpf2a64[BPF_REG_FP];
1210 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
1211 	const u8 priv_sp = bpf2a64[PRIVATE_SP];
1212 	const s16 off = insn->off;
1213 	const s32 imm = insn->imm;
1214 	const int i = insn - ctx->prog->insnsi;
1215 	const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
1216 			  BPF_CLASS(code) == BPF_JMP;
1217 	u8 jmp_cond;
1218 	s32 jmp_offset;
1219 	u32 a64_insn;
1220 	u8 src_adj;
1221 	u8 dst_adj;
1222 	int off_adj;
1223 	int ret;
1224 	bool sign_extend;
1225 
1226 	switch (code) {
1227 	/* dst = src */
1228 	case BPF_ALU | BPF_MOV | BPF_X:
1229 	case BPF_ALU64 | BPF_MOV | BPF_X:
1230 		if (insn_is_cast_user(insn)) {
1231 			emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits
1232 			emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx);
1233 			emit(A64_LSL(1, dst, dst, 32), ctx);
1234 			emit(A64_CBZ(1, tmp, 2), ctx);
1235 			emit(A64_ORR(1, tmp, dst, tmp), ctx);
1236 			emit(A64_MOV(1, dst, tmp), ctx);
1237 			break;
1238 		} else if (insn_is_mov_percpu_addr(insn)) {
1239 			if (dst != src)
1240 				emit(A64_MOV(1, dst, src), ctx);
1241 			if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
1242 				emit(A64_MRS_TPIDR_EL2(tmp), ctx);
1243 			else
1244 				emit(A64_MRS_TPIDR_EL1(tmp), ctx);
1245 			emit(A64_ADD(1, dst, dst, tmp), ctx);
1246 			break;
1247 		}
1248 		switch (insn->off) {
1249 		case 0:
1250 			emit(A64_MOV(is64, dst, src), ctx);
1251 			break;
1252 		case 8:
1253 			emit(A64_SXTB(is64, dst, src), ctx);
1254 			break;
1255 		case 16:
1256 			emit(A64_SXTH(is64, dst, src), ctx);
1257 			break;
1258 		case 32:
1259 			emit(A64_SXTW(is64, dst, src), ctx);
1260 			break;
1261 		}
1262 		break;
1263 	/* dst = dst OP src */
1264 	case BPF_ALU | BPF_ADD | BPF_X:
1265 	case BPF_ALU64 | BPF_ADD | BPF_X:
1266 		emit(A64_ADD(is64, dst, dst, src), ctx);
1267 		break;
1268 	case BPF_ALU | BPF_SUB | BPF_X:
1269 	case BPF_ALU64 | BPF_SUB | BPF_X:
1270 		emit(A64_SUB(is64, dst, dst, src), ctx);
1271 		break;
1272 	case BPF_ALU | BPF_AND | BPF_X:
1273 	case BPF_ALU64 | BPF_AND | BPF_X:
1274 		emit(A64_AND(is64, dst, dst, src), ctx);
1275 		break;
1276 	case BPF_ALU | BPF_OR | BPF_X:
1277 	case BPF_ALU64 | BPF_OR | BPF_X:
1278 		emit(A64_ORR(is64, dst, dst, src), ctx);
1279 		break;
1280 	case BPF_ALU | BPF_XOR | BPF_X:
1281 	case BPF_ALU64 | BPF_XOR | BPF_X:
1282 		emit(A64_EOR(is64, dst, dst, src), ctx);
1283 		break;
1284 	case BPF_ALU | BPF_MUL | BPF_X:
1285 	case BPF_ALU64 | BPF_MUL | BPF_X:
1286 		emit(A64_MUL(is64, dst, dst, src), ctx);
1287 		break;
1288 	case BPF_ALU | BPF_DIV | BPF_X:
1289 	case BPF_ALU64 | BPF_DIV | BPF_X:
1290 		if (!off)
1291 			emit(A64_UDIV(is64, dst, dst, src), ctx);
1292 		else
1293 			emit(A64_SDIV(is64, dst, dst, src), ctx);
1294 		break;
1295 	case BPF_ALU | BPF_MOD | BPF_X:
1296 	case BPF_ALU64 | BPF_MOD | BPF_X:
1297 		if (!off)
1298 			emit(A64_UDIV(is64, tmp, dst, src), ctx);
1299 		else
1300 			emit(A64_SDIV(is64, tmp, dst, src), ctx);
1301 		emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
1302 		break;
1303 	case BPF_ALU | BPF_LSH | BPF_X:
1304 	case BPF_ALU64 | BPF_LSH | BPF_X:
1305 		emit(A64_LSLV(is64, dst, dst, src), ctx);
1306 		break;
1307 	case BPF_ALU | BPF_RSH | BPF_X:
1308 	case BPF_ALU64 | BPF_RSH | BPF_X:
1309 		emit(A64_LSRV(is64, dst, dst, src), ctx);
1310 		break;
1311 	case BPF_ALU | BPF_ARSH | BPF_X:
1312 	case BPF_ALU64 | BPF_ARSH | BPF_X:
1313 		emit(A64_ASRV(is64, dst, dst, src), ctx);
1314 		break;
1315 	/* dst = -dst */
1316 	case BPF_ALU | BPF_NEG:
1317 	case BPF_ALU64 | BPF_NEG:
1318 		emit(A64_NEG(is64, dst, dst), ctx);
1319 		break;
1320 	/* dst = BSWAP##imm(dst) */
1321 	case BPF_ALU | BPF_END | BPF_FROM_LE:
1322 	case BPF_ALU | BPF_END | BPF_FROM_BE:
1323 	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
1324 #ifdef CONFIG_CPU_BIG_ENDIAN
1325 		if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE)
1326 			goto emit_bswap_uxt;
1327 #else /* !CONFIG_CPU_BIG_ENDIAN */
1328 		if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE)
1329 			goto emit_bswap_uxt;
1330 #endif
1331 		switch (imm) {
1332 		case 16:
1333 			emit(A64_REV16(is64, dst, dst), ctx);
1334 			/* zero-extend 16 bits into 64 bits */
1335 			emit(A64_UXTH(is64, dst, dst), ctx);
1336 			break;
1337 		case 32:
1338 			emit(A64_REV32(0, dst, dst), ctx);
1339 			/* upper 32 bits already cleared */
1340 			break;
1341 		case 64:
1342 			emit(A64_REV64(dst, dst), ctx);
1343 			break;
1344 		}
1345 		break;
1346 emit_bswap_uxt:
1347 		switch (imm) {
1348 		case 16:
1349 			/* zero-extend 16 bits into 64 bits */
1350 			emit(A64_UXTH(is64, dst, dst), ctx);
1351 			break;
1352 		case 32:
1353 			/* zero-extend 32 bits into 64 bits */
1354 			emit(A64_UXTW(is64, dst, dst), ctx);
1355 			break;
1356 		case 64:
1357 			/* nop */
1358 			break;
1359 		}
1360 		break;
1361 	/* dst = imm */
1362 	case BPF_ALU | BPF_MOV | BPF_K:
1363 	case BPF_ALU64 | BPF_MOV | BPF_K:
1364 		emit_a64_mov_i(is64, dst, imm, ctx);
1365 		break;
1366 	/* dst = dst OP imm */
1367 	case BPF_ALU | BPF_ADD | BPF_K:
1368 	case BPF_ALU64 | BPF_ADD | BPF_K:
1369 		emit_a64_add_i(is64, dst, dst, tmp, imm, ctx);
1370 		break;
1371 	case BPF_ALU | BPF_SUB | BPF_K:
1372 	case BPF_ALU64 | BPF_SUB | BPF_K:
1373 		if (is_addsub_imm(imm)) {
1374 			emit(A64_SUB_I(is64, dst, dst, imm), ctx);
1375 		} else if (is_addsub_imm(-(u32)imm)) {
1376 			emit(A64_ADD_I(is64, dst, dst, -imm), ctx);
1377 		} else {
1378 			emit_a64_mov_i(is64, tmp, imm, ctx);
1379 			emit(A64_SUB(is64, dst, dst, tmp), ctx);
1380 		}
1381 		break;
1382 	case BPF_ALU | BPF_AND | BPF_K:
1383 	case BPF_ALU64 | BPF_AND | BPF_K:
1384 		a64_insn = A64_AND_I(is64, dst, dst, imm);
1385 		if (a64_insn != AARCH64_BREAK_FAULT) {
1386 			emit(a64_insn, ctx);
1387 		} else {
1388 			emit_a64_mov_i(is64, tmp, imm, ctx);
1389 			emit(A64_AND(is64, dst, dst, tmp), ctx);
1390 		}
1391 		break;
1392 	case BPF_ALU | BPF_OR | BPF_K:
1393 	case BPF_ALU64 | BPF_OR | BPF_K:
1394 		a64_insn = A64_ORR_I(is64, dst, dst, imm);
1395 		if (a64_insn != AARCH64_BREAK_FAULT) {
1396 			emit(a64_insn, ctx);
1397 		} else {
1398 			emit_a64_mov_i(is64, tmp, imm, ctx);
1399 			emit(A64_ORR(is64, dst, dst, tmp), ctx);
1400 		}
1401 		break;
1402 	case BPF_ALU | BPF_XOR | BPF_K:
1403 	case BPF_ALU64 | BPF_XOR | BPF_K:
1404 		a64_insn = A64_EOR_I(is64, dst, dst, imm);
1405 		if (a64_insn != AARCH64_BREAK_FAULT) {
1406 			emit(a64_insn, ctx);
1407 		} else {
1408 			emit_a64_mov_i(is64, tmp, imm, ctx);
1409 			emit(A64_EOR(is64, dst, dst, tmp), ctx);
1410 		}
1411 		break;
1412 	case BPF_ALU | BPF_MUL | BPF_K:
1413 	case BPF_ALU64 | BPF_MUL | BPF_K:
1414 		emit_a64_mov_i(is64, tmp, imm, ctx);
1415 		emit(A64_MUL(is64, dst, dst, tmp), ctx);
1416 		break;
1417 	case BPF_ALU | BPF_DIV | BPF_K:
1418 	case BPF_ALU64 | BPF_DIV | BPF_K:
1419 		emit_a64_mov_i(is64, tmp, imm, ctx);
1420 		if (!off)
1421 			emit(A64_UDIV(is64, dst, dst, tmp), ctx);
1422 		else
1423 			emit(A64_SDIV(is64, dst, dst, tmp), ctx);
1424 		break;
1425 	case BPF_ALU | BPF_MOD | BPF_K:
1426 	case BPF_ALU64 | BPF_MOD | BPF_K:
1427 		emit_a64_mov_i(is64, tmp2, imm, ctx);
1428 		if (!off)
1429 			emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
1430 		else
1431 			emit(A64_SDIV(is64, tmp, dst, tmp2), ctx);
1432 		emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
1433 		break;
1434 	case BPF_ALU | BPF_LSH | BPF_K:
1435 	case BPF_ALU64 | BPF_LSH | BPF_K:
1436 		emit(A64_LSL(is64, dst, dst, imm), ctx);
1437 		break;
1438 	case BPF_ALU | BPF_RSH | BPF_K:
1439 	case BPF_ALU64 | BPF_RSH | BPF_K:
1440 		emit(A64_LSR(is64, dst, dst, imm), ctx);
1441 		break;
1442 	case BPF_ALU | BPF_ARSH | BPF_K:
1443 	case BPF_ALU64 | BPF_ARSH | BPF_K:
1444 		emit(A64_ASR(is64, dst, dst, imm), ctx);
1445 		break;
1446 
1447 	/* JUMP reg */
1448 	case BPF_JMP | BPF_JA | BPF_X:
1449 		emit(A64_BR(dst), ctx);
1450 		break;
1451 	/* JUMP off */
1452 	case BPF_JMP | BPF_JA:
1453 	case BPF_JMP32 | BPF_JA:
1454 		if (BPF_CLASS(code) == BPF_JMP)
1455 			jmp_offset = bpf2a64_offset(i, off, ctx);
1456 		else
1457 			jmp_offset = bpf2a64_offset(i, imm, ctx);
1458 		check_imm26(jmp_offset);
1459 		emit(A64_B(jmp_offset), ctx);
1460 		break;
1461 	/* IF (dst COND src) JUMP off */
1462 	case BPF_JMP | BPF_JEQ | BPF_X:
1463 	case BPF_JMP | BPF_JGT | BPF_X:
1464 	case BPF_JMP | BPF_JLT | BPF_X:
1465 	case BPF_JMP | BPF_JGE | BPF_X:
1466 	case BPF_JMP | BPF_JLE | BPF_X:
1467 	case BPF_JMP | BPF_JNE | BPF_X:
1468 	case BPF_JMP | BPF_JSGT | BPF_X:
1469 	case BPF_JMP | BPF_JSLT | BPF_X:
1470 	case BPF_JMP | BPF_JSGE | BPF_X:
1471 	case BPF_JMP | BPF_JSLE | BPF_X:
1472 	case BPF_JMP32 | BPF_JEQ | BPF_X:
1473 	case BPF_JMP32 | BPF_JGT | BPF_X:
1474 	case BPF_JMP32 | BPF_JLT | BPF_X:
1475 	case BPF_JMP32 | BPF_JGE | BPF_X:
1476 	case BPF_JMP32 | BPF_JLE | BPF_X:
1477 	case BPF_JMP32 | BPF_JNE | BPF_X:
1478 	case BPF_JMP32 | BPF_JSGT | BPF_X:
1479 	case BPF_JMP32 | BPF_JSLT | BPF_X:
1480 	case BPF_JMP32 | BPF_JSGE | BPF_X:
1481 	case BPF_JMP32 | BPF_JSLE | BPF_X:
1482 		emit(A64_CMP(is64, dst, src), ctx);
1483 emit_cond_jmp:
1484 		jmp_offset = bpf2a64_offset(i, off, ctx);
1485 		check_imm19(jmp_offset);
1486 		switch (BPF_OP(code)) {
1487 		case BPF_JEQ:
1488 			jmp_cond = A64_COND_EQ;
1489 			break;
1490 		case BPF_JGT:
1491 			jmp_cond = A64_COND_HI;
1492 			break;
1493 		case BPF_JLT:
1494 			jmp_cond = A64_COND_CC;
1495 			break;
1496 		case BPF_JGE:
1497 			jmp_cond = A64_COND_CS;
1498 			break;
1499 		case BPF_JLE:
1500 			jmp_cond = A64_COND_LS;
1501 			break;
1502 		case BPF_JSET:
1503 		case BPF_JNE:
1504 			jmp_cond = A64_COND_NE;
1505 			break;
1506 		case BPF_JSGT:
1507 			jmp_cond = A64_COND_GT;
1508 			break;
1509 		case BPF_JSLT:
1510 			jmp_cond = A64_COND_LT;
1511 			break;
1512 		case BPF_JSGE:
1513 			jmp_cond = A64_COND_GE;
1514 			break;
1515 		case BPF_JSLE:
1516 			jmp_cond = A64_COND_LE;
1517 			break;
1518 		default:
1519 			return -EFAULT;
1520 		}
1521 		emit(A64_B_(jmp_cond, jmp_offset), ctx);
1522 		break;
1523 	case BPF_JMP | BPF_JSET | BPF_X:
1524 	case BPF_JMP32 | BPF_JSET | BPF_X:
1525 		emit(A64_TST(is64, dst, src), ctx);
1526 		goto emit_cond_jmp;
1527 	/* IF (dst COND imm) JUMP off */
1528 	case BPF_JMP | BPF_JEQ | BPF_K:
1529 	case BPF_JMP | BPF_JGT | BPF_K:
1530 	case BPF_JMP | BPF_JLT | BPF_K:
1531 	case BPF_JMP | BPF_JGE | BPF_K:
1532 	case BPF_JMP | BPF_JLE | BPF_K:
1533 	case BPF_JMP | BPF_JNE | BPF_K:
1534 	case BPF_JMP | BPF_JSGT | BPF_K:
1535 	case BPF_JMP | BPF_JSLT | BPF_K:
1536 	case BPF_JMP | BPF_JSGE | BPF_K:
1537 	case BPF_JMP | BPF_JSLE | BPF_K:
1538 	case BPF_JMP32 | BPF_JEQ | BPF_K:
1539 	case BPF_JMP32 | BPF_JGT | BPF_K:
1540 	case BPF_JMP32 | BPF_JLT | BPF_K:
1541 	case BPF_JMP32 | BPF_JGE | BPF_K:
1542 	case BPF_JMP32 | BPF_JLE | BPF_K:
1543 	case BPF_JMP32 | BPF_JNE | BPF_K:
1544 	case BPF_JMP32 | BPF_JSGT | BPF_K:
1545 	case BPF_JMP32 | BPF_JSLT | BPF_K:
1546 	case BPF_JMP32 | BPF_JSGE | BPF_K:
1547 	case BPF_JMP32 | BPF_JSLE | BPF_K:
1548 		if (is_addsub_imm(imm)) {
1549 			emit(A64_CMP_I(is64, dst, imm), ctx);
1550 		} else if (is_addsub_imm(-(u32)imm)) {
1551 			emit(A64_CMN_I(is64, dst, -imm), ctx);
1552 		} else {
1553 			emit_a64_mov_i(is64, tmp, imm, ctx);
1554 			emit(A64_CMP(is64, dst, tmp), ctx);
1555 		}
1556 		goto emit_cond_jmp;
1557 	case BPF_JMP | BPF_JSET | BPF_K:
1558 	case BPF_JMP32 | BPF_JSET | BPF_K:
1559 		a64_insn = A64_TST_I(is64, dst, imm);
1560 		if (a64_insn != AARCH64_BREAK_FAULT) {
1561 			emit(a64_insn, ctx);
1562 		} else {
1563 			emit_a64_mov_i(is64, tmp, imm, ctx);
1564 			emit(A64_TST(is64, dst, tmp), ctx);
1565 		}
1566 		goto emit_cond_jmp;
1567 	/* function call */
1568 	case BPF_JMP | BPF_CALL:
1569 	{
1570 		const u8 r0 = bpf2a64[BPF_REG_0];
1571 		bool func_addr_fixed;
1572 		u64 func_addr;
1573 		u32 cpu_offset;
1574 
1575 		/* Implement helper call to bpf_get_smp_processor_id() inline */
1576 		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {
1577 			cpu_offset = offsetof(struct thread_info, cpu);
1578 
1579 			emit(A64_MRS_SP_EL0(tmp), ctx);
1580 			if (is_lsi_offset(cpu_offset, 2)) {
1581 				emit(A64_LDR32I(r0, tmp, cpu_offset), ctx);
1582 			} else {
1583 				emit_a64_mov_i(1, tmp2, cpu_offset, ctx);
1584 				emit(A64_LDR32(r0, tmp, tmp2), ctx);
1585 			}
1586 			break;
1587 		}
1588 
1589 		/* Implement helper call to bpf_get_current_task/_btf() inline */
1590 		if (insn->src_reg == 0 && (insn->imm == BPF_FUNC_get_current_task ||
1591 					   insn->imm == BPF_FUNC_get_current_task_btf)) {
1592 			emit(A64_MRS_SP_EL0(r0), ctx);
1593 			break;
1594 		}
1595 
1596 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
1597 					    &func_addr, &func_addr_fixed);
1598 		if (ret < 0)
1599 			return ret;
1600 		emit_call(func_addr, ctx);
1601 		/*
1602 		 * The call to arch_bpf_timed_may_goto() is emitted by the
1603 		 * verifier and uses a custom calling convention: its first
1604 		 * argument and return value are in BPF_REG_AX (x9).
1605 		 */
1606 		if (func_addr != (u64)arch_bpf_timed_may_goto)
1607 			emit(A64_MOV(1, r0, A64_R(0)), ctx);
1608 		break;
1609 	}
1610 	/* tail call */
1611 	case BPF_JMP | BPF_TAIL_CALL:
1612 		if (emit_bpf_tail_call(ctx))
1613 			return -EFAULT;
1614 		break;
1615 	/* function return */
1616 	case BPF_JMP | BPF_EXIT:
1617 		/* Optimization: when the last instruction is EXIT,
1618 		   simply fall through to the epilogue. */
1619 		if (i == ctx->prog->len - 1)
1620 			break;
1621 		jmp_offset = epilogue_offset(ctx);
1622 		check_imm26(jmp_offset);
1623 		emit(A64_B(jmp_offset), ctx);
1624 		break;
1625 
1626 	/* dst = imm64 */
1627 	case BPF_LD | BPF_IMM | BPF_DW:
1628 	{
1629 		const struct bpf_insn insn1 = insn[1];
1630 		u64 imm64;
1631 
1632 		imm64 = (u64)insn1.imm << 32 | (u32)imm;
1633 		if (bpf_pseudo_func(insn))
1634 			emit_addr_mov_i64(dst, imm64, ctx);
1635 		else
1636 			emit_a64_mov_i64(dst, imm64, ctx);
1637 
1638 		return 1;
1639 	}
1640 
1641 	/* LDX: dst = (u64)*(unsigned size *)(src + off) */
1642 	case BPF_LDX | BPF_MEM | BPF_W:
1643 	case BPF_LDX | BPF_MEM | BPF_H:
1644 	case BPF_LDX | BPF_MEM | BPF_B:
1645 	case BPF_LDX | BPF_MEM | BPF_DW:
1646 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1647 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1648 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1649 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1650 	/* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */
1651 	case BPF_LDX | BPF_MEMSX | BPF_B:
1652 	case BPF_LDX | BPF_MEMSX | BPF_H:
1653 	case BPF_LDX | BPF_MEMSX | BPF_W:
1654 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
1655 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
1656 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
1657 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
1658 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
1659 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
1660 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
1661 	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_B:
1662 	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_H:
1663 	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_W:
1664 		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
1665 		    BPF_MODE(insn->code) == BPF_PROBE_MEM32SX) {
1666 			emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx);
1667 			src = tmp2;
1668 		}
1669 		if (src == fp) {
1670 			src_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
1671 			off_adj = off + ctx->stack_size;
1672 		} else {
1673 			src_adj = src;
1674 			off_adj = off;
1675 		}
1676 		sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX ||
1677 				BPF_MODE(insn->code) == BPF_PROBE_MEMSX ||
1678 				 BPF_MODE(insn->code) == BPF_PROBE_MEM32SX);
1679 		switch (BPF_SIZE(code)) {
1680 		case BPF_W:
1681 			if (is_lsi_offset(off_adj, 2)) {
1682 				if (sign_extend)
1683 					emit(A64_LDRSWI(dst, src_adj, off_adj), ctx);
1684 				else
1685 					emit(A64_LDR32I(dst, src_adj, off_adj), ctx);
1686 			} else {
1687 				emit_a64_mov_i(1, tmp, off, ctx);
1688 				if (sign_extend)
1689 					emit(A64_LDRSW(dst, src, tmp), ctx);
1690 				else
1691 					emit(A64_LDR32(dst, src, tmp), ctx);
1692 			}
1693 			break;
1694 		case BPF_H:
1695 			if (is_lsi_offset(off_adj, 1)) {
1696 				if (sign_extend)
1697 					emit(A64_LDRSHI(dst, src_adj, off_adj), ctx);
1698 				else
1699 					emit(A64_LDRHI(dst, src_adj, off_adj), ctx);
1700 			} else {
1701 				emit_a64_mov_i(1, tmp, off, ctx);
1702 				if (sign_extend)
1703 					emit(A64_LDRSH(dst, src, tmp), ctx);
1704 				else
1705 					emit(A64_LDRH(dst, src, tmp), ctx);
1706 			}
1707 			break;
1708 		case BPF_B:
1709 			if (is_lsi_offset(off_adj, 0)) {
1710 				if (sign_extend)
1711 					emit(A64_LDRSBI(dst, src_adj, off_adj), ctx);
1712 				else
1713 					emit(A64_LDRBI(dst, src_adj, off_adj), ctx);
1714 			} else {
1715 				emit_a64_mov_i(1, tmp, off, ctx);
1716 				if (sign_extend)
1717 					emit(A64_LDRSB(dst, src, tmp), ctx);
1718 				else
1719 					emit(A64_LDRB(dst, src, tmp), ctx);
1720 			}
1721 			break;
1722 		case BPF_DW:
1723 			if (is_lsi_offset(off_adj, 3)) {
1724 				emit(A64_LDR64I(dst, src_adj, off_adj), ctx);
1725 			} else {
1726 				emit_a64_mov_i(1, tmp, off, ctx);
1727 				emit(A64_LDR64(dst, src, tmp), ctx);
1728 			}
1729 			break;
1730 		}
1731 
1732 		ret = add_exception_handler(insn, ctx, dst);
1733 		if (ret)
1734 			return ret;
1735 		break;
1736 
1737 	/* speculation barrier against v1 and v4 */
1738 	case BPF_ST | BPF_NOSPEC:
1739 		if (alternative_has_cap_likely(ARM64_HAS_SB)) {
1740 			emit(A64_SB, ctx);
1741 		} else {
1742 			emit(A64_DSB_NSH, ctx);
1743 			emit(A64_ISB, ctx);
1744 		}
1745 		break;
1746 
1747 	/* ST: *(size *)(dst + off) = imm */
1748 	case BPF_ST | BPF_MEM | BPF_W:
1749 	case BPF_ST | BPF_MEM | BPF_H:
1750 	case BPF_ST | BPF_MEM | BPF_B:
1751 	case BPF_ST | BPF_MEM | BPF_DW:
1752 	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
1753 	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
1754 	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
1755 	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
1756 		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
1757 			emit(A64_ADD(1, tmp3, dst, arena_vm_base), ctx);
1758 			dst = tmp3;
1759 		}
1760 		if (dst == fp) {
1761 			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
1762 			off_adj = off + ctx->stack_size;
1763 		} else {
1764 			dst_adj = dst;
1765 			off_adj = off;
1766 		}
1767 		/* Load imm to a register then store it */
1768 		emit_a64_mov_i(1, tmp, imm, ctx);
1769 		switch (BPF_SIZE(code)) {
1770 		case BPF_W:
1771 			if (is_lsi_offset(off_adj, 2)) {
1772 				emit(A64_STR32I(tmp, dst_adj, off_adj), ctx);
1773 			} else {
1774 				emit_a64_mov_i(1, tmp2, off, ctx);
1775 				emit(A64_STR32(tmp, dst, tmp2), ctx);
1776 			}
1777 			break;
1778 		case BPF_H:
1779 			if (is_lsi_offset(off_adj, 1)) {
1780 				emit(A64_STRHI(tmp, dst_adj, off_adj), ctx);
1781 			} else {
1782 				emit_a64_mov_i(1, tmp2, off, ctx);
1783 				emit(A64_STRH(tmp, dst, tmp2), ctx);
1784 			}
1785 			break;
1786 		case BPF_B:
1787 			if (is_lsi_offset(off_adj, 0)) {
1788 				emit(A64_STRBI(tmp, dst_adj, off_adj), ctx);
1789 			} else {
1790 				emit_a64_mov_i(1, tmp2, off, ctx);
1791 				emit(A64_STRB(tmp, dst, tmp2), ctx);
1792 			}
1793 			break;
1794 		case BPF_DW:
1795 			if (is_lsi_offset(off_adj, 3)) {
1796 				emit(A64_STR64I(tmp, dst_adj, off_adj), ctx);
1797 			} else {
1798 				emit_a64_mov_i(1, tmp2, off, ctx);
1799 				emit(A64_STR64(tmp, dst, tmp2), ctx);
1800 			}
1801 			break;
1802 		}
1803 
1804 		ret = add_exception_handler(insn, ctx, dst);
1805 		if (ret)
1806 			return ret;
1807 		break;
1808 
1809 	/* STX: *(size *)(dst + off) = src */
1810 	case BPF_STX | BPF_MEM | BPF_W:
1811 	case BPF_STX | BPF_MEM | BPF_H:
1812 	case BPF_STX | BPF_MEM | BPF_B:
1813 	case BPF_STX | BPF_MEM | BPF_DW:
1814 	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
1815 	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
1816 	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
1817 	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
1818 		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
1819 			emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx);
1820 			dst = tmp2;
1821 		}
1822 		if (dst == fp) {
1823 			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
1824 			off_adj = off + ctx->stack_size;
1825 		} else {
1826 			dst_adj = dst;
1827 			off_adj = off;
1828 		}
1829 		switch (BPF_SIZE(code)) {
1830 		case BPF_W:
1831 			if (is_lsi_offset(off_adj, 2)) {
1832 				emit(A64_STR32I(src, dst_adj, off_adj), ctx);
1833 			} else {
1834 				emit_a64_mov_i(1, tmp, off, ctx);
1835 				emit(A64_STR32(src, dst, tmp), ctx);
1836 			}
1837 			break;
1838 		case BPF_H:
1839 			if (is_lsi_offset(off_adj, 1)) {
1840 				emit(A64_STRHI(src, dst_adj, off_adj), ctx);
1841 			} else {
1842 				emit_a64_mov_i(1, tmp, off, ctx);
1843 				emit(A64_STRH(src, dst, tmp), ctx);
1844 			}
1845 			break;
1846 		case BPF_B:
1847 			if (is_lsi_offset(off_adj, 0)) {
1848 				emit(A64_STRBI(src, dst_adj, off_adj), ctx);
1849 			} else {
1850 				emit_a64_mov_i(1, tmp, off, ctx);
1851 				emit(A64_STRB(src, dst, tmp), ctx);
1852 			}
1853 			break;
1854 		case BPF_DW:
1855 			if (is_lsi_offset(off_adj, 3)) {
1856 				emit(A64_STR64I(src, dst_adj, off_adj), ctx);
1857 			} else {
1858 				emit_a64_mov_i(1, tmp, off, ctx);
1859 				emit(A64_STR64(src, dst, tmp), ctx);
1860 			}
1861 			break;
1862 		}
1863 
1864 		ret = add_exception_handler(insn, ctx, dst);
1865 		if (ret)
1866 			return ret;
1867 		break;
1868 
1869 	case BPF_STX | BPF_ATOMIC | BPF_B:
1870 	case BPF_STX | BPF_ATOMIC | BPF_H:
1871 	case BPF_STX | BPF_ATOMIC | BPF_W:
1872 	case BPF_STX | BPF_ATOMIC | BPF_DW:
1873 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
1874 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
1875 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
1876 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
1877 		if (bpf_atomic_is_load_store(insn))
1878 			ret = emit_atomic_ld_st(insn, ctx);
1879 		else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
1880 			ret = emit_lse_atomic(insn, ctx);
1881 		else
1882 			ret = emit_ll_sc_atomic(insn, ctx);
1883 		if (ret)
1884 			return ret;
1885 
1886 		if (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
1887 			ret = add_exception_handler(insn, ctx, dst);
1888 			if (ret)
1889 				return ret;
1890 		}
1891 		break;
1892 
1893 	default:
1894 		pr_err_once("unknown opcode %02x\n", code);
1895 		return -EINVAL;
1896 	}
1897 
1898 	return 0;
1899 }
1900 
1901 static int build_body(struct jit_ctx *ctx, bool extra_pass)
1902 {
1903 	const struct bpf_prog *prog = ctx->prog;
1904 	int i;
1905 
1906 	/*
1907 	 * - offset[0] - offset of the end of prologue,
1908 	 *   start of the 1st instruction.
1909 	 * - offset[1] - offset of the end of 1st instruction,
1910 	 *   start of the 2nd instruction
1911 	 * [....]
1912 	 * - offset[3] - offset of the end of 3rd instruction,
1913 	 *   start of 4th instruction
1914 	 */
1915 	for (i = 0; i < prog->len; i++) {
1916 		const struct bpf_insn *insn = &prog->insnsi[i];
1917 		int ret;
1918 
1919 		ctx->offset[i] = ctx->idx;
1920 		ret = build_insn(insn, ctx, extra_pass);
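		/*
		 * A positive return value means build_insn() consumed a
		 * second BPF instruction slot (currently only
		 * BPF_LD | BPF_IMM | BPF_DW), so the skipped slot gets the
		 * same jited offset.
		 */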
1921 		if (ret > 0) {
1922 			i++;
1923 			ctx->offset[i] = ctx->idx;
1924 			continue;
1925 		}
1926 		if (ret)
1927 			return ret;
1928 	}
1929 	/*
1930 	 * offset is allocated with prog->len + 1 so fill in
1931 	 * the last element with the offset after the last
1932 	 * instruction (end of program)
1933 	 */
1934 	ctx->offset[i] = ctx->idx;
1935 
1936 	return 0;
1937 }
1938 
1939 static int validate_code(struct jit_ctx *ctx)
1940 {
1941 	int i;
1942 
1943 	for (i = 0; i < ctx->idx; i++) {
1944 		u32 a64_insn = le32_to_cpu(ctx->image[i]);
1945 
1946 		if (a64_insn == AARCH64_BREAK_FAULT)
1947 			return -1;
1948 	}
1949 	return 0;
1950 }
1951 
1952 static int validate_ctx(struct jit_ctx *ctx)
1953 {
1954 	if (validate_code(ctx))
1955 		return -1;
1956 
1957 	if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
1958 		return -1;
1959 
1960 	return 0;
1961 }
1962 
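/*
 * The per-CPU private stack is laid out as:
 *   [ 16-byte guard | rounded-up stack area | 16-byte guard ]
 * Both guards are filled with PRIV_STACK_GUARD_VAL. For example, with
 * alloc_size == 544 the trailing guard occupies u64 slots 66 and 67,
 * i.e. underflow_idx == (544 - 16) / 8 == 66.
 */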
1963 static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
1964 {
1965 	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
1966 	u64 *stack_ptr;
1967 
1968 	for_each_possible_cpu(cpu) {
1969 		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
1970 		stack_ptr[0] = PRIV_STACK_GUARD_VAL;
1971 		stack_ptr[1] = PRIV_STACK_GUARD_VAL;
1972 		stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
1973 		stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
1974 	}
1975 }
1976 
1977 static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
1978 				   struct bpf_prog *prog)
1979 {
1980 	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
1981 	u64 *stack_ptr;
1982 
1983 	for_each_possible_cpu(cpu) {
1984 		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
1985 		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
1986 		    stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
1987 		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
1988 		    stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
1989 			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
1990 			       bpf_jit_get_prog_name(prog));
1991 			break;
1992 		}
1993 	}
1994 }
1995 
1996 struct arm64_jit_data {
1997 	struct bpf_binary_header *header;
1998 	u8 *ro_image;
1999 	struct bpf_binary_header *ro_header;
2000 	struct jit_ctx ctx;
2001 };
2002 
2003 struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
2004 {
2005 	int image_size, prog_size, extable_size, extable_align, extable_offset;
2006 	struct bpf_binary_header *header;
2007 	struct bpf_binary_header *ro_header = NULL;
2008 	struct arm64_jit_data *jit_data;
2009 	void __percpu *priv_stack_ptr = NULL;
2010 	bool was_classic = bpf_prog_was_classic(prog);
2011 	int priv_stack_alloc_sz;
2012 	bool extra_pass = false;
2013 	struct jit_ctx ctx;
2014 	u8 *image_ptr;
2015 	u8 *ro_image_ptr;
2016 	int body_idx;
2017 	int exentry_idx;
2018 
2019 	if (!prog->jit_requested)
2020 		return prog;
2021 
2022 	jit_data = prog->aux->jit_data;
2023 	if (!jit_data) {
2024 		jit_data = kzalloc_obj(*jit_data);
2025 		if (!jit_data)
2026 			return prog;
2027 		prog->aux->jit_data = jit_data;
2028 	}
2029 	priv_stack_ptr = prog->aux->priv_stack_ptr;
2030 	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
2031 		/* Allocate the private stack: the verifier-calculated stack
2032 		 * size, rounded up, plus two memory guards to catch overflow
2033 		 * and underflow.
2034 		 */
2035 		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
2036 				      2 * PRIV_STACK_GUARD_SZ;
2037 		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
2038 		if (!priv_stack_ptr)
2039 			goto out_priv_stack;
2040 
2041 		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
2042 		prog->aux->priv_stack_ptr = priv_stack_ptr;
2043 	}
2044 	if (jit_data->ctx.offset) {
2045 		ctx = jit_data->ctx;
2046 		ro_image_ptr = jit_data->ro_image;
2047 		ro_header = jit_data->ro_header;
2048 		header = jit_data->header;
2049 		image_ptr = (void *)header + ((void *)ro_image_ptr
2050 						 - (void *)ro_header);
2051 		extra_pass = true;
2052 		prog_size = sizeof(u32) * ctx.idx;
2053 		goto skip_init_ctx;
2054 	}
2055 	memset(&ctx, 0, sizeof(ctx));
2056 	ctx.prog = prog;
2057 
2058 	ctx.offset = kvzalloc_objs(int, prog->len + 1);
2059 	if (ctx.offset == NULL)
2060 		goto out_off;
2061 
2062 	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
2063 	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
2064 
2065 	if (priv_stack_ptr)
2066 		ctx.priv_sp_used = true;
2067 
2068 	/* Pass 1: Estimate the maximum image size.
2069 	 *
2070 	 * BPF line info needs ctx->offset[i] to be the offset of
2071 	 * instruction[i] in jited image, so build prologue first.
2072 	 */
2073 	if (build_prologue(&ctx, was_classic))
2074 		goto out_off;
2075 
2076 	if (build_body(&ctx, extra_pass))
2077 		goto out_off;
2078 
2079 	ctx.epilogue_offset = ctx.idx;
2080 	build_epilogue(&ctx, was_classic);
2081 	build_plt(&ctx);
2082 
2083 	extable_align = __alignof__(struct exception_table_entry);
2084 	extable_size = prog->aux->num_exentries *
2085 		sizeof(struct exception_table_entry);
2086 
2087 	/* Now we know the maximum image size. */
2088 	prog_size = sizeof(u32) * ctx.idx;
2089 	/* also allocate space for plt target */
2090 	extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
2091 	image_size = extable_offset + extable_size;
2092 	ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
2093 					      sizeof(u64), &header, &image_ptr,
2094 					      jit_fill_hole);
2095 	if (!ro_header)
2096 		goto out_off;
2097 
2098 	/* Pass 2: Determine jited position and result for each instruction */
2099 
2100 	/*
2101 	 * Use image (RW) for writing the JITed instructions, but also keep
2102 	 * ro_image (RX) for calculating offsets within the final image. The
2103 	 * RW image is later copied to the RX image, from which the program
2104 	 * will run; bpf_jit_binary_pack_finalize() performs this copy as the
2105 	 * final step.
2106 	 */
2107 	ctx.image = (__le32 *)image_ptr;
2108 	ctx.ro_image = (__le32 *)ro_image_ptr;
2109 	if (extable_size)
2110 		prog->aux->extable = (void *)ro_image_ptr + extable_offset;
2111 skip_init_ctx:
2112 	ctx.idx = 0;
2113 	ctx.exentry_idx = 0;
2114 	ctx.write = true;
2115 
2116 	build_prologue(&ctx, was_classic);
2117 
2118 	/* Record exentry_idx and body_idx before first build_body */
2119 	exentry_idx = ctx.exentry_idx;
2120 	body_idx = ctx.idx;
2121 	/* Don't write body instructions to memory for now */
2122 	ctx.write = false;
2123 
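	/*
	 * This pass only settles the instruction offsets: positions may
	 * still move (e.g. a BPF_CALL to a subprog can shrink from an
	 * indirect to a direct call), so the final encodings are written in
	 * pass 3 below.
	 */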
2124 	if (build_body(&ctx, extra_pass))
2125 		goto out_free_hdr;
2126 
2127 	ctx.epilogue_offset = ctx.idx;
2128 	ctx.exentry_idx = exentry_idx;
2129 	ctx.idx = body_idx;
2130 	ctx.write = true;
2131 
2132 	/* Pass 3: Adjust jump offset and write final image */
2133 	if (build_body(&ctx, extra_pass) ||
2134 		WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset))
2135 		goto out_free_hdr;
2136 
2137 	build_epilogue(&ctx, was_classic);
2138 	build_plt(&ctx);
2139 
2140 	/* Extra pass to validate JITed code. */
2141 	if (validate_ctx(&ctx))
2142 		goto out_free_hdr;
2143 
2144 	/* update the real prog size */
2145 	prog_size = sizeof(u32) * ctx.idx;
2146 
2147 	/* And we're done. */
2148 	if (bpf_jit_enable > 1)
2149 		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
2150 
2151 	if (!prog->is_func || extra_pass) {
2152 		/* The jited image may shrink since the jited result for
2153 		 * BPF_CALL to subprog may be changed from indirect call
2154 		 * to direct call.
2155 		 */
2156 		if (extra_pass && ctx.idx > jit_data->ctx.idx) {
2157 			pr_err_once("multi-func JIT bug %d > %d\n",
2158 				    ctx.idx, jit_data->ctx.idx);
2159 			goto out_free_hdr;
2160 		}
2161 		if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
2162 			/* ro_header and header have been freed */
2163 			ro_header = NULL;
2164 			header = NULL;
2165 			goto out_free_hdr;
2166 		}
2167 	} else {
2168 		jit_data->ctx = ctx;
2169 		jit_data->ro_image = ro_image_ptr;
2170 		jit_data->header = header;
2171 		jit_data->ro_header = ro_header;
2172 	}
2173 
2174 	prog->bpf_func = (void *)ctx.ro_image + cfi_get_offset();
2175 	prog->jited = 1;
2176 	prog->jited_len = prog_size - cfi_get_offset();
2177 
2178 	if (!prog->is_func || extra_pass) {
2179 		int i;
2180 
2181 		/* offset[prog->len] is the size of program */
2182 		for (i = 0; i <= prog->len; i++)
2183 			ctx.offset[i] *= AARCH64_INSN_SIZE;
2184 		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
2185 		/*
2186 		 * The bpf_prog_update_insn_ptrs function expects offsets to
2187 		 * point to the first byte of the jitted instruction (unlike
2188 		 * the bpf_prog_fill_jited_linfo above, which, for historical
2189 		 * reasons, expects them to point to the next instruction).
2190 		 */
2191 		bpf_prog_update_insn_ptrs(prog, ctx.offset, ctx.ro_image);
2192 out_off:
2193 		if (!ro_header && priv_stack_ptr) {
2194 			free_percpu(priv_stack_ptr);
2195 			prog->aux->priv_stack_ptr = NULL;
2196 		}
2197 		kvfree(ctx.offset);
2198 out_priv_stack:
2199 		kfree(jit_data);
2200 		prog->aux->jit_data = NULL;
2201 	}
2202 
2203 	return prog;
2204 
2205 out_free_hdr:
2206 	if (extra_pass) {
2207 		prog->bpf_func = NULL;
2208 		prog->jited = 0;
2209 		prog->jited_len = 0;
2210 	}
2211 	if (header) {
2212 		bpf_arch_text_copy(&ro_header->size, &header->size,
2213 				   sizeof(header->size));
2214 		bpf_jit_binary_pack_free(ro_header, header);
2215 	}
2216 	goto out_off;
2217 }
2218 
2219 bool bpf_jit_supports_private_stack(void)
2220 {
2221 	return true;
2222 }
2223 
2224 bool bpf_jit_supports_kfunc_call(void)
2225 {
2226 	return true;
2227 }
2228 
2229 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
2230 {
2231 	if (!aarch64_insn_copy(dst, src, len))
2232 		return ERR_PTR(-EINVAL);
2233 	return dst;
2234 }
2235 
2236 u64 bpf_jit_alloc_exec_limit(void)
2237 {
2238 	return VMALLOC_END - VMALLOC_START;
2239 }
2240 
2241 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
2242 bool bpf_jit_supports_subprog_tailcalls(void)
2243 {
2244 	return true;
2245 }
2246 
2247 static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
2248 			    int bargs_off, int retval_off, int run_ctx_off,
2249 			    bool save_ret)
2250 {
2251 	__le32 *branch;
2252 	u64 enter_prog;
2253 	u64 exit_prog;
2254 	struct bpf_prog *p = l->link.prog;
2255 	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
2256 
2257 	enter_prog = (u64)bpf_trampoline_enter(p);
2258 	exit_prog = (u64)bpf_trampoline_exit(p);
2259 
2260 	if (l->cookie == 0) {
2261 		/* if cookie is zero, one instruction is enough to store it */
2262 		emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
2263 	} else {
2264 		emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
2265 		emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
2266 		     ctx);
2267 	}
2268 
2269 	/* save p to callee saved register x19 to avoid loading p with mov_i64
2270 	 * each time.
2271 	 */
2272 	emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);
2273 
2274 	/* arg1: prog */
2275 	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
2276 	/* arg2: &run_ctx */
2277 	emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx);
2278 
2279 	emit_call(enter_prog, ctx);
2280 
2281 	/* save return value to callee saved register x20 */
2282 	emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
2283 
2284 	/* if (__bpf_prog_enter(prog) == 0)
2285 	 *         goto skip_exec_of_prog;
2286 	 */
2287 	branch = ctx->image + ctx->idx;
2288 	emit(A64_NOP, ctx);
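	/* The nop above is a placeholder: once the landing offset is known it
	 * is rewritten into the cbz below (image-writing passes only, i.e.
	 * when ctx->image is non-NULL).
	 */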
2289 
2290 	emit(A64_ADD_I(1, A64_R(0), A64_SP, bargs_off), ctx);
2291 	if (!p->jited)
2292 		emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
2293 
2294 	emit_call((const u64)p->bpf_func, ctx);
2295 
2296 	if (save_ret)
2297 		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
2298 
2299 	if (ctx->image) {
2300 		int offset = &ctx->image[ctx->idx] - branch;
2301 		*branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset));
2302 	}
2303 
2304 	/* arg1: prog */
2305 	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
2306 	/* arg2: start time */
2307 	emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx);
2308 	/* arg3: &run_ctx */
2309 	emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx);
2310 
2311 	emit_call(exit_prog, ctx);
2312 }
2313 
2314 static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
2315 			       int bargs_off, int retval_off, int run_ctx_off,
2316 			       __le32 **branches)
2317 {
2318 	int i;
2319 
2320 	/* The first fmod_ret program will receive a garbage return value.
2321 	 * Set this to 0 to avoid confusing the program.
2322 	 */
2323 	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
2324 	for (i = 0; i < tl->nr_links; i++) {
2325 		invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off,
2326 				run_ctx_off, true);
2327 		/* if (*(u64 *)(sp + retval_off) !=  0)
2328 		 *	goto do_fexit;
2329 		 */
2330 		emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);
2331 		/* Save the location of branch, and generate a nop.
2332 		 * This nop will be replaced with a cbnz later.
2333 		 */
2334 		branches[i] = ctx->image + ctx->idx;
2335 		emit(A64_NOP, ctx);
2336 	}
2337 }
2338 
2339 struct arg_aux {
2340 	/* how many args are passed through registers, the rest of the args are
2341 	 * passed through stack
2342 	 */
2343 	int args_in_regs;
2344 	/* how many registers are used to pass arguments */
2345 	int regs_for_args;
2346 	/* how much stack is used for additional args passed to bpf program
2347 	 * that did not fit in original function registers
2348 	 */
2349 	int bstack_for_args;
2350 	/* how much stack is used for additional args passed to the
2351 	 * original function when called from trampoline (this one needs
2352 	 * arguments to be properly aligned)
2353 	 */
2354 	int ostack_for_args;
2355 };
2356 
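/*
 * Worked example for calc_arg_aux() below: a function taking seven u64
 * arguments plus one 16-byte struct ends up with args_in_regs == 7 and
 * regs_for_args == 7, since the struct would need two registers but only
 * one remains; the struct is therefore passed on the stack and
 * bstack_for_args == ostack_for_args == 16.
 */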
2357 static int calc_arg_aux(const struct btf_func_model *m,
2358 			 struct arg_aux *a)
2359 {
2360 	int stack_slots, nregs, slots, i;
2361 
2362 	/* verifier ensures m->nr_args <= MAX_BPF_FUNC_ARGS */
2363 	for (i = 0, nregs = 0; i < m->nr_args; i++) {
2364 		slots = (m->arg_size[i] + 7) / 8;
2365 		if (nregs + slots <= 8) /* passed through register ? */
2366 			nregs += slots;
2367 		else
2368 			break;
2369 	}
2370 
2371 	a->args_in_regs = i;
2372 	a->regs_for_args = nregs;
2373 	a->ostack_for_args = 0;
2374 	a->bstack_for_args = 0;
2375 
2376 	/* the remaining arguments are passed on the stack */
2377 	for (; i < m->nr_args; i++) {
2378 		stack_slots = (m->arg_size[i] + 7) / 8;
2379 		a->bstack_for_args += stack_slots * 8;
2380 		a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
2381 	}
2382 
2383 	return 0;
2384 }
2385 
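/*
 * On-stack arguments are copied in whole 8-byte slots, so a sub-8-byte
 * argument leaves junk in the unused part of its last slot. For example,
 * a 3-byte argument on a little-endian kernel gives effective_bytes == 3
 * and garbage_bits == 40: LSL #40 followed by LSR #40 clears bits [63:24]
 * of the slot while preserving the low 24 bits.
 */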
2386 static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes)
2387 {
2388 	if (effective_bytes) {
2389 		int garbage_bits = 64 - 8 * effective_bytes;
2390 #ifdef CONFIG_CPU_BIG_ENDIAN
2391 		/* garbage bits are at the right end */
2392 		emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
2393 		emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
2394 #else
2395 		/* garbage bits are at the left end */
2396 		emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
2397 		emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
2398 #endif
2399 	}
2400 }
2401 
2402 static void save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off,
2403 		      const struct btf_func_model *m,
2404 		      const struct arg_aux *a,
2405 		      bool for_call_origin)
2406 {
2407 	int i;
2408 	int reg;
2409 	int doff;
2410 	int soff;
2411 	int slots;
2412 	u8 tmp = bpf2a64[TMP_REG_1];
2413 
2414 	/* store arguments to the stack for the bpf program, or restore
2415 	 * arguments from stack for the original function
2416 	 */
2417 	for (reg = 0; reg < a->regs_for_args; reg++) {
2418 		emit(for_call_origin ?
2419 		     A64_LDR64I(reg, A64_SP, bargs_off) :
2420 		     A64_STR64I(reg, A64_SP, bargs_off),
2421 		     ctx);
2422 		bargs_off += 8;
2423 	}
2424 
2425 	soff = 32; /* on stack arguments start from FP + 32 */
2426 	doff = (for_call_origin ? oargs_off : bargs_off);
2427 
2428 	/* save on stack arguments */
2429 	for (i = a->args_in_regs; i < m->nr_args; i++) {
2430 		slots = (m->arg_size[i] + 7) / 8;
2431 		/* verifier ensures arg_size <= 16, so slots equals 1 or 2 */
2432 		while (slots-- > 0) {
2433 			emit(A64_LDR64I(tmp, A64_FP, soff), ctx);
2434 			/* if there is unused space in the last slot, clear
2435 			 * the garbage contained in the space.
2436 			 */
2437 			if (slots == 0 && !for_call_origin)
2438 				clear_garbage(ctx, tmp, m->arg_size[i] % 8);
2439 			emit(A64_STR64I(tmp, A64_SP, doff), ctx);
2440 			soff += 8;
2441 			doff += 8;
2442 		}
2443 	}
2444 }
2445 
2446 static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs)
2447 {
2448 	int reg;
2449 
2450 	for (reg = 0; reg < nregs; reg++) {
2451 		emit(A64_LDR64I(reg, A64_SP, bargs_off), ctx);
2452 		bargs_off += 8;
2453 	}
2454 }
2455 
2456 static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
2457 {
2458 	return fentry_links->nr_links == 1 &&
2459 		fentry_links->links[0]->link.type == BPF_LINK_TYPE_STRUCT_OPS;
2460 }
2461 
2462 static void store_func_meta(struct jit_ctx *ctx, u64 func_meta, int func_meta_off)
2463 {
2464 	emit_a64_mov_i64(A64_R(10), func_meta, ctx);
2465 	emit(A64_STR64I(A64_R(10), A64_SP, func_meta_off), ctx);
2466 }
2467 
2468 /* Based on the x86's implementation of arch_prepare_bpf_trampoline().
2469  *
2470  * bpf prog and function entry before bpf trampoline hooked:
2471  *   mov x9, lr
2472  *   nop
2473  *
2474  * bpf prog and function entry after bpf trampoline hooked:
2475  *   mov x9, lr
2476  *   bl  <bpf_trampoline or plt>
2477  *
2478  */
2479 static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
2480 			      struct bpf_tramp_links *tlinks, void *func_addr,
2481 			      const struct btf_func_model *m,
2482 			      const struct arg_aux *a,
2483 			      u32 flags)
2484 {
2485 	int i;
2486 	int stack_size;
2487 	int retaddr_off;
2488 	int regs_off;
2489 	int retval_off;
2490 	int bargs_off;
2491 	int func_meta_off;
2492 	int ip_off;
2493 	int run_ctx_off;
2494 	int oargs_off;
2495 	int nfuncargs;
2496 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
2497 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
2498 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
2499 	bool save_ret;
2500 	__le32 **branches = NULL;
2501 	bool is_struct_ops = is_struct_ops_tramp(fentry);
2502 	int cookie_off, cookie_cnt, cookie_bargs_off;
2503 	int fsession_cnt = bpf_fsession_cnt(tlinks);
2504 	u64 func_meta;
2505 
2506 	/* trampoline stack layout:
2507 	 *                    [ parent ip         ]
2508 	 *                    [ FP                ]
2509 	 * SP + retaddr_off   [ self ip           ]
2510 	 *                    [ FP                ]
2511 	 *
2512 	 *                    [ padding           ] align SP to multiples of 16
2513 	 *
2514 	 *                    [ x20               ] callee saved reg x20
2515 	 * SP + regs_off      [ x19               ] callee saved reg x19
2516 	 *
2517 	 * SP + retval_off    [ return value      ] BPF_TRAMP_F_CALL_ORIG or
2518 	 *                                          BPF_TRAMP_F_RET_FENTRY_RET
2519 	 *                    [ arg reg N         ]
2520 	 *                    [ ...               ]
2521 	 * SP + bargs_off     [ arg reg 1         ] for bpf
2522 	 *
2523 	 * SP + func_meta_off [ regs count, etc   ]
2524 	 *
2525 	 * SP + ip_off        [ traced function   ] BPF_TRAMP_F_IP_ARG flag
2526 	 *
2527 	 *                    [ stack cookie N    ]
2528 	 *                    [ ...               ]
2529 	 * SP + cookie_off    [ stack cookie 1    ]
2530 	 *
2531 	 * SP + run_ctx_off   [ bpf_tramp_run_ctx ]
2532 	 *
2533 	 *                    [ stack arg N       ]
2534 	 *                    [ ...               ]
2535 	 * SP + oargs_off     [ stack arg 1       ] for original func
2536 	 */
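	/* The offsets below are assigned bottom-up from the final SP: each
	 * area records the running stack_size before it is bumped, the total
	 * is rounded up to 16 to keep SP aligned, and retaddr_off ends up
	 * pointing at the saved LR ("self ip") in the frame record just
	 * above the new FP.
	 */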
2537 
2538 	stack_size = 0;
2539 	oargs_off = stack_size;
2540 	if (flags & BPF_TRAMP_F_CALL_ORIG)
2541 		stack_size += a->ostack_for_args;
2542 
2543 	run_ctx_off = stack_size;
2544 	/* room for bpf_tramp_run_ctx */
2545 	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
2546 
2547 	cookie_off = stack_size;
2548 	/* room for session cookies */
2549 	cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
2550 	stack_size += cookie_cnt * 8;
2551 
2552 	ip_off = stack_size;
2553 	/* room for IP address argument */
2554 	if (flags & BPF_TRAMP_F_IP_ARG)
2555 		stack_size += 8;
2556 
2557 	func_meta_off = stack_size;
2558 	/* room for function metadata, such as regs count */
2559 	stack_size += 8;
2560 
2561 	bargs_off = stack_size;
2562 	/* room for args */
2563 	nfuncargs = a->regs_for_args + a->bstack_for_args / 8;
2564 	stack_size += 8 * nfuncargs;
2565 
2566 	/* room for return value */
2567 	retval_off = stack_size;
2568 	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
2569 	if (save_ret)
2570 		stack_size += 8;
2571 
2572 	/* room for callee saved registers, currently x19 and x20 are used */
2573 	regs_off = stack_size;
2574 	stack_size += 16;
2575 
2576 	/* round up to multiples of 16 to avoid SPAlignmentFault */
2577 	stack_size = round_up(stack_size, 16);
2578 
2579 	/* the return address sits just above FP */
2580 	retaddr_off = stack_size + 8;
2581 
2582 	if (flags & BPF_TRAMP_F_INDIRECT) {
2583 		/*
2584 		 * Indirect call for bpf_struct_ops
2585 		 */
2586 		emit_kcfi(cfi_get_func_hash(func_addr), ctx);
2587 	}
2588 	/* bpf trampoline may be invoked by 3 instruction types:
2589 	 * 1. bl, attached to bpf prog or kernel function via short jump
2590 	 * 2. br, attached to bpf prog or kernel function via long jump
2591 	 * 3. blr, working as a function pointer, used by struct_ops.
2592 	 * So BTI_JC should be used here to support both br and blr.
2593 	 */
2594 	emit_bti(A64_BTI_JC, ctx);
2595 
2596 	/* x9 is not set for struct_ops */
2597 	if (!is_struct_ops) {
2598 		/* frame for parent function */
2599 		emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
2600 		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
2601 	}
2602 
2603 	/* frame for patched function for tracing, or caller for struct_ops */
2604 	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
2605 	emit(A64_MOV(1, A64_FP, A64_SP), ctx);
2606 
2607 	/* allocate stack space */
2608 	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
2609 
2610 	if (flags & BPF_TRAMP_F_IP_ARG) {
2611 		/* save ip address of the traced function */
2612 		emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx);
2613 		emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
2614 	}
2615 
2616 	/* save function metadata */
2617 	func_meta = nfuncargs;
2618 	store_func_meta(ctx, func_meta, func_meta_off);
2619 
2620 	/* save args for bpf */
2621 	save_args(ctx, bargs_off, oargs_off, m, a, false);
2622 
2623 	/* save callee saved registers */
2624 	emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
2625 	emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
2626 
2627 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2628 		/* for the first pass, assume the worst case */
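		/* (emit_a64_mov_i64() expands to at most four movz/movk
		 * instructions, and the final address of @im is not known in
		 * the sizing pass, when ctx->image is NULL)
		 */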
2629 		if (!ctx->image)
2630 			ctx->idx += 4;
2631 		else
2632 			emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
2633 		emit_call((const u64)__bpf_tramp_enter, ctx);
2634 	}
2635 
2636 	if (fsession_cnt) {
2637 		/* clear all the session cookies' value */
2638 		/* clear all the session cookies' values */
2639 		for (int i = 0; i < cookie_cnt; i++)
2640 			emit(A64_STR64I(A64_R(10), A64_SP, cookie_off + 8 * i), ctx);
2641 		/* clear the return value to make sure fentry always gets 0 */
2642 		emit(A64_STR64I(A64_R(10), A64_SP, retval_off), ctx);
2643 	}
2644 
2645 	cookie_bargs_off = (bargs_off - cookie_off) / 8;
2646 	for (i = 0; i < fentry->nr_links; i++) {
2647 		if (bpf_prog_calls_session_cookie(fentry->links[i])) {
2648 			u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);
2649 
2650 			store_func_meta(ctx, meta, func_meta_off);
2651 			cookie_bargs_off--;
2652 		}
2653 		invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
2654 				retval_off, run_ctx_off,
2655 				flags & BPF_TRAMP_F_RET_FENTRY_RET);
2656 	}
2657 
2658 	if (fmod_ret->nr_links) {
2659 		branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
2660 				   GFP_KERNEL);
2661 		if (!branches)
2662 			return -ENOMEM;
2663 
2664 		invoke_bpf_mod_ret(ctx, fmod_ret, bargs_off, retval_off,
2665 				   run_ctx_off, branches);
2666 	}
2667 
2668 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2669 		/* save args for original func */
2670 		save_args(ctx, bargs_off, oargs_off, m, a, true);
2671 		/* call original func */
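		/* retaddr_off holds the saved LR of the patched function,
		 * i.e. the instruction right after the patchsite. Point LR at
		 * the instruction following the ret below so the original
		 * function returns back into this trampoline, then branch to
		 * the patched function via x10.
		 */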
2672 		emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
2673 		emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
2674 		emit(A64_RET(A64_R(10)), ctx);
2675 		/* store return value */
2676 		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
2677 		/* reserve a nop for bpf_tramp_image_put */
2678 		im->ip_after_call = ctx->ro_image + ctx->idx;
2679 		emit(A64_NOP, ctx);
2680 	}
2681 
2682 	/* update the branches saved in invoke_bpf_mod_ret with cbnz */
2683 	for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
2684 		int offset = &ctx->image[ctx->idx] - branches[i];
2685 		*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
2686 	}
2687 
2688 	/* set the "is_return" flag for fsession */
2689 	func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
2690 	if (fsession_cnt)
2691 		store_func_meta(ctx, func_meta, func_meta_off);
2692 
2693 	cookie_bargs_off = (bargs_off - cookie_off) / 8;
2694 	for (i = 0; i < fexit->nr_links; i++) {
2695 		if (bpf_prog_calls_session_cookie(fexit->links[i])) {
2696 			u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);
2697 
2698 			store_func_meta(ctx, meta, func_meta_off);
2699 			cookie_bargs_off--;
2700 		}
2701 		invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
2702 				run_ctx_off, false);
2703 	}
2704 
2705 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2706 		im->ip_epilogue = ctx->ro_image + ctx->idx;
2707 		/* for the first pass, assume the worst case */
2708 		if (!ctx->image)
2709 			ctx->idx += 4;
2710 		else
2711 			emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
2712 		emit_call((const u64)__bpf_tramp_exit, ctx);
2713 	}
2714 
2715 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
2716 		restore_args(ctx, bargs_off, a->regs_for_args);
2717 
2718 	/* restore callee saved register x19 and x20 */
2719 	emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
2720 	emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
2721 
2722 	if (save_ret)
2723 		emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx);
2724 
2725 	/* reset SP  */
2726 	emit(A64_MOV(1, A64_SP, A64_FP), ctx);
2727 
2728 	if (is_struct_ops) {
2729 		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
2730 		emit(A64_RET(A64_LR), ctx);
2731 	} else {
2732 		/* pop frames */
2733 		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
2734 		emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx);
2735 
2736 		if (flags & BPF_TRAMP_F_SKIP_FRAME) {
2737 			/* skip patched function, return to parent */
2738 			emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
2739 			emit(A64_RET(A64_R(9)), ctx);
2740 		} else {
2741 			/* return to patched function */
2742 			emit(A64_MOV(1, A64_R(10), A64_LR), ctx);
2743 			emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
2744 			emit(A64_RET(A64_R(10)), ctx);
2745 		}
2746 	}
2747 
2748 	kfree(branches);
2749 
2750 	return ctx->idx;
2751 }
2752 
2753 bool bpf_jit_supports_fsession(void)
2754 {
2755 	return true;
2756 }
2757 
2758 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
2759 			     struct bpf_tramp_links *tlinks, void *func_addr)
2760 {
2761 	struct jit_ctx ctx = {
2762 		.image = NULL,
2763 		.idx = 0,
2764 	};
2765 	struct bpf_tramp_image im;
2766 	struct arg_aux aaux;
2767 	int ret;
2768 
2769 	ret = calc_arg_aux(m, &aaux);
2770 	if (ret < 0)
2771 		return ret;
2772 
2773 	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags);
2774 	if (ret < 0)
2775 		return ret;
2776 
2777 	return ret * AARCH64_INSN_SIZE;
2778 }
2779 
2780 void *arch_alloc_bpf_trampoline(unsigned int size)
2781 {
2782 	return bpf_prog_pack_alloc(size, jit_fill_hole);
2783 }
2784 
2785 void arch_free_bpf_trampoline(void *image, unsigned int size)
2786 {
2787 	bpf_prog_pack_free(image, size);
2788 }
2789 
2790 int arch_protect_bpf_trampoline(void *image, unsigned int size)
2791 {
2792 	return 0;
2793 }
2794 
2795 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
2796 				void *ro_image_end, const struct btf_func_model *m,
2797 				u32 flags, struct bpf_tramp_links *tlinks,
2798 				void *func_addr)
2799 {
2800 	u32 size = ro_image_end - ro_image;
2801 	struct arg_aux aaux;
2802 	void *image, *tmp;
2803 	int ret;
2804 
2805 	/* image doesn't need to be in module memory range, so we can
2806 	 * use kvmalloc.
2807 	 */
2808 	image = kvmalloc(size, GFP_KERNEL);
2809 	if (!image)
2810 		return -ENOMEM;
2811 
2812 	struct jit_ctx ctx = {
2813 		.image = image,
2814 		.ro_image = ro_image,
2815 		.idx = 0,
2816 		.write = true,
2817 	};
2818 
2820 	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
2821 	ret = calc_arg_aux(m, &aaux);
2822 	if (ret)
2823 		goto out;
2824 	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags);
2825 
2826 	if (ret > 0 && validate_code(&ctx) < 0) {
2827 		ret = -EINVAL;
2828 		goto out;
2829 	}
2830 
2831 	if (ret > 0)
2832 		ret *= AARCH64_INSN_SIZE;
2833 
2834 	tmp = bpf_arch_text_copy(ro_image, image, size);
2835 	if (IS_ERR(tmp)) {
2836 		ret = PTR_ERR(tmp);
2837 		goto out;
2838 	}
2839 
2840 out:
2841 	kvfree(image);
2842 	return ret;
2843 }
2844 
2845 static bool is_long_jump(void *ip, void *target)
2846 {
2847 	long offset;
2848 
2849 	/* NULL target means this is a NOP */
2850 	if (!target)
2851 		return false;
2852 
2853 	offset = (long)target - (long)ip;
2854 	return offset < -SZ_128M || offset >= SZ_128M;
2855 }
2856 
2857 static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
2858 			     void *addr, void *plt, u32 *insn)
2859 {
2860 	void *target;
2861 
2862 	if (!addr) {
2863 		*insn = aarch64_insn_gen_nop();
2864 		return 0;
2865 	}
2866 
2867 	if (is_long_jump(ip, addr))
2868 		target = plt;
2869 	else
2870 		target = addr;
2871 
2872 	*insn = aarch64_insn_gen_branch_imm((unsigned long)ip,
2873 					    (unsigned long)target,
2874 					    type);
2875 
2876 	return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT;
2877 }
2878 
2879 /* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf
2880  * trampoline with the branch instruction from @ip to @new_addr. If @old_addr
2881  * or @new_addr is NULL, the old or new instruction is NOP.
2882  *
2883  * When @ip is the bpf prog entry, a bpf trampoline is being attached or
2884  * detached. Since bpf trampoline and bpf prog are allocated separately with
2885  * vmalloc, the address distance may exceed 128MB, the maximum branch range.
2886  * So long jump should be handled.
2887  *
2888  * When a bpf prog is constructed, a plt pointing to the empty trampoline
2889  * dummy_tramp is placed at the end:
2890  *
2891  *      bpf_prog:
2892  *              mov x9, lr
2893  *              nop // patchsite
2894  *              ...
2895  *              ret
2896  *
2897  *      plt:
2898  *              ldr x10, target
2899  *              br x10
2900  *      target:
2901  *              .quad dummy_tramp // plt target
2902  *
2903  * This is also the state when no trampoline is attached.
2904  *
2905  * When a short-jump bpf trampoline is attached, the patchsite is patched
2906  * to a bl instruction to the trampoline directly:
2907  *
2908  *      bpf_prog:
2909  *              mov x9, lr
2910  *              bl <short-jump bpf trampoline address> // patchsite
2911  *              ...
2912  *              ret
2913  *
2914  *      plt:
2915  *              ldr x10, target
2916  *              br x10
2917  *      target:
2918  *              .quad dummy_tramp // plt target
2919  *
2920  * When a long-jump bpf trampoline is attached, the plt target is filled with
2921  * the trampoline address and the patchsite is patched to a bl instruction to
2922  * the plt:
2923  *
2924  *      bpf_prog:
2925  *              mov x9, lr
2926  *              bl plt // patchsite
2927  *              ...
2928  *              ret
2929  *
2930  *      plt:
2931  *              ldr x10, target
2932  *              br x10
2933  *      target:
2934  *              .quad <long-jump bpf trampoline address> // plt target
2935  *
2936  * The dummy_tramp is used to prevent another CPU from jumping to an unknown
2937  * location while patching is in progress, which keeps the patching simple.
2938  */
2939 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
2940 		       enum bpf_text_poke_type new_t, void *old_addr,
2941 		       void *new_addr)
2942 {
2943 	int ret;
2944 	u32 old_insn;
2945 	u32 new_insn;
2946 	u32 replaced;
2947 	struct bpf_plt *plt = NULL;
2948 	unsigned long size = 0UL;
2949 	unsigned long offset = ~0UL;
2950 	enum aarch64_insn_branch_type branch_type;
2951 	char namebuf[KSYM_NAME_LEN];
2952 	void *image = NULL;
2953 	u64 plt_target = 0ULL;
2954 	bool poking_bpf_entry;
2955 
2956 	if (!bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
2957 		/* Only poking bpf text is supported. Since kernel function
2958 		 * entry is set up by ftrace, we rely on ftrace to poke kernel
2959 		 * functions.
2960 		 */
2961 		return -ENOTSUPP;
2962 
2963 	image = ip - offset;
2964 	/* zero offset means we're poking bpf prog entry */
2965 	poking_bpf_entry = (offset == 0UL);
2966 
2967 	/* bpf prog entry, find plt and the real patchsite */
2968 	if (poking_bpf_entry) {
2969 		/* plt locates at the end of bpf prog */
2970 		plt = image + size - PLT_TARGET_OFFSET;
2971 
2972 		/* skip to the nop instruction in bpf prog entry:
2973 		 * bti c // if BTI enabled
2974 		 * mov x9, x30
2975 		 * nop
2976 		 */
2977 		ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
2978 	}
2979 
2980 	/* long jump is only possible at bpf prog entry */
2981 	if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
2982 		    !poking_bpf_entry))
2983 		return -EINVAL;
2984 
2985 	branch_type = old_t == BPF_MOD_CALL ? AARCH64_INSN_BRANCH_LINK :
2986 					      AARCH64_INSN_BRANCH_NOLINK;
2987 	if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
2988 		return -EFAULT;
2989 
2990 	branch_type = new_t == BPF_MOD_CALL ? AARCH64_INSN_BRANCH_LINK :
2991 					      AARCH64_INSN_BRANCH_NOLINK;
2992 	if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
2993 		return -EFAULT;
2994 
2995 	if (is_long_jump(ip, new_addr))
2996 		plt_target = (u64)new_addr;
2997 	else if (is_long_jump(ip, old_addr))
2998 		/* if the old target is a long jump and the new target is not,
2999 		 * restore the plt target to dummy_tramp, so there is always a
3000 		 * legal and harmless address stored in plt target, and we'll
3001 		 * never jump from plt to an unknown place.
3002 		 */
3003 		plt_target = (u64)&dummy_tramp;
3004 
3005 	if (plt_target) {
3006 		/* non-zero plt_target indicates we're patching a bpf prog,
3007 		 * which is read only.
3008 		 */
3009 		if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1))
3010 			return -EFAULT;
3011 		WRITE_ONCE(plt->target, plt_target);
3012 		set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1);
3013 		/* since plt target points to either the new trampoline
3014 		 * or dummy_tramp, even if another CPU reads the old plt
3015 		 * target value before fetching the bl instruction to plt,
3016 		 * it will be brought back by dummy_tramp, so no barrier is
3017 		 * required here.
3018 		 */
3019 	}
3020 
3021 	/* if the old target and the new target are both long jumps, no
3022 	 * patching is required
3023 	 */
3024 	if (old_insn == new_insn)
3025 		return 0;
3026 
3027 	mutex_lock(&text_mutex);
3028 	if (aarch64_insn_read(ip, &replaced)) {
3029 		ret = -EFAULT;
3030 		goto out;
3031 	}
3032 
3033 	if (replaced != old_insn) {
3034 		ret = -EFAULT;
3035 		goto out;
3036 	}
3037 
3038 	/* We call aarch64_insn_patch_text_nosync() to replace the instruction
3039 	 * atomically, so no other CPUs will fetch a half-new and half-old
3040 	 * instruction. But there is a chance that another CPU executes the
3041 	 * old instruction after the patching operation finishes (e.g.,
3042 	 * pipeline not flushed, or icache not synchronized yet).
3043 	 *
3044 	 * 1. when a new trampoline is attached, it is not a problem for
3045 	 *    different CPUs to jump to different trampolines temporarily.
3046 	 *
3047 	 * 2. when an old trampoline is freed, we have to wait for all other
3048 	 *    CPUs to exit the trampoline and make sure it is no longer
3049 	 *    reachable. Since bpf_tramp_image_put() already uses percpu_ref
3050 	 *    and task-based RCU for that synchronization, there is no need to
3051 	 *    use the synchronizing patch variant here; see bpf_tramp_image_put().
3052 	 */
3053 	ret = aarch64_insn_patch_text_nosync(ip, new_insn);
3054 out:
3055 	mutex_unlock(&text_mutex);
3056 
3057 	return ret;
3058 }
3059 
3060 bool bpf_jit_supports_ptr_xchg(void)
3061 {
3062 	return true;
3063 }
3064 
3065 bool bpf_jit_supports_exceptions(void)
3066 {
3067 	/* We unwind through both kernel frames (starting from within the
3068 	 * bpf_throw call) and BPF frames, so the FP unwinder must be enabled
3069 	 * to walk kernel frames and reach BPF frames in the stack trace. The
3070 	 * arm64 kernel is always compiled with CONFIG_FRAME_POINTER=y.
3071 	 */
3072 	return true;
3073 }
3074 
3075 bool bpf_jit_supports_arena(void)
3076 {
3077 	return true;
3078 }
3079 
3080 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
3081 {
3082 	if (!in_arena)
3083 		return true;
3084 	switch (insn->code) {
3085 	case BPF_STX | BPF_ATOMIC | BPF_W:
3086 	case BPF_STX | BPF_ATOMIC | BPF_DW:
3087 		if (!bpf_atomic_is_load_store(insn) &&
3088 		    !cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
3089 			return false;
3090 	}
3091 	return true;
3092 }
3093 
3094 bool bpf_jit_supports_percpu_insn(void)
3095 {
3096 	return true;
3097 }
3098 
3099 bool bpf_jit_bypass_spec_v4(void)
3100 {
3101 	/* In case of arm64, we rely on the firmware mitigation of Speculative
3102 	 * Store Bypass as controlled via the ssbd kernel parameter. Whenever
3103 	 * the mitigation is enabled, it works for all of the kernel code with
3104 	 * no need to provide any additional instructions. Therefore, skip
3105 	 * inserting nospec insns against Spectre v4.
3106 	 */
3107 	return true;
3108 }
3109 
3110 bool bpf_jit_supports_timed_may_goto(void)
3111 {
3112 	return true;
3113 }
3114 
3115 bool bpf_jit_inlines_helper_call(s32 imm)
3116 {
3117 	switch (imm) {
3118 	case BPF_FUNC_get_smp_processor_id:
3119 	case BPF_FUNC_get_current_task:
3120 	case BPF_FUNC_get_current_task_btf:
3121 		return true;
3122 	default:
3123 		return false;
3124 	}
3125 }
3126 
3127 void bpf_jit_free(struct bpf_prog *prog)
3128 {
3129 	if (prog->jited) {
3130 		struct arm64_jit_data *jit_data = prog->aux->jit_data;
3131 		struct bpf_binary_header *hdr;
3132 		void __percpu *priv_stack_ptr;
3133 		int priv_stack_alloc_sz;
3134 
3135 		/*
3136 		 * If we fail the final pass of JIT (from jit_subprogs),
3137 		 * the program may not be finalized yet. Call finalize here
3138 		 * before freeing it.
3139 		 */
3140 		if (jit_data) {
3141 			bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header);
3142 			kfree(jit_data);
3143 		}
3144 		prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
3145 		hdr = bpf_jit_binary_pack_hdr(prog);
3146 		bpf_jit_binary_pack_free(hdr, NULL);
3147 		priv_stack_ptr = prog->aux->priv_stack_ptr;
3148 		if (priv_stack_ptr) {
3149 			priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
3150 					      2 * PRIV_STACK_GUARD_SZ;
3151 			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
3152 			free_percpu(prog->aux->priv_stack_ptr);
3153 		}
3154 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
3155 	}
3156 
3157 	bpf_prog_unlock_free(prog);
3158 }
3159