xref: /linux/arch/arm64/net/bpf_jit_comp.c (revision f6606a44bc438ec5f1d450d0153878e80e79ff80)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * BPF JIT compiler for ARM64
4  *
5  * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
6  */
7 
8 #define pr_fmt(fmt) "bpf_jit: " fmt
9 
10 #include <linux/arm-smccc.h>
11 #include <linux/bitfield.h>
12 #include <linux/bpf.h>
13 #include <linux/cfi.h>
14 #include <linux/filter.h>
15 #include <linux/memory.h>
16 #include <linux/printk.h>
17 #include <linux/slab.h>
18 
19 #include <asm/asm-extable.h>
20 #include <asm/byteorder.h>
21 #include <asm/cpufeature.h>
22 #include <asm/debug-monitors.h>
23 #include <asm/insn.h>
24 #include <asm/text-patching.h>
25 #include <asm/set_memory.h>
26 
27 #include "bpf_jit.h"
28 
/*
 * JIT-internal register slots. They are numbered directly after
 * MAX_BPF_JIT_REG and are used as indices into bpf2a64[], alongside the
 * BPF_REG_* indices.
 */
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
/* Slot for the register holding the tail_call_cnt pointer */
#define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
/* Slot for the register holding the private stack pointer */
#define PRIVATE_SP (MAX_BPF_JIT_REG + 4)
/* Slot for the register holding the arena kern_vm_start address */
#define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
35 
/*
 * check_imm() - bail out if @imm does not fit a signed @bits-bit field.
 *
 * A positive value is rejected when anything remains after shifting it
 * right by (@bits - 1); a negative value is rejected when its bitwise
 * complement fails the same test.
 *
 * NOTE: this macro is not hygienic. It logs a variable named `i` that must
 * exist in the enclosing scope, and on failure it executes `return -EINVAL`
 * in the enclosing function. @imm is evaluated several times.
 */
#define check_imm(bits, imm) do {				\
	if ((((imm) > 0) && ((imm) >> ((bits) - 1))) ||		\
	    (((imm) < 0) && (~(imm) >> ((bits) - 1)))) {	\
		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
			i, imm, imm);				\
		return -EINVAL;					\
	}							\
} while (0)
/* Convenience wrappers for 19-bit and 26-bit immediates */
#define check_imm19(imm) check_imm(19, imm)
#define check_imm26(imm) check_imm(26, imm)
46 
/*
 * Map BPF registers to A64 registers.
 *
 * Indexed by BPF_REG_* and by the JIT-internal slots (TMP_REG_*, TCCNT_PTR,
 * PRIVATE_SP, ARENA_VM_START); each entry is the A64 register number used
 * for that slot.
 */
static const int bpf2a64[] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = A64_R(7),
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = A64_R(0),
	[BPF_REG_2] = A64_R(1),
	[BPF_REG_3] = A64_R(2),
	[BPF_REG_4] = A64_R(3),
	[BPF_REG_5] = A64_R(4),
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = A64_R(19),
	[BPF_REG_7] = A64_R(20),
	[BPF_REG_8] = A64_R(21),
	[BPF_REG_9] = A64_R(22),
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = A64_R(25),
	/* temporary registers for BPF JIT */
	[TMP_REG_1] = A64_R(10),
	[TMP_REG_2] = A64_R(11),
	[TMP_REG_3] = A64_R(12),
	/* tail_call_cnt_ptr */
	[TCCNT_PTR] = A64_R(26),
	/* temporary register for blinding constants */
	[BPF_REG_AX] = A64_R(9),
	/* callee saved register for private stack pointer */
	[PRIVATE_SP] = A64_R(27),
	/* callee saved register for kern_vm_start address */
	[ARENA_VM_START] = A64_R(28),
};
77 
/*
 * Per-compilation state shared by all emit helpers.
 *
 * Field notes below describe only what the helpers in this file visibly do
 * with each member; fields without a note are not referenced by the code
 * reviewed here.
 */
struct jit_ctx {
	/* BPF program being compiled */
	const struct bpf_prog *prog;
	/* index of the next instruction slot; advanced by every emit() */
	int idx;
	/* instruction index of the epilogue, see epilogue_offset() */
	int epilogue_offset;
	/* table indexed by BPF instruction number, see bpf2a64_offset() */
	int *offset;
	int exentry_idx;
	/* number of valid entries in used_callee_reg[] */
	int nr_used_callee_reg;
	u8 used_callee_reg[8]; /* r6~r9, fp, private sp, arena_vm_start */
	/* buffer that emit() stores instructions into (may be NULL) */
	__le32 *image;
	/* address base used when computing the PC for direct calls */
	__le32 *ro_image;
	/* prog->aux->stack_depth rounded up to 16, set by build_prologue() */
	u32 stack_size;
	u64 user_vm_start;
	/* when non-zero, loaded into the ARENA_VM_START register in the prologue */
	u64 arena_vm_start;
	/* the program references BPF_REG_FP (or is an exception boundary) */
	bool fp_used;
	/* the BPF stack lives on the private stack rather than on SP */
	bool priv_sp_used;
	/* emit() only stores into image when this is true */
	bool write;
};
95 
/*
 * In-image layout of the PLT entry produced by build_plt(): two
 * instructions followed by the 64-bit address they load and branch to.
 */
struct bpf_plt {
	u32 insn_ldr; /* load target */
	u32 insn_br;  /* branch to target */
	u64 target;   /* target value */
};

/* Size and byte offset of the target slot inside struct bpf_plt */
#define PLT_TARGET_SIZE   sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
104 
/*
 * Size (in bytes) and fill value of the guard area used to detect
 * overflow/underflow of the private stack. build_prologue() skips
 * PRIV_STACK_GUARD_SZ bytes past prog->aux->priv_stack_ptr when setting up
 * the private stack pointer.
 */
#define PRIV_STACK_GUARD_SZ    16
#define PRIV_STACK_GUARD_VAL   0xEB9F12345678eb9fULL
108 
109 static inline void emit(const u32 insn, struct jit_ctx *ctx)
110 {
111 	if (ctx->image != NULL && ctx->write)
112 		ctx->image[ctx->idx] = cpu_to_le32(insn);
113 
114 	ctx->idx++;
115 }
116 
117 static inline void emit_u32_data(const u32 data, struct jit_ctx *ctx)
118 {
119 	if (ctx->image != NULL && ctx->write)
120 		ctx->image[ctx->idx] = (__force __le32)data;
121 
122 	ctx->idx++;
123 }
124 
125 static inline void emit_a64_mov_i(const int is64, const int reg,
126 				  const s32 val, struct jit_ctx *ctx)
127 {
128 	u16 hi = val >> 16;
129 	u16 lo = val & 0xffff;
130 
131 	if (hi & 0x8000) {
132 		if (hi == 0xffff) {
133 			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
134 		} else {
135 			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
136 			if (lo != 0xffff)
137 				emit(A64_MOVK(is64, reg, lo, 0), ctx);
138 		}
139 	} else {
140 		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
141 		if (hi)
142 			emit(A64_MOVK(is64, reg, hi, 16), ctx);
143 	}
144 }
145 
146 static int i64_i16_blocks(const u64 val, bool inverse)
147 {
148 	return (((val >>  0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
149 	       (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
150 	       (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
151 	       (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
152 }
153 
154 static inline void emit_a64_mov_i64(const int reg, const u64 val,
155 				    struct jit_ctx *ctx)
156 {
157 	u64 nrm_tmp = val, rev_tmp = ~val;
158 	bool inverse;
159 	int shift;
160 
161 	if (!(nrm_tmp >> 32))
162 		return emit_a64_mov_i(0, reg, (u32)val, ctx);
163 
164 	inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
165 	shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
166 					  (fls64(nrm_tmp) - 1)), 16), 0);
167 	if (inverse)
168 		emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
169 	else
170 		emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
171 	shift -= 16;
172 	while (shift >= 0) {
173 		if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
174 			emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
175 		shift -= 16;
176 	}
177 }
178 
179 static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
180 {
181 	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
182 		emit(insn, ctx);
183 }
184 
185 static inline void emit_kcfi(u32 hash, struct jit_ctx *ctx)
186 {
187 	if (IS_ENABLED(CONFIG_CFI))
188 		emit_u32_data(hash, ctx);
189 }
190 
191 /*
192  * Kernel addresses in the vmalloc space use at most 48 bits, and the
193  * remaining bits are guaranteed to be 0x1. So we can compose the address
194  * with a fixed length movn/movk/movk sequence.
195  */
196 static inline void emit_addr_mov_i64(const int reg, const u64 val,
197 				     struct jit_ctx *ctx)
198 {
199 	u64 tmp = val;
200 	int shift = 0;
201 
202 	emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx);
203 	while (shift < 32) {
204 		tmp >>= 16;
205 		shift += 16;
206 		emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
207 	}
208 }
209 
210 static bool should_emit_indirect_call(long target, const struct jit_ctx *ctx)
211 {
212 	long offset;
213 
214 	/* when ctx->ro_image is not allocated or the target is unknown,
215 	 * emit indirect call
216 	 */
217 	if (!ctx->ro_image || !target)
218 		return true;
219 
220 	offset = target - (long)&ctx->ro_image[ctx->idx];
221 	return offset < -SZ_128M || offset >= SZ_128M;
222 }
223 
224 static void emit_direct_call(u64 target, struct jit_ctx *ctx)
225 {
226 	u32 insn;
227 	unsigned long pc;
228 
229 	pc = (unsigned long)&ctx->ro_image[ctx->idx];
230 	insn = aarch64_insn_gen_branch_imm(pc, target, AARCH64_INSN_BRANCH_LINK);
231 	emit(insn, ctx);
232 }
233 
234 static void emit_indirect_call(u64 target, struct jit_ctx *ctx)
235 {
236 	u8 tmp;
237 
238 	tmp = bpf2a64[TMP_REG_1];
239 	emit_addr_mov_i64(tmp, target, ctx);
240 	emit(A64_BLR(tmp), ctx);
241 }
242 
243 static void emit_call(u64 target, struct jit_ctx *ctx)
244 {
245 	if (should_emit_indirect_call((long)target, ctx))
246 		emit_indirect_call(target, ctx);
247 	else
248 		emit_direct_call(target, ctx);
249 }
250 
251 static inline int bpf2a64_offset(int bpf_insn, int off,
252 				 const struct jit_ctx *ctx)
253 {
254 	/* BPF JMP offset is relative to the next instruction */
255 	bpf_insn++;
256 	/*
257 	 * Whereas arm64 branch instructions encode the offset
258 	 * from the branch itself, so we must subtract 1 from the
259 	 * instruction offset.
260 	 */
261 	return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
262 }
263 
264 static void jit_fill_hole(void *area, unsigned int size)
265 {
266 	__le32 *ptr;
267 	/* We are guaranteed to have aligned memory. */
268 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
269 		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
270 }
271 
272 int bpf_arch_text_invalidate(void *dst, size_t len)
273 {
274 	if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len))
275 		return -EINVAL;
276 
277 	return 0;
278 }
279 
280 static inline int epilogue_offset(const struct jit_ctx *ctx)
281 {
282 	int to = ctx->epilogue_offset;
283 	int from = ctx->idx;
284 
285 	return to - from;
286 }
287 
288 static bool is_addsub_imm(u32 imm)
289 {
290 	/* Either imm12 or shifted imm12. */
291 	return !(imm & ~0xfff) || !(imm & ~0xfff000);
292 }
293 
294 static inline void emit_a64_add_i(const bool is64, const int dst, const int src,
295 				  const int tmp, const s32 imm, struct jit_ctx *ctx)
296 {
297 	if (is_addsub_imm(imm)) {
298 		emit(A64_ADD_I(is64, dst, src, imm), ctx);
299 	} else if (is_addsub_imm(-(u32)imm)) {
300 		emit(A64_SUB_I(is64, dst, src, -imm), ctx);
301 	} else {
302 		emit_a64_mov_i(is64, tmp, imm, ctx);
303 		emit(A64_ADD(is64, dst, src, tmp), ctx);
304 	}
305 }
306 
/*
 * AArch64 LDR/STR (immediate) comes in three flavours: Post-index,
 * Pre-index and Unsigned offset. The "unsigned offset" flavour is all the
 * BPF JIT needs.
 *
 * "Unsigned offset" LDR (immediate) encoding:
 *
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 1|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * "Unsigned offset" STR (immediate) encoding:
 *
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 0|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * The effective byte offset is derived from imm12 and scale as:
 *
 * offset = (u64)imm12 << scale
 *
 * is_lsi_offset() returns true when @offset can be expressed that way for
 * the given @scale: it must be non-negative, no larger than
 * 0xFFF << scale, and a multiple of 1 << scale.
 */
static bool is_lsi_offset(int offset, int scale)
{
	return offset >= 0 &&
	       offset <= (0xFFF << scale) &&
	       !(offset & ((1 << scale) - 1));
}
347 
348 /* generated main prog prologue:
349  *      bti c // if CONFIG_ARM64_BTI_KERNEL
350  *      mov x9, lr
351  *      nop  // POKE_OFFSET
352  *      paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
353  *      stp x29, lr, [sp, #-16]!
354  *      mov x29, sp
355  *      stp xzr, x26, [sp, #-16]!
356  *      mov x26, sp
357  *      // PROLOGUE_OFFSET
358  *	// save callee-saved registers
359  */
360 static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx)
361 {
362 	const bool is_main_prog = !bpf_is_subprog(ctx->prog);
363 	const u8 ptr = bpf2a64[TCCNT_PTR];
364 
365 	if (is_main_prog) {
366 		/* Initialize tail_call_cnt. */
367 		emit(A64_PUSH(A64_ZR, ptr, A64_SP), ctx);
368 		emit(A64_MOV(1, ptr, A64_SP), ctx);
369 	} else
370 		emit(A64_PUSH(ptr, ptr, A64_SP), ctx);
371 }
372 
373 static void find_used_callee_regs(struct jit_ctx *ctx)
374 {
375 	int i;
376 	const struct bpf_prog *prog = ctx->prog;
377 	const struct bpf_insn *insn = &prog->insnsi[0];
378 	int reg_used = 0;
379 
380 	for (i = 0; i < prog->len; i++, insn++) {
381 		if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
382 			reg_used |= 1;
383 
384 		if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
385 			reg_used |= 2;
386 
387 		if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8)
388 			reg_used |= 4;
389 
390 		if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9)
391 			reg_used |= 8;
392 
393 		if (insn->dst_reg == BPF_REG_FP || insn->src_reg == BPF_REG_FP) {
394 			ctx->fp_used = true;
395 			reg_used |= 16;
396 		}
397 	}
398 
399 	i = 0;
400 	if (reg_used & 1)
401 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_6];
402 
403 	if (reg_used & 2)
404 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_7];
405 
406 	if (reg_used & 4)
407 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_8];
408 
409 	if (reg_used & 8)
410 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
411 
412 	if (reg_used & 16) {
413 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
414 		if (ctx->priv_sp_used)
415 			ctx->used_callee_reg[i++] = bpf2a64[PRIVATE_SP];
416 	}
417 
418 	if (ctx->arena_vm_start)
419 		ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
420 
421 	ctx->nr_used_callee_reg = i;
422 }
423 
424 /* Save callee-saved registers */
425 static void push_callee_regs(struct jit_ctx *ctx)
426 {
427 	int reg1, reg2, i;
428 
429 	/*
430 	 * Program acting as exception boundary should save all ARM64
431 	 * Callee-saved registers as the exception callback needs to recover
432 	 * all ARM64 Callee-saved registers in its epilogue.
433 	 */
434 	if (ctx->prog->aux->exception_boundary) {
435 		emit(A64_PUSH(A64_R(19), A64_R(20), A64_SP), ctx);
436 		emit(A64_PUSH(A64_R(21), A64_R(22), A64_SP), ctx);
437 		emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
438 		emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
439 		emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
440 		ctx->fp_used = true;
441 	} else {
442 		find_used_callee_regs(ctx);
443 		for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
444 			reg1 = ctx->used_callee_reg[i];
445 			reg2 = ctx->used_callee_reg[i + 1];
446 			emit(A64_PUSH(reg1, reg2, A64_SP), ctx);
447 		}
448 		if (i < ctx->nr_used_callee_reg) {
449 			reg1 = ctx->used_callee_reg[i];
450 			/* keep SP 16-byte aligned */
451 			emit(A64_PUSH(reg1, A64_ZR, A64_SP), ctx);
452 		}
453 	}
454 }
455 
456 /* Restore callee-saved registers */
457 static void pop_callee_regs(struct jit_ctx *ctx)
458 {
459 	struct bpf_prog_aux *aux = ctx->prog->aux;
460 	int reg1, reg2, i;
461 
462 	/*
463 	 * Program acting as exception boundary pushes R23 and R24 in addition
464 	 * to BPF callee-saved registers. Exception callback uses the boundary
465 	 * program's stack frame, so recover these extra registers in the above
466 	 * two cases.
467 	 */
468 	if (aux->exception_boundary || aux->exception_cb) {
469 		emit(A64_POP(A64_R(27), A64_R(28), A64_SP), ctx);
470 		emit(A64_POP(A64_R(25), A64_R(26), A64_SP), ctx);
471 		emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx);
472 		emit(A64_POP(A64_R(21), A64_R(22), A64_SP), ctx);
473 		emit(A64_POP(A64_R(19), A64_R(20), A64_SP), ctx);
474 	} else {
475 		i = ctx->nr_used_callee_reg - 1;
476 		if (ctx->nr_used_callee_reg % 2 != 0) {
477 			reg1 = ctx->used_callee_reg[i];
478 			emit(A64_POP(reg1, A64_ZR, A64_SP), ctx);
479 			i--;
480 		}
481 		while (i > 0) {
482 			reg1 = ctx->used_callee_reg[i - 1];
483 			reg2 = ctx->used_callee_reg[i];
484 			emit(A64_POP(reg1, reg2, A64_SP), ctx);
485 			i -= 2;
486 		}
487 	}
488 }
489 
490 static void emit_percpu_ptr(const u8 dst_reg, void __percpu *ptr,
491 			    struct jit_ctx *ctx)
492 {
493 	const u8 tmp = bpf2a64[TMP_REG_1];
494 
495 	emit_a64_mov_i64(dst_reg, (__force const u64)ptr, ctx);
496 	if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
497 		emit(A64_MRS_TPIDR_EL2(tmp), ctx);
498 	else
499 		emit(A64_MRS_TPIDR_EL1(tmp), ctx);
500 	emit(A64_ADD(1, dst_reg, dst_reg, tmp), ctx);
501 }
502 
/*
 * Number of instructions contributed to the prologue by the optional BTI
 * landing pad and the optional pointer-authentication signing instruction.
 */
#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)

/*
 * Offset of nop instruction in bpf prog entry to be poked.
 * Counted in instructions: the optional BTI plus the "mov x9, lr".
 */
#define POKE_OFFSET (BTI_INSNS + 1)

/*
 * Tail call offset to jump into, in instructions: optional BTI, the
 * mov/nop pair, optional PACIASP, then the four instructions that set up
 * the frame record and the tail call counter. build_prologue() verifies
 * this value against what it actually emitted.
 */
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 4)
511 
/*
 * Emit the program prologue.
 *
 * @ctx:            JIT context; ctx->stack_size is set here
 * @ebpf_from_cbpf: when true, the PROLOGUE_OFFSET consistency check and the
 *                  tail call landing pad are skipped
 *
 * Returns 0 on success, or -1 when the number of instructions emitted
 * before the tail call entry point does not equal PROLOGUE_OFFSET.
 */
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
	const struct bpf_prog *prog = ctx->prog;
	const bool is_main_prog = !bpf_is_subprog(prog);
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const u8 priv_sp = bpf2a64[PRIVATE_SP];
	void __percpu *priv_stack_ptr;
	int cur_offset;

	/*
	 * BPF prog stack layout
	 *
	 *                         high
	 * original A64_SP =>   0:+-----+ BPF prologue
	 *                        |FP/LR|
	 * current A64_FP =>  -16:+-----+
	 *                        | ... | callee saved registers
	 * BPF fp register => -64:+-----+ <= (BPF_FP)
	 *                        |     |
	 *                        | ... | BPF prog stack
	 *                        |     |
	 *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
	 *                        |RSVD | padding
	 * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
	 *                        |     |
	 *                        | ... | Function call stack
	 *                        |     |
	 *                        +-----+
	 *                          low
	 *
	 */

	emit_kcfi(is_main_prog ? cfi_bpf_hash : cfi_bpf_subprog_hash, ctx);
	/* Offsets below are counted from here, i.e. after the kCFI hash word */
	const int idx0 = ctx->idx;

	/* bpf function may be invoked by 3 instruction types:
	 * 1. bl, attached via freplace to bpf prog via short jump
	 * 2. br, attached via freplace to bpf prog via long jump
	 * 3. blr, working as a function pointer, used by emit_call.
	 * So BTI_JC should be used here to support both br and blr.
	 */
	emit_bti(A64_BTI_JC, ctx);

	/* Stash LR in x9, followed by the NOP slot at POKE_OFFSET */
	emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
	emit(A64_NOP, ctx);

	if (!prog->aux->exception_cb) {
		/* Sign lr */
		if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
			emit(A64_PACIASP, ctx);

		/* Save FP and LR registers to stay aligned with ARM64 AAPCS */
		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
		emit(A64_MOV(1, A64_FP, A64_SP), ctx);

		prepare_bpf_tail_call_cnt(ctx);

		if (!ebpf_from_cbpf && is_main_prog) {
			/* Sanity check: tail calls jump to PROLOGUE_OFFSET */
			cur_offset = ctx->idx - idx0;
			if (cur_offset != PROLOGUE_OFFSET) {
				pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
						cur_offset, PROLOGUE_OFFSET);
				return -1;
			}
			/* BTI landing pad for the tail call, done with a BR */
			emit_bti(A64_BTI_J, ctx);
		}
		push_callee_regs(ctx);
	} else {
		/*
		 * Exception callback receives FP of Main Program as third
		 * parameter
		 */
		emit(A64_MOV(1, A64_FP, A64_R(2)), ctx);
		/*
		 * Main Program already pushed the frame record and the
		 * callee-saved registers. The exception callback will not push
		 * anything and re-use the main program's stack.
		 *
		 * 12 registers are on the stack, i.e. 12 * 8 = 96 bytes below
		 * the frame record.
		 */
		emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
	}

	/* Stack must be multiples of 16B */
	ctx->stack_size = round_up(prog->aux->stack_depth, 16);

	if (ctx->fp_used) {
		if (ctx->priv_sp_used) {
			/*
			 * Set up private stack pointer: skip the guard area,
			 * then place the BPF frame pointer stack_size bytes
			 * above the private stack base.
			 */
			priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
			emit_percpu_ptr(priv_sp, priv_stack_ptr, ctx);
			emit(A64_ADD_I(1, fp, priv_sp, ctx->stack_size), ctx);
		} else {
			/* Set up BPF prog stack base register */
			emit(A64_MOV(1, fp, A64_SP), ctx);
		}
	}

	/* Set up function call stack */
	if (ctx->stack_size && !ctx->priv_sp_used)
		emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	/* Load the arena base address into its dedicated register */
	if (ctx->arena_vm_start)
		emit_a64_mov_i64(arena_vm_base, ctx->arena_vm_start, ctx);

	return 0;
}
621 
/*
 * Emit the code sequence for bpf_tail_call().
 *
 * The sequence contains three forward branches to the "out" label, which is
 * simply the instruction following the sequence. Each is first emitted as a
 * NOP placeholder and patched at the end, once the length of the sequence
 * is known. Patching only happens when ctx->image is set.
 *
 * Clobbers TMP_REG_1, TMP_REG_2 and TMP_REG_3. Always returns 0.
 */
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
	const u8 r2 = bpf2a64[BPF_REG_2];
	const u8 r3 = bpf2a64[BPF_REG_3];

	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 prg = bpf2a64[TMP_REG_2];
	const u8 tcc = bpf2a64[TMP_REG_3];
	const u8 ptr = bpf2a64[TCCNT_PTR];
	size_t off;
	/* Locations of the placeholder instructions to be patched below */
	__le32 *branch1 = NULL;
	__le32 *branch2 = NULL;
	__le32 *branch3 = NULL;

	/* if (index >= array->map.max_entries)
	 *     goto out;
	 */
	off = offsetof(struct bpf_array, map.max_entries);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_LDR32(tmp, r2, tmp), ctx);
	/* 32-bit move of r3 onto itself, followed by a 32-bit compare */
	emit(A64_MOV(0, r3, r3), ctx);
	emit(A64_CMP(0, r3, tmp), ctx);
	branch1 = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);

	/*
	 * if ((*tail_call_cnt_ptr) >= MAX_TAIL_CALL_CNT)
	 *     goto out;
	 */
	emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
	emit(A64_LDR64I(tcc, ptr, 0), ctx);
	emit(A64_CMP(1, tcc, tmp), ctx);
	branch2 = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);

	/* (*tail_call_cnt_ptr)++; -- only in the register for now */
	emit(A64_ADD_I(1, tcc, tcc, 1), ctx);

	/* prog = array->ptrs[index];
	 * if (prog == NULL)
	 *     goto out;
	 */
	off = offsetof(struct bpf_array, ptrs);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_ADD(1, tmp, r2, tmp), ctx);
	emit(A64_LSL(1, prg, r3, 3), ctx);
	emit(A64_LDR64(prg, tmp, prg), ctx);
	branch3 = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);

	/* Update tail_call_cnt if the slot is populated. */
	emit(A64_STR64I(tcc, ptr, 0), ctx);

	/* restore SP */
	if (ctx->stack_size && !ctx->priv_sp_used)
		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	pop_callee_regs(ctx);

	/* goto *(prog->bpf_func + prologue_offset); */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_LDR64(tmp, prg, tmp), ctx);
	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
	emit(A64_BR(tmp), ctx);

	/*
	 * "out" is the current position. Replace each placeholder with a
	 * branch whose offset is the distance, in instructions, from the
	 * placeholder to here.
	 */
	if (ctx->image) {
		off = &ctx->image[ctx->idx] - branch1;
		*branch1 = cpu_to_le32(A64_B_(A64_COND_CS, off));

		off = &ctx->image[ctx->idx] - branch2;
		*branch2 = cpu_to_le32(A64_B_(A64_COND_CS, off));

		off = &ctx->image[ctx->idx] - branch3;
		*branch3 = cpu_to_le32(A64_CBZ(1, prg, off));
	}

	return 0;
}
702 
703 static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx)
704 {
705 	const s32 imm = insn->imm;
706 	const s16 off = insn->off;
707 	const u8 code = insn->code;
708 	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
709 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
710 	const u8 dst = bpf2a64[insn->dst_reg];
711 	const u8 src = bpf2a64[insn->src_reg];
712 	const u8 tmp = bpf2a64[TMP_REG_1];
713 	u8 reg;
714 
715 	switch (imm) {
716 	case BPF_LOAD_ACQ:
717 		reg = src;
718 		break;
719 	case BPF_STORE_REL:
720 		reg = dst;
721 		break;
722 	default:
723 		pr_err_once("unknown atomic load/store op code %02x\n", imm);
724 		return -EINVAL;
725 	}
726 
727 	if (off) {
728 		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
729 		reg = tmp;
730 	}
731 	if (arena) {
732 		emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
733 		reg = tmp;
734 	}
735 
736 	switch (imm) {
737 	case BPF_LOAD_ACQ:
738 		switch (BPF_SIZE(code)) {
739 		case BPF_B:
740 			emit(A64_LDARB(dst, reg), ctx);
741 			break;
742 		case BPF_H:
743 			emit(A64_LDARH(dst, reg), ctx);
744 			break;
745 		case BPF_W:
746 			emit(A64_LDAR32(dst, reg), ctx);
747 			break;
748 		case BPF_DW:
749 			emit(A64_LDAR64(dst, reg), ctx);
750 			break;
751 		}
752 		break;
753 	case BPF_STORE_REL:
754 		switch (BPF_SIZE(code)) {
755 		case BPF_B:
756 			emit(A64_STLRB(src, reg), ctx);
757 			break;
758 		case BPF_H:
759 			emit(A64_STLRH(src, reg), ctx);
760 			break;
761 		case BPF_W:
762 			emit(A64_STLR32(src, reg), ctx);
763 			break;
764 		case BPF_DW:
765 			emit(A64_STLR64(src, reg), ctx);
766 			break;
767 		}
768 		break;
769 	default:
770 		pr_err_once("unexpected atomic load/store op code %02x\n",
771 			    imm);
772 		return -EINVAL;
773 	}
774 
775 	return 0;
776 }
777 
778 static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
779 {
780 	const u8 code = insn->code;
781 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
782 	const u8 dst = bpf2a64[insn->dst_reg];
783 	const u8 src = bpf2a64[insn->src_reg];
784 	const u8 tmp = bpf2a64[TMP_REG_1];
785 	const u8 tmp2 = bpf2a64[TMP_REG_2];
786 	const bool isdw = BPF_SIZE(code) == BPF_DW;
787 	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
788 	const s16 off = insn->off;
789 	u8 reg = dst;
790 
791 	if (off) {
792 		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
793 		reg = tmp;
794 	}
795 	if (arena) {
796 		emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
797 		reg = tmp;
798 	}
799 
800 	switch (insn->imm) {
801 	/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
802 	case BPF_ADD:
803 		emit(A64_STADD(isdw, reg, src), ctx);
804 		break;
805 	case BPF_AND:
806 		emit(A64_MVN(isdw, tmp2, src), ctx);
807 		emit(A64_STCLR(isdw, reg, tmp2), ctx);
808 		break;
809 	case BPF_OR:
810 		emit(A64_STSET(isdw, reg, src), ctx);
811 		break;
812 	case BPF_XOR:
813 		emit(A64_STEOR(isdw, reg, src), ctx);
814 		break;
815 	/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
816 	case BPF_ADD | BPF_FETCH:
817 		emit(A64_LDADDAL(isdw, src, reg, src), ctx);
818 		break;
819 	case BPF_AND | BPF_FETCH:
820 		emit(A64_MVN(isdw, tmp2, src), ctx);
821 		emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
822 		break;
823 	case BPF_OR | BPF_FETCH:
824 		emit(A64_LDSETAL(isdw, src, reg, src), ctx);
825 		break;
826 	case BPF_XOR | BPF_FETCH:
827 		emit(A64_LDEORAL(isdw, src, reg, src), ctx);
828 		break;
829 	/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
830 	case BPF_XCHG:
831 		emit(A64_SWPAL(isdw, src, reg, src), ctx);
832 		break;
833 	/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
834 	case BPF_CMPXCHG:
835 		emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
836 		break;
837 	default:
838 		pr_err_once("unknown atomic op code %02x\n", insn->imm);
839 		return -EINVAL;
840 	}
841 
842 	return 0;
843 }
844 
/*
 * Emit a BPF atomic read-modify-write as a load-exclusive/store-exclusive
 * retry loop.
 *
 * The memory operand is dst_reg + off; a non-zero offset is folded into
 * TMP_REG_1. TMP_REG_2 and TMP_REG_3 are used as scratch, and the fetch
 * variants additionally use BPF_REG_AX. Arena accesses (BPF_PROBE_ATOMIC
 * mode) are rejected.
 *
 * The relative branch offsets below are hard-coded instruction counts and
 * depend on the exact order of the emit() calls in each branch.
 *
 * Returns 0 on success, -EINVAL for arena accesses or an unrecognised
 * insn->imm.
 */
static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 tmp3 = bpf2a64[TMP_REG_3];
	/* BPF instruction index; referenced by name inside check_imm19() */
	const int i = insn - ctx->prog->insnsi;
	const s32 imm = insn->imm;
	const s16 off = insn->off;
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	u8 reg = dst;
	s32 jmp_offset;

	if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
		/* ll_sc based atomics don't support unsafe pointers yet. */
		pr_err_once("unknown atomic opcode %02x\n", code);
		return -EINVAL;
	}

	if (off) {
		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
		reg = tmp;
	}

	if (imm == BPF_ADD || imm == BPF_AND ||
	    imm == BPF_OR || imm == BPF_XOR) {
		/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
		emit(A64_LDXR(isdw, tmp2, reg), ctx);
		if (imm == BPF_ADD)
			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_AND)
			emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_OR)
			emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
		else
			emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
		emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
		/* store-exclusive status non-zero: retry from the LDXR */
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
	} else if (imm == (BPF_ADD | BPF_FETCH) ||
		   imm == (BPF_AND | BPF_FETCH) ||
		   imm == (BPF_OR | BPF_FETCH) ||
		   imm == (BPF_XOR | BPF_FETCH)) {
		/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
		const u8 ax = bpf2a64[BPF_REG_AX];

		/* keep the operand in ax; src receives the old memory value */
		emit(A64_MOV(isdw, ax, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		if (imm == (BPF_ADD | BPF_FETCH))
			emit(A64_ADD(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_AND | BPF_FETCH))
			emit(A64_AND(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_OR | BPF_FETCH))
			emit(A64_ORR(isdw, tmp2, src, ax), ctx);
		else
			emit(A64_EOR(isdw, tmp2, src, ax), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		/* retry from the LDXR (the MOV is not repeated) */
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_XCHG) {
		/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
		emit(A64_MOV(isdw, tmp2, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		/* retry from the LDXR */
		jmp_offset = -2;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_CMPXCHG) {
		/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
		const u8 r0 = bpf2a64[BPF_REG_0];

		/* tmp2 = expected value; r0 receives the old memory value */
		emit(A64_MOV(isdw, tmp2, r0), ctx);
		emit(A64_LDXR(isdw, r0, reg), ctx);
		emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
		/* mismatch: skip the store, the retry branch and the barrier */
		jmp_offset = 4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
		emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
		/* retry from the LDXR */
		jmp_offset = -4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else {
		pr_err_once("unknown atomic op code %02x\n", imm);
		return -EINVAL;
	}

	return 0;
}
940 
/*
 * dummy_tramp - default target stored in a program's PLT (see build_plt()).
 *
 * It is entered with a "br x10" from the PLT. It copies x30 into x10,
 * reloads x30 from x9 (where the program prologue stashed the original
 * link register with "mov x9, lr"), and returns through x10.
 */
void dummy_tramp(void);

asm (
"	.pushsection .text, \"ax\", @progbits\n"
"	.global dummy_tramp\n"
"	.type dummy_tramp, %function\n"
"dummy_tramp:"
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
"	bti j\n" /* dummy_tramp is called via "br x10" */
#endif
"	mov x10, x30\n"
"	mov x30, x9\n"
"	ret x10\n"
"	.size dummy_tramp, .-dummy_tramp\n"
"	.popsection\n"
);
957 
958 /* build a plt initialized like this:
959  *
960  * plt:
961  *      ldr tmp, target
962  *      br tmp
963  * target:
964  *      .quad dummy_tramp
965  *
966  * when a long jump trampoline is attached, target is filled with the
967  * trampoline address, and when the trampoline is removed, target is
968  * restored to dummy_tramp address.
969  */
970 static void build_plt(struct jit_ctx *ctx)
971 {
972 	const u8 tmp = bpf2a64[TMP_REG_1];
973 	struct bpf_plt *plt = NULL;
974 
975 	/* make sure target is 64-bit aligned */
976 	if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
977 		emit(A64_NOP, ctx);
978 
979 	plt = (struct bpf_plt *)(ctx->image + ctx->idx);
980 	/* plt is called via bl, no BTI needed here */
981 	emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
982 	emit(A64_BR(tmp), ctx);
983 
984 	if (ctx->image)
985 		plt->target = (u64)&dummy_tramp;
986 }
987 
/*
 * Emit the Spectre-BHB mitigation sequence, if one is required.
 *
 * Nothing is emitted when the mitigation is compiled out or disabled, when
 * Spectre-v2 is reported as vulnerable, or when the loading task has
 * CAP_SYS_ADMIN in the initial user namespace. Otherwise, in order of
 * preference: a single CLEARBHB hint when the system supports it; else a
 * counted branch loop (when the loop value is non-zero) and/or a firmware
 * call through the SMCCC conduit.
 *
 * Clobbers BPF registers 1-4, aka x0-x3
 */
static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
{
	const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */
	/* loop iteration count; 0 means the loop is not needed */
	u8 k = get_spectre_bhb_loop_value();

	if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
	    cpu_mitigations_off() || __nospectre_bhb ||
	    arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
		return;

	if (ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
		return;

	if (supports_clearbhb(SCOPE_SYSTEM)) {
		emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx);
		return;
	}

	if (k) {
		/*
		 * r1 = k;
		 * loop: b next; next: r1--; if (r1 != 0) goto loop;
		 * followed by DSB ISH and ISB.
		 */
		emit_a64_mov_i64(r1, k, ctx);
		emit(A64_B(1), ctx);
		emit(A64_SUBS_I(true, r1, r1, 1), ctx);
		emit(A64_B_(A64_COND_NE, -2), ctx);
		emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx);
		emit(aarch64_insn_get_isb_value(), ctx);
	}

	if (is_spectre_bhb_fw_mitigated()) {
		/* r1 (x0) = ARM_SMCCC_ARCH_WORKAROUND_3, then HVC or SMC */
		emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR,
			       ARM_SMCCC_ARCH_WORKAROUND_3), ctx);
		switch (arm_smccc_1_1_get_conduit()) {
		case SMCCC_CONDUIT_HVC:
			emit(aarch64_insn_get_hvc_value(), ctx);
			break;
		case SMCCC_CONDUIT_SMC:
			emit(aarch64_insn_get_smc_value(), ctx);
			break;
		default:
			pr_err_once("Firmware mitigation enabled with unknown conduit\n");
		}
	}
}
1031 
1032 static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
1033 {
1034 	const u8 r0 = bpf2a64[BPF_REG_0];
1035 	const u8 ptr = bpf2a64[TCCNT_PTR];
1036 
1037 	/* We're done with BPF stack */
1038 	if (ctx->stack_size && !ctx->priv_sp_used)
1039 		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
1040 
1041 	pop_callee_regs(ctx);
1042 
1043 	emit(A64_POP(A64_ZR, ptr, A64_SP), ctx);
1044 
1045 	if (was_classic)
1046 		build_bhb_mitigation(ctx);
1047 
1048 	/* Restore FP/LR registers */
1049 	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
1050 
1051 	/* Move the return value from bpf:r0 (aka x7) to x0 */
1052 	emit(A64_MOV(1, A64_R(0), r0), ctx);
1053 
1054 	/* Authenticate lr */
1055 	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
1056 		emit(A64_AUTIASP, ctx);
1057 
1058 	emit(A64_RET(A64_LR), ctx);
1059 }
1060 
1061 /*
1062  * Metadata encoding for exception handling in JITed code.
1063  *
1064  * Format of `fixup` field in `struct exception_table_entry`:
1065  *
1066  * Bit layout of `fixup` (32-bit):
1067  *
1068  * +-----------+--------+-----------+-----------+----------+
1069  * |   31-27   | 26-22  |     21    |   20-16   |   15-0   |
1070  * |           |        |           |           |          |
1071  * | FIXUP_REG | Unused | ARENA_ACC | ARENA_REG |  OFFSET  |
1072  * +-----------+--------+-----------+-----------+----------+
1073  *
1074  * - OFFSET (16 bits): Offset used to compute address for Load/Store instruction.
1075  * - ARENA_REG (5 bits): Register that is used to calculate the address for load/store when
1076  *                       accessing the arena region.
1077  * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
1078  * - FIXUP_REG (5 bits): Destination register for the load instruction (cleared on fault) or set to
1079  *                       DONT_CLEAR if it is a store instruction.
1080  */
1081 
/* Bits 15-0: offset of the faulting load/store; ex_handler_bpf() reads it back as s16 */
#define BPF_FIXUP_OFFSET_MASK      GENMASK(15, 0)
/* Bits 20-16: register holding the arena address used by the faulting access */
#define BPF_FIXUP_ARENA_REG_MASK   GENMASK(20, 16)
/* Bit 21: set when the faulting instruction accessed the arena region */
#define BPF_ARENA_ACCESS           BIT(21)
/* Bits 31-27: register to zero on fault, or DONT_CLEAR when nothing must be zeroed */
#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
/*
 * Sentinel stored in the FIXUP_REG field when no register must be cleared.
 * Register 5 is an unused ARM64 register from BPF's POV.
 */
#define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */
1087 
1088 bool ex_handler_bpf(const struct exception_table_entry *ex,
1089 		    struct pt_regs *regs)
1090 {
1091 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
1092 	s16 off = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
1093 	int arena_reg = FIELD_GET(BPF_FIXUP_ARENA_REG_MASK, ex->fixup);
1094 	bool is_arena = !!(ex->fixup & BPF_ARENA_ACCESS);
1095 	bool is_write = (dst_reg == DONT_CLEAR);
1096 	unsigned long addr;
1097 
1098 	if (is_arena) {
1099 		addr = regs->regs[arena_reg] + off;
1100 		bpf_prog_report_arena_violation(is_write, addr, regs->pc);
1101 	}
1102 
1103 	if (dst_reg != DONT_CLEAR)
1104 		regs->regs[dst_reg] = 0;
1105 	/* Skip the faulting instruction */
1106 	regs->pc += AARCH64_INSN_SIZE;
1107 
1108 	return true;
1109 }
1110 
1111 /* For accesses to BTF pointers, add an entry to the exception table */
1112 static int add_exception_handler(const struct bpf_insn *insn,
1113 				 struct jit_ctx *ctx,
1114 				 int dst_reg)
1115 {
1116 	off_t ins_offset;
1117 	s16 off = insn->off;
1118 	bool is_arena;
1119 	int arena_reg;
1120 	unsigned long pc;
1121 	struct exception_table_entry *ex;
1122 
1123 	if (!ctx->image)
1124 		/* First pass */
1125 		return 0;
1126 
1127 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
1128 	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
1129 	    BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
1130 	    BPF_MODE(insn->code) != BPF_PROBE_MEM32SX &&
1131 	    BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
1132 		return 0;
1133 
1134 	is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) ||
1135 		   (BPF_MODE(insn->code) == BPF_PROBE_MEM32SX) ||
1136 		   (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC);
1137 
1138 	if (!ctx->prog->aux->extable ||
1139 	    WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
1140 		return -EINVAL;
1141 
1142 	ex = &ctx->prog->aux->extable[ctx->exentry_idx];
1143 	pc = (unsigned long)&ctx->ro_image[ctx->idx - 1];
1144 
1145 	/*
1146 	 * This is the relative offset of the instruction that may fault from
1147 	 * the exception table itself. This will be written to the exception
1148 	 * table and if this instruction faults, the destination register will
1149 	 * be set to '0' and the execution will jump to the next instruction.
1150 	 */
1151 	ins_offset = pc - (long)&ex->insn;
1152 	if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
1153 		return -ERANGE;
1154 
1155 	/*
1156 	 * The offsets above have been calculated using the RO buffer but we
1157 	 * need to use the R/W buffer for writes.
1158 	 * switch ex to rw buffer for writing.
1159 	 */
1160 	ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);
1161 
1162 	ex->insn = ins_offset;
1163 
1164 	if (BPF_CLASS(insn->code) != BPF_LDX)
1165 		dst_reg = DONT_CLEAR;
1166 
1167 	ex->fixup = FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
1168 
1169 	if (is_arena) {
1170 		ex->fixup |= BPF_ARENA_ACCESS;
1171 		/*
1172 		 * insn->src_reg/dst_reg holds the address in the arena region with upper 32-bits
1173 		 * being zero because of a preceding addr_space_cast(r<n>, 0x0, 0x1) instruction.
1174 		 * This address is adjusted with the addition of arena_vm_start (see the
1175 		 * implementation of BPF_PROBE_MEM32 and BPF_PROBE_ATOMIC) before being used for the
1176 		 * memory access. Pass the reg holding the unmodified 32-bit address to
1177 		 * ex_handler_bpf.
1178 		 */
1179 		if (BPF_CLASS(insn->code) == BPF_LDX)
1180 			arena_reg = bpf2a64[insn->src_reg];
1181 		else
1182 			arena_reg = bpf2a64[insn->dst_reg];
1183 
1184 		ex->fixup |=  FIELD_PREP(BPF_FIXUP_OFFSET_MASK, off) |
1185 			      FIELD_PREP(BPF_FIXUP_ARENA_REG_MASK, arena_reg);
1186 	}
1187 
1188 	ex->type = EX_TYPE_BPF;
1189 
1190 	ctx->exentry_idx++;
1191 	return 0;
1192 }
1193 
/* JITs an eBPF instruction.
 *
 * Translates the single instruction @insn of ctx->prog into A64 code via
 * emit(). @extra_pass is only forwarded to bpf_jit_get_func_addr() when a
 * call target has to be resolved.
 *
 * Returns:
 * 0  - successfully JITed an 8-byte eBPF instruction.
 * >0 - successfully JITed a 16-byte eBPF instruction.
 * <0 - failed to JIT.
 */
static int build_insn(const struct bpf_verifier_env *env, const struct bpf_insn *insn,
		      struct jit_ctx *ctx, bool extra_pass)
{
	const u8 code = insn->code;
	u8 dst = bpf2a64[insn->dst_reg];
	u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 tmp3 = bpf2a64[TMP_REG_3];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const u8 priv_sp = bpf2a64[PRIVATE_SP];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	/* Index of this instruction within the program; also used by check_imm() */
	const int i = insn - ctx->prog->insnsi;
	/* ALU64 and JMP classes operate on the full 64-bit registers */
	const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
			  BPF_CLASS(code) == BPF_JMP;
	u8 jmp_cond;
	s32 jmp_offset;
	u32 a64_insn;
	u8 src_adj;
	u8 dst_adj;
	int off_adj;
	int ret;
	bool sign_extend;

	/* Instructions reachable by an indirect jump start with a BTI J */
	if (bpf_insn_is_indirect_target(env, ctx->prog, i))
		emit_bti(A64_BTI_J, ctx);

	switch (code) {
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		if (insn_is_cast_user(insn)) {
			emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits
			emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx);
			emit(A64_LSL(1, dst, dst, 32), ctx);
			/* a zero lower half skips the ORR, so the result stays 0 */
			emit(A64_CBZ(1, tmp, 2), ctx);
			emit(A64_ORR(1, tmp, dst, tmp), ctx);
			emit(A64_MOV(1, dst, tmp), ctx);
			break;
		} else if (insn_is_mov_percpu_addr(insn)) {
			if (dst != src)
				emit(A64_MOV(1, dst, src), ctx);
			/* add the value of TPIDR_EL2 or TPIDR_EL1, depending on the VHE cap */
			if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
				emit(A64_MRS_TPIDR_EL2(tmp), ctx);
			else
				emit(A64_MRS_TPIDR_EL1(tmp), ctx);
			emit(A64_ADD(1, dst, dst, tmp), ctx);
			break;
		}
		/* off selects a plain move (0) or a sign-extending move (8/16/32) */
		switch (insn->off) {
		case 0:
			emit(A64_MOV(is64, dst, src), ctx);
			break;
		case 8:
			emit(A64_SXTB(is64, dst, src), ctx);
			break;
		case 16:
			emit(A64_SXTH(is64, dst, src), ctx);
			break;
		case 32:
			emit(A64_SXTW(is64, dst, src), ctx);
			break;
		}
		break;
	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit(A64_ADD(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		emit(A64_SUB(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit(A64_AND(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit(A64_ORR(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit(A64_EOR(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit(A64_MUL(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_X:
		/* a non-zero off selects the signed division */
		if (!off)
			emit(A64_UDIV(is64, dst, dst, src), ctx);
		else
			emit(A64_SDIV(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		/* quotient goes to tmp, MSUB then forms the remainder in dst */
		if (!off)
			emit(A64_UDIV(is64, tmp, dst, src), ctx);
		else
			emit(A64_SDIV(is64, tmp, dst, src), ctx);
		emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit(A64_LSLV(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit(A64_LSRV(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit(A64_ASRV(is64, dst, dst, src), ctx);
		break;
	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		emit(A64_NEG(is64, dst, dst), ctx);
		break;
	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
	case BPF_ALU | BPF_END | BPF_FROM_BE:
	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
		/*
		 * A BPF_ALU conversion to the CPU's own byte order needs no
		 * byte reversal, only the zero-extension at emit_bswap_uxt.
		 */
#ifdef CONFIG_CPU_BIG_ENDIAN
		if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE)
			goto emit_bswap_uxt;
#else /* !CONFIG_CPU_BIG_ENDIAN */
		if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE)
			goto emit_bswap_uxt;
#endif
		switch (imm) {
		case 16:
			emit(A64_REV16(is64, dst, dst), ctx);
			/* zero-extend 16 bits into 64 bits */
			emit(A64_UXTH(is64, dst, dst), ctx);
			break;
		case 32:
			emit(A64_REV32(0, dst, dst), ctx);
			/* upper 32 bits already cleared */
			break;
		case 64:
			emit(A64_REV64(dst, dst), ctx);
			break;
		}
		break;
emit_bswap_uxt:
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit(A64_UXTH(is64, dst, dst), ctx);
			break;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			emit(A64_UXTW(is64, dst, dst), ctx);
			break;
		case 64:
			/* nop */
			break;
		}
		break;
	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		emit_a64_mov_i(is64, dst, imm, ctx);
		break;
	/* dst = dst OP imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		emit_a64_add_i(is64, dst, dst, tmp, imm, ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		/* try SUB #imm, then ADD #-imm, else go through tmp */
		if (is_addsub_imm(imm)) {
			emit(A64_SUB_I(is64, dst, dst, imm), ctx);
		} else if (is_addsub_imm(-(u32)imm)) {
			emit(A64_ADD_I(is64, dst, dst, -imm), ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_SUB(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		/* AARCH64_BREAK_FAULT means imm has no immediate encoding */
		a64_insn = A64_AND_I(is64, dst, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_AND(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		a64_insn = A64_ORR_I(is64, dst, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_ORR(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		a64_insn = A64_EOR_I(is64, dst, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_EOR(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		emit_a64_mov_i(is64, tmp, imm, ctx);
		emit(A64_MUL(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		emit_a64_mov_i(is64, tmp, imm, ctx);
		if (!off)
			emit(A64_UDIV(is64, dst, dst, tmp), ctx);
		else
			emit(A64_SDIV(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		/* divisor in tmp2, quotient in tmp, remainder formed by MSUB */
		emit_a64_mov_i(is64, tmp2, imm, ctx);
		if (!off)
			emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
		else
			emit(A64_SDIV(is64, tmp, dst, tmp2), ctx);
		emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit(A64_LSL(is64, dst, dst, imm), ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
		emit(A64_LSR(is64, dst, dst, imm), ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit(A64_ASR(is64, dst, dst, imm), ctx);
		break;

	/* JUMP reg */
	case BPF_JMP | BPF_JA | BPF_X:
		emit(A64_BR(dst), ctx);
		break;
	/* JUMP off */
	case BPF_JMP | BPF_JA:
	case BPF_JMP32 | BPF_JA:
		/* BPF_JMP carries the displacement in off, BPF_JMP32 in imm */
		if (BPF_CLASS(code) == BPF_JMP)
			jmp_offset = bpf2a64_offset(i, off, ctx);
		else
			jmp_offset = bpf2a64_offset(i, imm, ctx);
		check_imm26(jmp_offset);
		emit(A64_B(jmp_offset), ctx);
		break;
	/* IF (dst COND src) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
		emit(A64_CMP(is64, dst, src), ctx);
	/*
	 * Shared tail of all conditional jumps: the flags were set by the
	 * CMP/CMN/TST emitted by the case that jumped here.
	 */
emit_cond_jmp:
		jmp_offset = bpf2a64_offset(i, off, ctx);
		check_imm19(jmp_offset);
		switch (BPF_OP(code)) {
		case BPF_JEQ:
			jmp_cond = A64_COND_EQ;
			break;
		case BPF_JGT:
			jmp_cond = A64_COND_HI;
			break;
		case BPF_JLT:
			jmp_cond = A64_COND_CC;
			break;
		case BPF_JGE:
			jmp_cond = A64_COND_CS;
			break;
		case BPF_JLE:
			jmp_cond = A64_COND_LS;
			break;
		/* JSET is emitted as TST and taken on NE, like JNE */
		case BPF_JSET:
		case BPF_JNE:
			jmp_cond = A64_COND_NE;
			break;
		case BPF_JSGT:
			jmp_cond = A64_COND_GT;
			break;
		case BPF_JSLT:
			jmp_cond = A64_COND_LT;
			break;
		case BPF_JSGE:
			jmp_cond = A64_COND_GE;
			break;
		case BPF_JSLE:
			jmp_cond = A64_COND_LE;
			break;
		default:
			return -EFAULT;
		}
		emit(A64_B_(jmp_cond, jmp_offset), ctx);
		break;
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
		emit(A64_TST(is64, dst, src), ctx);
		goto emit_cond_jmp;
	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		/* try CMP #imm, then CMN #-imm, else compare against tmp */
		if (is_addsub_imm(imm)) {
			emit(A64_CMP_I(is64, dst, imm), ctx);
		} else if (is_addsub_imm(-(u32)imm)) {
			emit(A64_CMN_I(is64, dst, -imm), ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_CMP(is64, dst, tmp), ctx);
		}
		goto emit_cond_jmp;
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		a64_insn = A64_TST_I(is64, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_TST(is64, dst, tmp), ctx);
		}
		goto emit_cond_jmp;
	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		const u8 r0 = bpf2a64[BPF_REG_0];
		bool func_addr_fixed;
		u64 func_addr;
		u32 cpu_offset;

		/* Implement helper call to bpf_get_smp_processor_id() inline */
		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {
			cpu_offset = offsetof(struct thread_info, cpu);

			/* load the cpu field at cpu_offset from the pointer in SP_EL0 */
			emit(A64_MRS_SP_EL0(tmp), ctx);
			if (is_lsi_offset(cpu_offset, 2)) {
				emit(A64_LDR32I(r0, tmp, cpu_offset), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, cpu_offset, ctx);
				emit(A64_LDR32(r0, tmp, tmp2), ctx);
			}
			break;
		}

		/* Implement helper call to bpf_get_current_task/_btf() inline */
		if (insn->src_reg == 0 && (insn->imm == BPF_FUNC_get_current_task ||
					   insn->imm == BPF_FUNC_get_current_task_btf)) {
			emit(A64_MRS_SP_EL0(r0), ctx);
			break;
		}

		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
					    &func_addr, &func_addr_fixed);
		if (ret < 0)
			return ret;
		emit_call(func_addr, ctx);
		/*
		 * Call to arch_bpf_timed_may_goto() is emitted by the
		 * verifier and called with custom calling convention with
		 * first argument and return value in BPF_REG_AX (x9).
		 */
		if (func_addr != (u64)arch_bpf_timed_may_goto)
			emit(A64_MOV(1, r0, A64_R(0)), ctx);
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		if (emit_bpf_tail_call(ctx))
			return -EFAULT;
		break;
	/* function return */
	case BPF_JMP | BPF_EXIT:
		/* Optimization: when last instruction is EXIT,
		   simply fallthrough to epilogue. */
		if (i == ctx->prog->len - 1)
			break;
		jmp_offset = epilogue_offset(ctx);
		check_imm26(jmp_offset);
		emit(A64_B(jmp_offset), ctx);
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		/* the upper 32 bits of the immediate live in the next insn slot */
		const struct bpf_insn insn1 = insn[1];
		u64 imm64;

		imm64 = (u64)insn1.imm << 32 | (u32)imm;
		if (bpf_pseudo_func(insn))
			emit_addr_mov_i64(dst, imm64, ctx);
		else
			emit_a64_mov_i64(dst, imm64, ctx);

		/* tell the caller that two instruction slots were consumed */
		return 1;
	}

	/* LDX: dst = (u64)*(unsigned size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
	/* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */
	case BPF_LDX | BPF_MEMSX | BPF_B:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_W:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_B:
	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_W:
		/* arena access: the real address is src + arena_vm_base, kept in tmp2 */
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
		    BPF_MODE(insn->code) == BPF_PROBE_MEM32SX) {
			emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx);
			src = tmp2;
		}
		/*
		 * fp-relative accesses are rebased onto SP (or the private
		 * stack pointer) with the stack size folded into the offset.
		 */
		if (src == fp) {
			src_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			src_adj = src;
			off_adj = off;
		}
		sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX ||
				BPF_MODE(insn->code) == BPF_PROBE_MEMSX ||
				 BPF_MODE(insn->code) == BPF_PROBE_MEM32SX);
		/*
		 * For every size: use the immediate-offset form when
		 * is_lsi_offset() accepts off_adj, otherwise load the original
		 * off into tmp and use register-offset addressing on the
		 * unadjusted src.
		 */
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off_adj, 2)) {
				if (sign_extend)
					emit(A64_LDRSWI(dst, src_adj, off_adj), ctx);
				else
					emit(A64_LDR32I(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				if (sign_extend)
					emit(A64_LDRSW(dst, src, tmp), ctx);
				else
					emit(A64_LDR32(dst, src, tmp), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off_adj, 1)) {
				if (sign_extend)
					emit(A64_LDRSHI(dst, src_adj, off_adj), ctx);
				else
					emit(A64_LDRHI(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				if (sign_extend)
					emit(A64_LDRSH(dst, src, tmp), ctx);
				else
					emit(A64_LDRH(dst, src, tmp), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off_adj, 0)) {
				if (sign_extend)
					emit(A64_LDRSBI(dst, src_adj, off_adj), ctx);
				else
					emit(A64_LDRBI(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				if (sign_extend)
					emit(A64_LDRSB(dst, src, tmp), ctx);
				else
					emit(A64_LDRB(dst, src, tmp), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off_adj, 3)) {
				emit(A64_LDR64I(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_LDR64(dst, src, tmp), ctx);
			}
			break;
		}

		/* must directly follow the load: the handler records the last emitted insn */
		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	/* speculation barrier against v1 and v4 */
	case BPF_ST | BPF_NOSPEC:
		if (alternative_has_cap_likely(ARM64_HAS_SB)) {
			emit(A64_SB, ctx);
		} else {
			emit(A64_DSB_NSH, ctx);
			emit(A64_ISB, ctx);
		}
		break;

	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_DW:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
		/* tmp3 holds the arena address: tmp and tmp2 are used below for imm and off */
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit(A64_ADD(1, tmp3, dst, arena_vm_base), ctx);
			dst = tmp3;
		}
		if (dst == fp) {
			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
			off_adj = off;
		}
		/* Load imm to a register then store it */
		emit_a64_mov_i(1, tmp, imm, ctx);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off_adj, 2)) {
				emit(A64_STR32I(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STR32(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off_adj, 1)) {
				emit(A64_STRHI(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STRH(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off_adj, 0)) {
				emit(A64_STRBI(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STRB(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off_adj, 3)) {
				emit(A64_STR64I(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STR64(tmp, dst, tmp2), ctx);
			}
			break;
		}

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_DW:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
		/* arena access: the real address is dst + arena_vm_base, kept in tmp2 */
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx);
			dst = tmp2;
		}
		if (dst == fp) {
			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
			off_adj = off;
		}
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off_adj, 2)) {
				emit(A64_STR32I(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STR32(src, dst, tmp), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off_adj, 1)) {
				emit(A64_STRHI(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STRH(src, dst, tmp), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off_adj, 0)) {
				emit(A64_STRBI(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STRB(src, dst, tmp), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off_adj, 3)) {
				emit(A64_STR64I(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STR64(src, dst, tmp), ctx);
			}
			break;
		}

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	case BPF_STX | BPF_ATOMIC | BPF_B:
	case BPF_STX | BPF_ATOMIC | BPF_H:
	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
		/* atomic load/store first, then LSE atomics if the CPU has them, else LL/SC */
		if (bpf_atomic_is_load_store(insn))
			ret = emit_atomic_ld_st(insn, ctx);
		else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
			ret = emit_lse_atomic(insn, ctx);
		else
			ret = emit_ll_sc_atomic(insn, ctx);
		if (ret)
			return ret;

		/* only the probing variant gets an exception table entry */
		if (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
			ret = add_exception_handler(insn, ctx, dst);
			if (ret)
				return ret;
		}
		break;

	default:
		pr_err_once("unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}
1903 
1904 static int build_body(struct bpf_verifier_env *env, struct jit_ctx *ctx, bool extra_pass)
1905 {
1906 	const struct bpf_prog *prog = ctx->prog;
1907 	int i;
1908 
1909 	/*
1910 	 * - offset[0] offset of the end of prologue,
1911 	 *   start of the 1st instruction.
1912 	 * - offset[1] - offset of the end of 1st instruction,
1913 	 *   start of the 2nd instruction
1914 	 * [....]
1915 	 * - offset[3] - offset of the end of 3rd instruction,
1916 	 *   start of 4th instruction
1917 	 */
1918 	for (i = 0; i < prog->len; i++) {
1919 		const struct bpf_insn *insn = &prog->insnsi[i];
1920 		int ret;
1921 
1922 		ctx->offset[i] = ctx->idx;
1923 		ret = build_insn(env, insn, ctx, extra_pass);
1924 		if (ret > 0) {
1925 			i++;
1926 			ctx->offset[i] = ctx->idx;
1927 			continue;
1928 		}
1929 		if (ret)
1930 			return ret;
1931 	}
1932 	/*
1933 	 * offset is allocated with prog->len + 1 so fill in
1934 	 * the last element with the offset after the last
1935 	 * instruction (end of program)
1936 	 */
1937 	ctx->offset[i] = ctx->idx;
1938 
1939 	return 0;
1940 }
1941 
1942 static int validate_code(struct jit_ctx *ctx)
1943 {
1944 	int i;
1945 
1946 	for (i = 0; i < ctx->idx; i++) {
1947 		u32 a64_insn = le32_to_cpu(ctx->image[i]);
1948 
1949 		if (a64_insn == AARCH64_BREAK_FAULT)
1950 			return -1;
1951 	}
1952 	return 0;
1953 }
1954 
1955 static int validate_ctx(struct jit_ctx *ctx)
1956 {
1957 	if (validate_code(ctx))
1958 		return -1;
1959 
1960 	if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
1961 		return -1;
1962 
1963 	return 0;
1964 }
1965 
1966 static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
1967 {
1968 	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
1969 	u64 *stack_ptr;
1970 
1971 	for_each_possible_cpu(cpu) {
1972 		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
1973 		stack_ptr[0] = PRIV_STACK_GUARD_VAL;
1974 		stack_ptr[1] = PRIV_STACK_GUARD_VAL;
1975 		stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
1976 		stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
1977 	}
1978 }
1979 
1980 static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
1981 				   struct bpf_prog *prog)
1982 {
1983 	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
1984 	u64 *stack_ptr;
1985 
1986 	for_each_possible_cpu(cpu) {
1987 		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
1988 		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
1989 		    stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
1990 		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
1991 		    stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
1992 			pr_err("BPF private stack overflow/underflow detected for prog %sx\n",
1993 			       bpf_jit_get_prog_name(prog));
1994 			break;
1995 		}
1996 	}
1997 }
1998 
/*
 * JIT state stored in prog->aux->jit_data so that a later call to
 * bpf_int_jit_compile() for the same program can pick up where the previous
 * one stopped (the "extra pass", detected through a non-NULL ctx.offset).
 */
struct arm64_jit_data {
	/* binary header; the extra pass derives the writable image pointer from it */
	struct bpf_binary_header *header;
	/* start of the JITed image inside the read-only header */
	u8 *ro_image;
	/* read-only counterpart of @header */
	struct bpf_binary_header *ro_header;
	/* saved JIT context, copied back wholesale on the extra pass */
	struct jit_ctx ctx;
};
2005 
/* JIT-compile @prog to A64 code.
 *
 * Always returns @prog. On success prog->bpf_func, prog->jited and
 * prog->jited_len describe the generated image; on failure (or when
 * prog->jit_requested is not set) they are left unset, or reset if the
 * failure happened during an extra pass.
 *
 * The function may be called twice for the same program. If the first call
 * was for a prog with prog->is_func set, the context is saved in
 * prog->aux->jit_data instead of finalizing the image; the second call
 * ("extra pass") picks that context up, regenerates the code into the same
 * image and finalizes it.
 *
 * Code generation runs the prologue/body/epilogue builders several times:
 * once without an image to size the allocation, once to determine the
 * final instruction positions, and once more to write the body with the
 * final jump offsets.
 */
struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
{
	int image_size, prog_size, extable_size, extable_align, extable_offset;
	struct bpf_binary_header *header;
	struct bpf_binary_header *ro_header = NULL;
	struct arm64_jit_data *jit_data;
	void __percpu *priv_stack_ptr = NULL;
	bool was_classic = bpf_prog_was_classic(prog);
	int priv_stack_alloc_sz;
	bool extra_pass = false;
	struct jit_ctx ctx;
	u8 *image_ptr;
	u8 *ro_image_ptr;
	int body_idx;
	int exentry_idx;

	if (!prog->jit_requested)
		return prog;

	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc_obj(*jit_data);
		if (!jit_data)
			return prog;
		prog->aux->jit_data = jit_data;
	}
	priv_stack_ptr = prog->aux->priv_stack_ptr;
	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
		/* Allocate actual private stack size with verifier-calculated
		 * stack size plus two memory guards to protect overflow and
		 * underflow.
		 */
		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
				      2 * PRIV_STACK_GUARD_SZ;
		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
		if (!priv_stack_ptr)
			goto out_priv_stack;

		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
		prog->aux->priv_stack_ptr = priv_stack_ptr;
	}
	/* A saved offset table means a previous call already sized and
	 * allocated the image: restore that state and go straight to code
	 * generation.
	 */
	if (jit_data->ctx.offset) {
		ctx = jit_data->ctx;
		ro_image_ptr = jit_data->ro_image;
		ro_header = jit_data->ro_header;
		header = jit_data->header;
		/* the program sits at the same offset in both images */
		image_ptr = (void *)header + ((void *)ro_image_ptr
						 - (void *)ro_header);
		extra_pass = true;
		prog_size = sizeof(u32) * ctx.idx;
		goto skip_init_ctx;
	}
	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	ctx.offset = kvzalloc_objs(int, prog->len + 1);
	if (ctx.offset == NULL)
		goto out_off;

	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);

	if (priv_stack_ptr)
		ctx.priv_sp_used = true;

	/* Pass 1: Estimate the maximum image size.
	 *
	 * BPF line info needs ctx->offset[i] to be the offset of
	 * instruction[i] in jited image, so build prologue first.
	 */
	if (build_prologue(&ctx, was_classic))
		goto out_off;

	if (build_body(env, &ctx, extra_pass))
		goto out_off;

	ctx.epilogue_offset = ctx.idx;
	build_epilogue(&ctx, was_classic);
	build_plt(&ctx);

	extable_align = __alignof__(struct exception_table_entry);
	extable_size = prog->aux->num_exentries *
		sizeof(struct exception_table_entry);

	/* Now we know the maximum image size. */
	prog_size = sizeof(u32) * ctx.idx;
	/* also allocate space for plt target */
	extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
	image_size = extable_offset + extable_size;
	ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
					      sizeof(u64), &header, &image_ptr,
					      jit_fill_hole);
	if (!ro_header)
		goto out_off;

	/* Pass 2: Determine jited position and result for each instruction */

	/*
	 * Use the image(RW) for writing the JITed instructions. But also save
	 * the ro_image(RX) for calculating the offsets in the image. The RW
	 * image will be later copied to the RX image from where the program
	 * will run. The bpf_jit_binary_pack_finalize() will do this copy in the
	 * final step.
	 */
	ctx.image = (__le32 *)image_ptr;
	ctx.ro_image = (__le32 *)ro_image_ptr;
	if (extable_size)
		prog->aux->extable = (void *)ro_image_ptr + extable_offset;
skip_init_ctx:
	ctx.idx = 0;
	ctx.exentry_idx = 0;
	ctx.write = true;

	build_prologue(&ctx, was_classic);

	/* Record exentry_idx and body_idx before first build_body */
	exentry_idx = ctx.exentry_idx;
	body_idx = ctx.idx;
	/* Don't write body instructions to memory for now */
	ctx.write = false;

	if (build_body(env, &ctx, extra_pass))
		goto out_free_hdr;

	/* rewind to the start of the body for the writing pass */
	ctx.epilogue_offset = ctx.idx;
	ctx.exentry_idx = exentry_idx;
	ctx.idx = body_idx;
	ctx.write = true;

	/* Pass 3: Adjust jump offset and write final image */
	if (build_body(env, &ctx, extra_pass) ||
		WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset))
		goto out_free_hdr;

	build_epilogue(&ctx, was_classic);
	build_plt(&ctx);

	/* Extra pass to validate JITed code. */
	if (validate_ctx(&ctx))
		goto out_free_hdr;

	/* update the real prog size */
	prog_size = sizeof(u32) * ctx.idx;

	/* And we're done. */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);

	if (!prog->is_func || extra_pass) {
		/* The jited image may shrink since the jited result for
		 * BPF_CALL to subprog may be changed from indirect call
		 * to direct call.
		 */
		if (extra_pass && ctx.idx > jit_data->ctx.idx) {
			pr_err_once("multi-func JIT bug %d > %d\n",
				    ctx.idx, jit_data->ctx.idx);
			goto out_free_hdr;
		}
		if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
			/* ro_header and header has been freed */
			ro_header = NULL;
			header = NULL;
			goto out_free_hdr;
		}
	} else {
		/* keep the state around for the extra pass */
		jit_data->ctx = ctx;
		jit_data->ro_image = ro_image_ptr;
		jit_data->header = header;
		jit_data->ro_header = ro_header;
	}

	prog->bpf_func = (void *)ctx.ro_image + cfi_get_offset();
	prog->jited = 1;
	prog->jited_len = prog_size - cfi_get_offset();

	if (!prog->is_func || extra_pass) {
		int i;

		/* offset[prog->len] is the size of program */
		for (i = 0; i <= prog->len; i++)
			ctx.offset[i] *= AARCH64_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
		/*
		 * The bpf_prog_update_insn_ptrs function expects offsets to
		 * point to the first byte of the jitted instruction (unlike
		 * the bpf_prog_fill_jited_linfo above, which, for historical
		 * reasons, expects to point to the next instruction)
		 */
		bpf_prog_update_insn_ptrs(prog, ctx.offset, ctx.ro_image);
		/* The labels below are also the targets of the failure paths,
		 * which jump into this block to share the cleanup.
		 */
out_off:
		if (!ro_header && priv_stack_ptr) {
			free_percpu(priv_stack_ptr);
			prog->aux->priv_stack_ptr = NULL;
		}
		kvfree(ctx.offset);
out_priv_stack:
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}

	return prog;

out_free_hdr:
	/* undo what the first call published if the extra pass failed */
	if (extra_pass) {
		prog->bpf_func = NULL;
		prog->jited = 0;
		prog->jited_len = 0;
	}
	if (header) {
		bpf_arch_text_copy(&ro_header->size, &header->size,
				   sizeof(header->size));
		bpf_jit_binary_pack_free(ro_header, header);
	}
	goto out_off;
}
2221 
/* Report that this JIT supports private stacks; always returns true. */
bool bpf_jit_supports_private_stack(void)
{
	return true;
}
2226 
/* Report that this JIT supports kfunc calls; always returns true. */
bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}
2231 
2232 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
2233 {
2234 	if (!aarch64_insn_copy(dst, src, len))
2235 		return ERR_PTR(-EINVAL);
2236 	return dst;
2237 }
2238 
/* Returns the size of the vmalloc address range
 * (VMALLOC_END - VMALLOC_START) as the JIT allocation limit.
 */
u64 bpf_jit_alloc_exec_limit(void)
{
	return VMALLOC_END - VMALLOC_START;
}
2243 
/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls;
 * always returns true.
 */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}
2249 
2250 static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
2251 			    int bargs_off, int retval_off, int run_ctx_off,
2252 			    bool save_ret)
2253 {
2254 	__le32 *branch;
2255 	u64 enter_prog;
2256 	u64 exit_prog;
2257 	struct bpf_prog *p = l->link.prog;
2258 	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
2259 
2260 	enter_prog = (u64)bpf_trampoline_enter(p);
2261 	exit_prog = (u64)bpf_trampoline_exit(p);
2262 
2263 	if (l->cookie == 0) {
2264 		/* if cookie is zero, one instruction is enough to store it */
2265 		emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
2266 	} else {
2267 		emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
2268 		emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
2269 		     ctx);
2270 	}
2271 
2272 	/* save p to callee saved register x19 to avoid loading p with mov_i64
2273 	 * each time.
2274 	 */
2275 	emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);
2276 
2277 	/* arg1: prog */
2278 	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
2279 	/* arg2: &run_ctx */
2280 	emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx);
2281 
2282 	emit_call(enter_prog, ctx);
2283 
2284 	/* save return value to callee saved register x20 */
2285 	emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
2286 
2287 	/* if (__bpf_prog_enter(prog) == 0)
2288 	 *         goto skip_exec_of_prog;
2289 	 */
2290 	branch = ctx->image + ctx->idx;
2291 	emit(A64_NOP, ctx);
2292 
2293 	emit(A64_ADD_I(1, A64_R(0), A64_SP, bargs_off), ctx);
2294 	if (!p->jited)
2295 		emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
2296 
2297 	emit_call((const u64)p->bpf_func, ctx);
2298 
2299 	if (save_ret)
2300 		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
2301 
2302 	if (ctx->image) {
2303 		int offset = &ctx->image[ctx->idx] - branch;
2304 		*branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset));
2305 	}
2306 
2307 	/* arg1: prog */
2308 	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
2309 	/* arg2: start time */
2310 	emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx);
2311 	/* arg3: &run_ctx */
2312 	emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx);
2313 
2314 	emit_call(exit_prog, ctx);
2315 }
2316 
2317 static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
2318 			       int bargs_off, int retval_off, int run_ctx_off,
2319 			       __le32 **branches)
2320 {
2321 	int i;
2322 
2323 	/* The first fmod_ret program will receive a garbage return value.
2324 	 * Set this to 0 to avoid confusing the program.
2325 	 */
2326 	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
2327 	for (i = 0; i < tl->nr_links; i++) {
2328 		invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off,
2329 				run_ctx_off, true);
2330 		/* if (*(u64 *)(sp + retval_off) !=  0)
2331 		 *	goto do_fexit;
2332 		 */
2333 		emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);
2334 		/* Save the location of branch, and generate a nop.
2335 		 * This nop will be replaced with a cbnz later.
2336 		 */
2337 		branches[i] = ctx->image + ctx->idx;
2338 		emit(A64_NOP, ctx);
2339 	}
2340 }
2341 
/* Summary of how a traced function's arguments are split between
 * registers and the stack; filled in by calc_arg_aux().
 */
struct arg_aux {
	/* how many args are passed through registers, the rest of the args are
	 * passed through stack
	 */
	int args_in_regs;
	/* how many registers are used to pass arguments */
	int regs_for_args;
	/* how much stack is used for additional args passed to bpf program
	 * that did not fit in original function registers
	 */
	int bstack_for_args;
	/* how much stack is used for additional args passed to the
	 * original function when called from trampoline (this one needs
	 * arguments to be properly aligned)
	 */
	int ostack_for_args;
};
2359 
2360 static int calc_arg_aux(const struct btf_func_model *m,
2361 			 struct arg_aux *a)
2362 {
2363 	int stack_slots, nregs, slots, i;
2364 
2365 	/* verifier ensures m->nr_args <= MAX_BPF_FUNC_ARGS */
2366 	for (i = 0, nregs = 0; i < m->nr_args; i++) {
2367 		slots = (m->arg_size[i] + 7) / 8;
2368 		if (nregs + slots <= 8) /* passed through register ? */
2369 			nregs += slots;
2370 		else
2371 			break;
2372 	}
2373 
2374 	a->args_in_regs = i;
2375 	a->regs_for_args = nregs;
2376 	a->ostack_for_args = 0;
2377 	a->bstack_for_args = 0;
2378 
2379 	/* the rest arguments are passed through stack */
2380 	for (; i < m->nr_args; i++) {
2381 		stack_slots = (m->arg_size[i] + 7) / 8;
2382 		a->bstack_for_args += stack_slots * 8;
2383 		a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
2384 	}
2385 
2386 	return 0;
2387 }
2388 
/* Emit code that zeroes the bytes of 64-bit register @reg that lie outside
 * the @effective_bytes bytes actually holding data.
 *
 * Nothing is emitted when @effective_bytes is 0. Otherwise a pair of
 * shifts by (64 - 8 * @effective_bytes) bits pushes the unused bits out
 * of the register and brings zeroes back in; the shift order depends on
 * which end of the register the unused bits are at for the configured
 * endianness.
 */
static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes)
{
	int junk_bits;

	if (!effective_bytes)
		return;

	junk_bits = 64 - 8 * effective_bytes;
#ifdef CONFIG_CPU_BIG_ENDIAN
	/* unused bits are the low-order ones */
	emit(A64_LSR(1, reg, reg, junk_bits), ctx);
	emit(A64_LSL(1, reg, reg, junk_bits), ctx);
#else
	/* unused bits are the high-order ones */
	emit(A64_LSL(1, reg, reg, junk_bits), ctx);
	emit(A64_LSR(1, reg, reg, junk_bits), ctx);
#endif
}
2404 
2405 static void save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off,
2406 		      const struct btf_func_model *m,
2407 		      const struct arg_aux *a,
2408 		      bool for_call_origin)
2409 {
2410 	int i;
2411 	int reg;
2412 	int doff;
2413 	int soff;
2414 	int slots;
2415 	u8 tmp = bpf2a64[TMP_REG_1];
2416 
2417 	/* store arguments to the stack for the bpf program, or restore
2418 	 * arguments from stack for the original function
2419 	 */
2420 	for (reg = 0; reg < a->regs_for_args; reg++) {
2421 		emit(for_call_origin ?
2422 		     A64_LDR64I(reg, A64_SP, bargs_off) :
2423 		     A64_STR64I(reg, A64_SP, bargs_off),
2424 		     ctx);
2425 		bargs_off += 8;
2426 	}
2427 
2428 	soff = 32; /* on stack arguments start from FP + 32 */
2429 	doff = (for_call_origin ? oargs_off : bargs_off);
2430 
2431 	/* save on stack arguments */
2432 	for (i = a->args_in_regs; i < m->nr_args; i++) {
2433 		slots = (m->arg_size[i] + 7) / 8;
2434 		/* verifier ensures arg_size <= 16, so slots equals 1 or 2 */
2435 		while (slots-- > 0) {
2436 			emit(A64_LDR64I(tmp, A64_FP, soff), ctx);
2437 			/* if there is unused space in the last slot, clear
2438 			 * the garbage contained in the space.
2439 			 */
2440 			if (slots == 0 && !for_call_origin)
2441 				clear_garbage(ctx, tmp, m->arg_size[i] % 8);
2442 			emit(A64_STR64I(tmp, A64_SP, doff), ctx);
2443 			soff += 8;
2444 			doff += 8;
2445 		}
2446 	}
2447 }
2448 
2449 static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs)
2450 {
2451 	int reg;
2452 
2453 	for (reg = 0; reg < nregs; reg++) {
2454 		emit(A64_LDR64I(reg, A64_SP, bargs_off), ctx);
2455 		bargs_off += 8;
2456 	}
2457 }
2458 
2459 static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
2460 {
2461 	return fentry_links->nr_links == 1 &&
2462 		fentry_links->links[0]->link.type == BPF_LINK_TYPE_STRUCT_OPS;
2463 }
2464 
2465 static void store_func_meta(struct jit_ctx *ctx, u64 func_meta, int func_meta_off)
2466 {
2467 	emit_a64_mov_i64(A64_R(10), func_meta, ctx);
2468 	emit(A64_STR64I(A64_R(10), A64_SP, func_meta_off), ctx);
2469 }
2470 
/* Based on the x86's implementation of arch_prepare_bpf_trampoline().
 *
 * bpf prog and function entry before bpf trampoline hooked:
 *   mov x9, lr
 *   nop
 *
 * bpf prog and function entry after bpf trampoline hooked:
 *   mov x9, lr
 *   bl  <bpf_trampoline or plt>
 *
 * Emits the whole trampoline into @ctx: stack frame setup, saving of the
 * arguments, the fentry programs, the fmod_ret programs, the optional call
 * to the original function, the fexit programs and the return sequence.
 *
 * When ctx->image is NULL nothing can be patched and the function is used
 * to size the trampoline only (see arch_bpf_trampoline_size()).
 *
 * Returns the number of instructions emitted (ctx->idx), or -ENOMEM if the
 * array tracking the fmod_ret branches cannot be allocated.
 */
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
			      struct bpf_tramp_links *tlinks, void *func_addr,
			      const struct btf_func_model *m,
			      const struct arg_aux *a,
			      u32 flags)
{
	int i;
	int stack_size;
	int retaddr_off;
	int regs_off;
	int retval_off;
	int bargs_off;
	int func_meta_off;
	int ip_off;
	int run_ctx_off;
	int oargs_off;
	int nfuncargs;
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	bool save_ret;
	__le32 **branches = NULL;
	bool is_struct_ops = is_struct_ops_tramp(fentry);
	int cookie_off, cookie_cnt, cookie_bargs_off;
	int fsession_cnt = bpf_fsession_cnt(tlinks);
	u64 func_meta;

	/* trampoline stack layout:
	 *                    [ parent ip         ]
	 *                    [ FP                ]
	 * SP + retaddr_off   [ self ip           ]
	 *                    [ FP                ]
	 *
	 *                    [ padding           ] align SP to multiples of 16
	 *
	 *                    [ x20               ] callee saved reg x20
	 * SP + regs_off      [ x19               ] callee saved reg x19
	 *
	 * SP + retval_off    [ return value      ] BPF_TRAMP_F_CALL_ORIG or
	 *                                          BPF_TRAMP_F_RET_FENTRY_RET
	 *                    [ arg reg N         ]
	 *                    [ ...               ]
	 * SP + bargs_off     [ arg reg 1         ] for bpf
	 *
	 * SP + func_meta_off [ regs count, etc   ]
	 *
	 * SP + ip_off        [ traced function   ] BPF_TRAMP_F_IP_ARG flag
	 *
	 *                    [ stack cookie N    ]
	 *                    [ ...               ]
	 * SP + cookie_off    [ stack cookie 1    ]
	 *
	 * SP + run_ctx_off   [ bpf_tramp_run_ctx ]
	 *
	 *                    [ stack arg N       ]
	 *                    [ ...               ]
	 * SP + oargs_off     [ stack arg 1       ] for original func
	 */

	/* The offsets are computed bottom-up: each *_off is the running
	 * stack_size at the point where its area starts.
	 */
	stack_size = 0;
	oargs_off = stack_size;
	if (flags & BPF_TRAMP_F_CALL_ORIG)
		stack_size +=  a->ostack_for_args;

	run_ctx_off = stack_size;
	/* room for bpf_tramp_run_ctx */
	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);

	cookie_off = stack_size;
	/* room for session cookies */
	cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
	stack_size += cookie_cnt * 8;

	ip_off = stack_size;
	/* room for IP address argument */
	if (flags & BPF_TRAMP_F_IP_ARG)
		stack_size += 8;

	func_meta_off = stack_size;
	/* room for function metadata, such as regs count */
	stack_size += 8;

	bargs_off = stack_size;
	/* room for args */
	nfuncargs = a->regs_for_args + a->bstack_for_args / 8;
	stack_size += 8 * nfuncargs;

	/* room for return value */
	retval_off = stack_size;
	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
	if (save_ret)
		stack_size += 8;

	/* room for callee saved registers, currently x19 and x20 are used */
	regs_off = stack_size;
	stack_size += 16;

	/* round up to multiples of 16 to avoid SPAlignmentFault */
	stack_size = round_up(stack_size, 16);

	/* return address locates above FP */
	retaddr_off = stack_size + 8;

	if (flags & BPF_TRAMP_F_INDIRECT) {
		/*
		 * Indirect call for bpf_struct_ops
		 */
		emit_kcfi(cfi_get_func_hash(func_addr), ctx);
	}
	/* bpf trampoline may be invoked by 3 instruction types:
	 * 1. bl, attached to bpf prog or kernel function via short jump
	 * 2. br, attached to bpf prog or kernel function via long jump
	 * 3. blr, working as a function pointer, used by struct_ops.
	 * So BTI_JC should be used here to support both br and blr.
	 */
	emit_bti(A64_BTI_JC, ctx);

	/* x9 is not set for struct_ops */
	if (!is_struct_ops) {
		/* frame for parent function */
		emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
	}

	/* frame for patched function for tracing, or caller for struct_ops */
	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* allocate stack space */
	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);

	if (flags & BPF_TRAMP_F_IP_ARG) {
		/* save ip address of the traced function */
		emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx);
		emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
	}

	/* save function metadata */
	func_meta = nfuncargs;
	store_func_meta(ctx, func_meta, func_meta_off);

	/* save args for bpf */
	save_args(ctx, bargs_off, oargs_off, m, a, false);

	/* save callee saved registers */
	emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
	emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/* for the first pass, assume the worst case */
		if (!ctx->image)
			ctx->idx += 4;
		else
			emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
		emit_call((const u64)__bpf_tramp_enter, ctx);
	}

	if (fsession_cnt) {
		/* clear all the session cookies' value */
		emit(A64_MOVZ(1, A64_R(10), 0, 0), ctx);
		for (int i = 0; i < cookie_cnt; i++)
			emit(A64_STR64I(A64_R(10), A64_SP, cookie_off + 8 * i), ctx);
		/* clear the return value to make sure fentry always gets 0 */
		emit(A64_STR64I(A64_R(10), A64_SP, retval_off), ctx);
	}

	/* distance, in 8-byte slots, from the cookie area up to bargs_off;
	 * decremented for each program that uses a session cookie
	 */
	cookie_bargs_off = (bargs_off - cookie_off) / 8;
	for (i = 0; i < fentry->nr_links; i++) {
		if (bpf_prog_calls_session_cookie(fentry->links[i])) {
			u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);

			store_func_meta(ctx, meta, func_meta_off);
			cookie_bargs_off--;
		}
		invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
				retval_off, run_ctx_off,
				flags & BPF_TRAMP_F_RET_FENTRY_RET);
	}

	if (fmod_ret->nr_links) {
		branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
				   GFP_KERNEL);
		if (!branches)
			return -ENOMEM;

		invoke_bpf_mod_ret(ctx, fmod_ret, bargs_off, retval_off,
				   run_ctx_off, branches);
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/* save args for original func */
		save_args(ctx, bargs_off, oargs_off, m, a, true);
		/* call original func */
		emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
		emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
		emit(A64_RET(A64_R(10)), ctx);
		/* store return value */
		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
		/* reserve a nop for bpf_tramp_image_put */
		im->ip_after_call = ctx->ro_image + ctx->idx;
		emit(A64_NOP, ctx);
	}

	/* update the branches saved in invoke_bpf_mod_ret with cbnz */
	for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
		int offset = &ctx->image[ctx->idx] - branches[i];
		*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
	}

	/* set the "is_return" flag for fsession */
	func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
	if (fsession_cnt)
		store_func_meta(ctx, func_meta, func_meta_off);

	/* restart the cookie index for the fexit programs */
	cookie_bargs_off = (bargs_off - cookie_off) / 8;
	for (i = 0; i < fexit->nr_links; i++) {
		if (bpf_prog_calls_session_cookie(fexit->links[i])) {
			u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);

			store_func_meta(ctx, meta, func_meta_off);
			cookie_bargs_off--;
		}
		invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
				run_ctx_off, false);
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		im->ip_epilogue = ctx->ro_image + ctx->idx;
		/* for the first pass, assume the worst case */
		if (!ctx->image)
			ctx->idx += 4;
		else
			emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
		emit_call((const u64)__bpf_tramp_exit, ctx);
	}

	if (flags & BPF_TRAMP_F_RESTORE_REGS)
		restore_args(ctx, bargs_off, a->regs_for_args);

	/* restore callee saved register x19 and x20 */
	emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
	emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx);

	if (save_ret)
		emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx);

	/* reset SP  */
	emit(A64_MOV(1, A64_SP, A64_FP), ctx);

	if (is_struct_ops) {
		/* only one frame was pushed for struct_ops */
		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
		emit(A64_RET(A64_LR), ctx);
	} else {
		/* pop frames */
		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
		emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx);

		if (flags & BPF_TRAMP_F_SKIP_FRAME) {
			/* skip patched function, return to parent */
			emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
			emit(A64_RET(A64_R(9)), ctx);
		} else {
			/* return to patched function */
			emit(A64_MOV(1, A64_R(10), A64_LR), ctx);
			emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
			emit(A64_RET(A64_R(10)), ctx);
		}
	}

	kfree(branches);

	return ctx->idx;
}
2755 
/* Report that this JIT supports fsession; always returns true. */
bool bpf_jit_supports_fsession(void)
{
	return true;
}
2760 
2761 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
2762 			     struct bpf_tramp_links *tlinks, void *func_addr)
2763 {
2764 	struct jit_ctx ctx = {
2765 		.image = NULL,
2766 		.idx = 0,
2767 	};
2768 	struct bpf_tramp_image im;
2769 	struct arg_aux aaux;
2770 	int ret;
2771 
2772 	ret = calc_arg_aux(m, &aaux);
2773 	if (ret < 0)
2774 		return ret;
2775 
2776 	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags);
2777 	if (ret < 0)
2778 		return ret;
2779 
2780 	return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;
2781 }
2782 
/* Allocate @size bytes for a trampoline image via bpf_prog_pack_alloc(),
 * passing jit_fill_hole to it; returns whatever that call returns.
 */
void *arch_alloc_bpf_trampoline(unsigned int size)
{
	return bpf_prog_pack_alloc(size, jit_fill_hole);
}
2787 
/* Release a trampoline image of @size bytes via bpf_prog_pack_free(). */
void arch_free_bpf_trampoline(void *image, unsigned int size)
{
	bpf_prog_pack_free(image, size);
}
2792 
/* No-op on arm64: returns 0 without touching @image.
 * NOTE(review): presumably nothing is needed because the image comes from
 * the prog pack allocator and is only written via bpf_arch_text_copy() -
 * confirm.
 */
int arch_protect_bpf_trampoline(void *image, unsigned int size)
{
	return 0;
}
2797 
2798 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
2799 				void *ro_image_end, const struct btf_func_model *m,
2800 				u32 flags, struct bpf_tramp_links *tlinks,
2801 				void *func_addr)
2802 {
2803 	u32 size = ro_image_end - ro_image;
2804 	struct arg_aux aaux;
2805 	void *image, *tmp;
2806 	int ret;
2807 
2808 	/* image doesn't need to be in module memory range, so we can
2809 	 * use kvmalloc.
2810 	 */
2811 	image = kvmalloc(size, GFP_KERNEL);
2812 	if (!image)
2813 		return -ENOMEM;
2814 
2815 	struct jit_ctx ctx = {
2816 		.image = image,
2817 		.ro_image = ro_image,
2818 		.idx = 0,
2819 		.write = true,
2820 	};
2821 
2822 
2823 	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
2824 	ret = calc_arg_aux(m, &aaux);
2825 	if (ret)
2826 		goto out;
2827 	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags);
2828 
2829 	if (ret > 0 && validate_code(&ctx) < 0) {
2830 		ret = -EINVAL;
2831 		goto out;
2832 	}
2833 
2834 	if (ret > 0)
2835 		ret *= AARCH64_INSN_SIZE;
2836 
2837 	tmp = bpf_arch_text_copy(ro_image, image, size);
2838 	if (IS_ERR(tmp)) {
2839 		ret = PTR_ERR(tmp);
2840 		goto out;
2841 	}
2842 
2843 out:
2844 	kvfree(image);
2845 	return ret;
2846 }
2847 
2848 static bool is_long_jump(void *ip, void *target)
2849 {
2850 	long offset;
2851 
2852 	/* NULL target means this is a NOP */
2853 	if (!target)
2854 		return false;
2855 
2856 	offset = (long)target - (long)ip;
2857 	return offset < -SZ_128M || offset >= SZ_128M;
2858 }
2859 
2860 static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
2861 			     void *addr, void *plt, u32 *insn)
2862 {
2863 	void *target;
2864 
2865 	if (!addr) {
2866 		*insn = aarch64_insn_gen_nop();
2867 		return 0;
2868 	}
2869 
2870 	if (is_long_jump(ip, addr))
2871 		target = plt;
2872 	else
2873 		target = addr;
2874 
2875 	*insn = aarch64_insn_gen_branch_imm((unsigned long)ip,
2876 					    (unsigned long)target,
2877 					    type);
2878 
2879 	return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT;
2880 }
2881 
2882 /* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf
2883  * trampoline with the branch instruction from @ip to @new_addr. If @old_addr
2884  * or @new_addr is NULL, the old or new instruction is NOP.
2885  *
2886  * When @ip is the bpf prog entry, a bpf trampoline is being attached or
2887  * detached. Since bpf trampoline and bpf prog are allocated separately with
2888  * vmalloc, the address distance may exceed 128MB, the maximum branch range.
2889  * So long jump should be handled.
2890  *
2891  * When a bpf prog is constructed, a plt pointing to empty trampoline
2892  * dummy_tramp is placed at the end:
2893  *
2894  *      bpf_prog:
2895  *              mov x9, lr
2896  *              nop // patchsite
2897  *              ...
2898  *              ret
2899  *
2900  *      plt:
2901  *              ldr x10, target
2902  *              br x10
2903  *      target:
2904  *              .quad dummy_tramp // plt target
2905  *
2906  * This is also the state when no trampoline is attached.
2907  *
2908  * When a short-jump bpf trampoline is attached, the patchsite is patched
2909  * to a bl instruction to the trampoline directly:
2910  *
2911  *      bpf_prog:
2912  *              mov x9, lr
2913  *              bl <short-jump bpf trampoline address> // patchsite
2914  *              ...
2915  *              ret
2916  *
2917  *      plt:
2918  *              ldr x10, target
2919  *              br x10
2920  *      target:
2921  *              .quad dummy_tramp // plt target
2922  *
2923  * When a long-jump bpf trampoline is attached, the plt target is filled with
2924  * the trampoline address and the patchsite is patched to a bl instruction to
2925  * the plt:
2926  *
2927  *      bpf_prog:
2928  *              mov x9, lr
2929  *              bl plt // patchsite
2930  *              ...
2931  *              ret
2932  *
2933  *      plt:
2934  *              ldr x10, target
2935  *              br x10
2936  *      target:
2937  *              .quad <long-jump bpf trampoline address> // plt target
2938  *
2939  * The dummy_tramp is used to prevent another CPU from jumping to unknown
2940  * locations during the patching process, making the patching process easier.
2941  */
2942 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
2943 		       enum bpf_text_poke_type new_t, void *old_addr,
2944 		       void *new_addr)
2945 {
2946 	int ret;
2947 	u32 old_insn;
2948 	u32 new_insn;
2949 	u32 replaced;
2950 	struct bpf_plt *plt = NULL;
2951 	unsigned long size = 0UL;
2952 	unsigned long offset = ~0UL;
2953 	enum aarch64_insn_branch_type branch_type;
2954 	char namebuf[KSYM_NAME_LEN];
2955 	void *image = NULL;
2956 	u64 plt_target = 0ULL;
2957 	bool poking_bpf_entry;
2958 
2959 	if (!bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
		/* Only poking bpf text is supported. Since kernel function
		 * entry is set up by ftrace, we rely on ftrace to poke kernel
		 * functions.
		 */
2964 		return -ENOTSUPP;
2965 
2966 	image = ip - offset;
2967 	/* zero offset means we're poking bpf prog entry */
2968 	poking_bpf_entry = (offset == 0UL);
2969 
2970 	/* bpf prog entry, find plt and the real patchsite */
2971 	if (poking_bpf_entry) {
2972 		/* plt locates at the end of bpf prog */
2973 		plt = image + size - PLT_TARGET_OFFSET;
2974 
2975 		/* skip to the nop instruction in bpf prog entry:
2976 		 * bti c // if BTI enabled
2977 		 * mov x9, x30
2978 		 * nop
2979 		 */
2980 		ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
2981 	}
2982 
2983 	/* long jump is only possible at bpf prog entry */
2984 	if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
2985 		    !poking_bpf_entry))
2986 		return -EINVAL;
2987 
2988 	branch_type = old_t == BPF_MOD_CALL ? AARCH64_INSN_BRANCH_LINK :
2989 					      AARCH64_INSN_BRANCH_NOLINK;
2990 	if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
2991 		return -EFAULT;
2992 
2993 	branch_type = new_t == BPF_MOD_CALL ? AARCH64_INSN_BRANCH_LINK :
2994 					      AARCH64_INSN_BRANCH_NOLINK;
2995 	if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
2996 		return -EFAULT;
2997 
2998 	if (is_long_jump(ip, new_addr))
2999 		plt_target = (u64)new_addr;
3000 	else if (is_long_jump(ip, old_addr))
3001 		/* if the old target is a long jump and the new target is not,
3002 		 * restore the plt target to dummy_tramp, so there is always a
3003 		 * legal and harmless address stored in plt target, and we'll
3004 		 * never jump from plt to an unknown place.
3005 		 */
3006 		plt_target = (u64)&dummy_tramp;
3007 
3008 	if (plt_target) {
3009 		/* non-zero plt_target indicates we're patching a bpf prog,
3010 		 * which is read only.
3011 		 */
3012 		if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1))
3013 			return -EFAULT;
3014 		WRITE_ONCE(plt->target, plt_target);
3015 		set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1);
3016 		/* since plt target points to either the new trampoline
3017 		 * or dummy_tramp, even if another CPU reads the old plt
3018 		 * target value before fetching the bl instruction to plt,
3019 		 * it will be brought back by dummy_tramp, so no barrier is
3020 		 * required here.
3021 		 */
3022 	}
3023 
3024 	/* if the old target and the new target are both long jumps, no
3025 	 * patching is required
3026 	 */
3027 	if (old_insn == new_insn)
3028 		return 0;
3029 
3030 	mutex_lock(&text_mutex);
3031 	if (aarch64_insn_read(ip, &replaced)) {
3032 		ret = -EFAULT;
3033 		goto out;
3034 	}
3035 
3036 	if (replaced != old_insn) {
3037 		ret = -EFAULT;
3038 		goto out;
3039 	}
3040 
3041 	/* We call aarch64_insn_patch_text_nosync() to replace instruction
3042 	 * atomically, so no other CPUs will fetch a half-new and half-old
3043 	 * instruction. But there is chance that another CPU executes the
3044 	 * old instruction after the patching operation finishes (e.g.,
3045 	 * pipeline not flushed, or icache not synchronized yet).
3046 	 *
3047 	 * 1. when a new trampoline is attached, it is not a problem for
3048 	 *    different CPUs to jump to different trampolines temporarily.
3049 	 *
3050 	 * 2. when an old trampoline is freed, we should wait for all other
3051 	 *    CPUs to exit the trampoline and make sure the trampoline is no
3052 	 *    longer reachable, since bpf_tramp_image_put() function already
3053 	 *    uses percpu_ref and task-based rcu to do the sync, no need to call
3054 	 *    the sync version here, see bpf_tramp_image_put() for details.
3055 	 */
3056 	ret = aarch64_insn_patch_text_nosync(ip, new_insn);
3057 out:
3058 	mutex_unlock(&text_mutex);
3059 
3060 	return ret;
3061 }
3062 
/* Capability query for ptr_xchg: this JIT answers true unconditionally. */
bool bpf_jit_supports_ptr_xchg(void)
{
	return true;
}
3067 
/* Capability query for exceptions: this JIT answers true unconditionally.
 *
 * Unwinding starts inside the bpf_throw call and walks kernel frames as well
 * as BPF frames, so the FP unwinder has to be available to get through the
 * kernel frames and reach the BPF frames in the stack trace. An ARM64 kernel
 * is always built with CONFIG_FRAME_POINTER=y, hence no runtime check.
 */
bool bpf_jit_supports_exceptions(void)
{
	return true;
}
3077 
/* Capability query for arena: this JIT answers true unconditionally. */
bool bpf_jit_supports_arena(void)
{
	return true;
}
3082 
3083 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
3084 {
3085 	if (!in_arena)
3086 		return true;
3087 	switch (insn->code) {
3088 	case BPF_STX | BPF_ATOMIC | BPF_W:
3089 	case BPF_STX | BPF_ATOMIC | BPF_DW:
3090 		if (!bpf_atomic_is_load_store(insn) &&
3091 		    !cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
3092 			return false;
3093 	}
3094 	return true;
3095 }
3096 
/* Capability query for percpu insn: this JIT answers true unconditionally. */
bool bpf_jit_supports_percpu_insn(void)
{
	return true;
}
3101 
/* Always true: nospec insns against Spectre v4 are skipped on arm64.
 *
 * arm64 relies on the firmware mitigation of Speculative Store Bypass, which
 * is controlled through the ssbd kernel parameter. When that mitigation is
 * enabled it covers all kernel code, so no additional instructions need to
 * be provided.
 */
bool bpf_jit_bypass_spec_v4(void)
{
	return true;
}
3112 
/* Capability query for timed may_goto: this JIT answers true unconditionally. */
bool bpf_jit_supports_timed_may_goto(void)
{
	return true;
}
3117 
3118 bool bpf_jit_inlines_helper_call(s32 imm)
3119 {
3120 	switch (imm) {
3121 	case BPF_FUNC_get_smp_processor_id:
3122 	case BPF_FUNC_get_current_task:
3123 	case BPF_FUNC_get_current_task_btf:
3124 		return true;
3125 	default:
3126 		return false;
3127 	}
3128 }
3129 
3130 void bpf_jit_free(struct bpf_prog *prog)
3131 {
3132 	if (prog->jited) {
3133 		struct arm64_jit_data *jit_data = prog->aux->jit_data;
3134 		struct bpf_binary_header *hdr;
3135 		void __percpu *priv_stack_ptr;
3136 		int priv_stack_alloc_sz;
3137 
3138 		/*
3139 		 * If we fail the final pass of JIT (from jit_subprogs),
3140 		 * the program may not be finalized yet. Call finalize here
3141 		 * before freeing it.
3142 		 */
3143 		if (jit_data) {
3144 			bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header);
3145 			kfree(jit_data);
3146 		}
3147 		prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
3148 		hdr = bpf_jit_binary_pack_hdr(prog);
3149 		bpf_jit_binary_pack_free(hdr, NULL);
3150 		priv_stack_ptr = prog->aux->priv_stack_ptr;
3151 		if (priv_stack_ptr) {
3152 			priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
3153 					      2 * PRIV_STACK_GUARD_SZ;
3154 			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
3155 			free_percpu(prog->aux->priv_stack_ptr);
3156 		}
3157 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
3158 	}
3159 
3160 	bpf_prog_unlock_free(prog);
3161 }
3162