xref: /linux/arch/arm64/net/bpf_jit_comp.c (revision ef815d2cba782e96b9aad9483523d474ed41c62a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * BPF JIT compiler for ARM64
4  *
5  * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
6  */
7 
8 #define pr_fmt(fmt) "bpf_jit: " fmt
9 
10 #include <linux/bitfield.h>
11 #include <linux/bpf.h>
12 #include <linux/filter.h>
13 #include <linux/memory.h>
14 #include <linux/printk.h>
15 #include <linux/slab.h>
16 
17 #include <asm/asm-extable.h>
18 #include <asm/byteorder.h>
19 #include <asm/cacheflush.h>
20 #include <asm/debug-monitors.h>
21 #include <asm/insn.h>
22 #include <asm/patching.h>
23 #include <asm/set_memory.h>
24 
25 #include "bpf_jit.h"
26 
27 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
28 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
29 #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
30 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
31 #define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
32 
/*
 * Bail out of the enclosing function with -EINVAL when @imm does not fit
 * in a signed @bits-wide immediate field.
 *
 * A signed N-bit field holds [-2^(N-1), 2^(N-1) - 1], so only N - 1 bits
 * are available for the magnitude: a positive value must have nothing set
 * at or above bit N - 1, and the complement of a negative value must
 * satisfy the same condition.
 *
 * The macro expects an int named 'i' (the index of the BPF instruction
 * being translated) to be in scope for the diagnostic, and it evaluates
 * @imm more than once, so pass a plain variable.
 */
#define check_imm(bits, imm) do {				\
	if ((((imm) > 0) && ((imm) >> ((bits) - 1))) ||		\
	    (((imm) < 0) && (~(imm) >> ((bits) - 1)))) {	\
		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
			i, imm, imm);				\
		return -EINVAL;					\
	}							\
} while (0)
/* Range checks for the signed 19-bit and 26-bit branch immediates. */
#define check_imm19(imm) check_imm(19, imm)
#define check_imm26(imm) check_imm(26, imm)
43 
/*
 * Map BPF registers to A64 registers.
 *
 * The table is indexed by BPF register number; the JIT-private pseudo
 * registers defined above (TMP_REG_*, TCALL_CNT, FP_BOTTOM) occupy the
 * slots starting at MAX_BPF_JIT_REG.
 */
static const int bpf2a64[] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = A64_R(7),
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = A64_R(0),
	[BPF_REG_2] = A64_R(1),
	[BPF_REG_3] = A64_R(2),
	[BPF_REG_4] = A64_R(3),
	[BPF_REG_5] = A64_R(4),
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = A64_R(19),
	[BPF_REG_7] = A64_R(20),
	[BPF_REG_8] = A64_R(21),
	[BPF_REG_9] = A64_R(22),
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = A64_R(25),
	/* temporary registers for BPF JIT */
	[TMP_REG_1] = A64_R(10),
	[TMP_REG_2] = A64_R(11),
	[TMP_REG_3] = A64_R(12),
	/* tail_call_cnt */
	[TCALL_CNT] = A64_R(26),
	/* temporary register for blinding constants */
	[BPF_REG_AX] = A64_R(9),
	/* set by the prologue to BPF_FP - ctx->fpb_offset */
	[FP_BOTTOM] = A64_R(27),
};
71 
/* State carried through the translation of one BPF program. */
struct jit_ctx {
	/* program being translated */
	const struct bpf_prog *prog;
	/* index of the next A64 instruction slot; advanced by emit() */
	int idx;
	/* instruction index that epilogue_offset() measures the distance to */
	int epilogue_offset;
	/* table indexed by BPF instruction, read by bpf2a64_offset() */
	int *offset;
	/* next unused entry in prog->aux->extable */
	int exentry_idx;
	/* output buffer; NULL while instructions are only being counted */
	__le32 *image;
	/* prog->aux->stack_depth rounded up to 16, set by build_prologue() */
	u32 stack_size;
	/* distance subtracted from BPF_FP to form the FP_BOTTOM register */
	int fpb_offset;
};
82 
/*
 * In-image layout of the PLT emitted by build_plt(): two instructions
 * followed by the 64-bit literal that the first instruction loads.
 */
struct bpf_plt {
	u32 insn_ldr; /* load target */
	u32 insn_br;  /* branch to target */
	u64 target;   /* target value */
};

/* Size and byte offset of the literal slot inside struct bpf_plt. */
#define PLT_TARGET_SIZE   sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
91 
92 static inline void emit(const u32 insn, struct jit_ctx *ctx)
93 {
94 	if (ctx->image != NULL)
95 		ctx->image[ctx->idx] = cpu_to_le32(insn);
96 
97 	ctx->idx++;
98 }
99 
100 static inline void emit_a64_mov_i(const int is64, const int reg,
101 				  const s32 val, struct jit_ctx *ctx)
102 {
103 	u16 hi = val >> 16;
104 	u16 lo = val & 0xffff;
105 
106 	if (hi & 0x8000) {
107 		if (hi == 0xffff) {
108 			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
109 		} else {
110 			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
111 			if (lo != 0xffff)
112 				emit(A64_MOVK(is64, reg, lo, 0), ctx);
113 		}
114 	} else {
115 		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
116 		if (hi)
117 			emit(A64_MOVK(is64, reg, hi, 16), ctx);
118 	}
119 }
120 
121 static int i64_i16_blocks(const u64 val, bool inverse)
122 {
123 	return (((val >>  0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
124 	       (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
125 	       (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
126 	       (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
127 }
128 
129 static inline void emit_a64_mov_i64(const int reg, const u64 val,
130 				    struct jit_ctx *ctx)
131 {
132 	u64 nrm_tmp = val, rev_tmp = ~val;
133 	bool inverse;
134 	int shift;
135 
136 	if (!(nrm_tmp >> 32))
137 		return emit_a64_mov_i(0, reg, (u32)val, ctx);
138 
139 	inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
140 	shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
141 					  (fls64(nrm_tmp) - 1)), 16), 0);
142 	if (inverse)
143 		emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
144 	else
145 		emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
146 	shift -= 16;
147 	while (shift >= 0) {
148 		if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
149 			emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
150 		shift -= 16;
151 	}
152 }
153 
154 static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
155 {
156 	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
157 		emit(insn, ctx);
158 }
159 
160 /*
161  * Kernel addresses in the vmalloc space use at most 48 bits, and the
162  * remaining bits are guaranteed to be 0x1. So we can compose the address
163  * with a fixed length movn/movk/movk sequence.
164  */
165 static inline void emit_addr_mov_i64(const int reg, const u64 val,
166 				     struct jit_ctx *ctx)
167 {
168 	u64 tmp = val;
169 	int shift = 0;
170 
171 	emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx);
172 	while (shift < 32) {
173 		tmp >>= 16;
174 		shift += 16;
175 		emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
176 	}
177 }
178 
179 static inline void emit_call(u64 target, struct jit_ctx *ctx)
180 {
181 	u8 tmp = bpf2a64[TMP_REG_1];
182 
183 	emit_addr_mov_i64(tmp, target, ctx);
184 	emit(A64_BLR(tmp), ctx);
185 }
186 
187 static inline int bpf2a64_offset(int bpf_insn, int off,
188 				 const struct jit_ctx *ctx)
189 {
190 	/* BPF JMP offset is relative to the next instruction */
191 	bpf_insn++;
192 	/*
193 	 * Whereas arm64 branch instructions encode the offset
194 	 * from the branch itself, so we must subtract 1 from the
195 	 * instruction offset.
196 	 */
197 	return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
198 }
199 
200 static void jit_fill_hole(void *area, unsigned int size)
201 {
202 	__le32 *ptr;
203 	/* We are guaranteed to have aligned memory. */
204 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
205 		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
206 }
207 
208 static inline int epilogue_offset(const struct jit_ctx *ctx)
209 {
210 	int to = ctx->epilogue_offset;
211 	int from = ctx->idx;
212 
213 	return to - from;
214 }
215 
216 static bool is_addsub_imm(u32 imm)
217 {
218 	/* Either imm12 or shifted imm12. */
219 	return !(imm & ~0xfff) || !(imm & ~0xfff000);
220 }
221 
222 /*
223  * There are 3 types of AArch64 LDR/STR (immediate) instruction:
224  * Post-index, Pre-index, Unsigned offset.
225  *
226  * For BPF ldr/str, the "unsigned offset" type is sufficient.
227  *
228  * "Unsigned offset" type LDR(immediate) format:
229  *
230  *    3                   2                   1                   0
231  *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
232  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
233  * |x x|1 1 1 0 0 1 0 1|         imm12         |    Rn   |    Rt   |
234  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
235  * scale
236  *
237  * "Unsigned offset" type STR(immediate) format:
238  *    3                   2                   1                   0
239  *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
240  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
241  * |x x|1 1 1 0 0 1 0 0|         imm12         |    Rn   |    Rt   |
242  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
243  * scale
244  *
245  * The offset is calculated from imm12 and scale in the following way:
246  *
247  * offset = (u64)imm12 << scale
248  */
/*
 * Tell whether @offset can be encoded by an "unsigned offset" LDR/STR:
 * it must be non-negative, at most 0xFFF << @scale, and a multiple of
 * 1 << @scale.
 */
static bool is_lsi_offset(int offset, int scale)
{
	return offset >= 0 &&
	       offset <= (0xFFF << scale) &&
	       !(offset & ((1 << scale) - 1));
}
262 
/* generated prologue:
 *      bti jc // if CONFIG_ARM64_BTI_KERNEL
 *      mov x9, lr
 *      nop  // POKE_OFFSET
 *      paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
 *      stp x29, lr, [sp, #-16]!
 *      mov x29, sp
 *      stp x19, x20, [sp, #-16]!
 *      stp x21, x22, [sp, #-16]!
 *      stp x25, x26, [sp, #-16]!
 *      stp x27, x28, [sp, #-16]!
 *      mov x25, sp
 *      mov tcc, #0 // only if !ebpf_from_cbpf and func_idx == 0
 *      // PROLOGUE_OFFSET
 *      bti j // same condition as "mov tcc", and CONFIG_ARM64_BTI_KERNEL
 *      sub x27, x25, #fpb_offset
 *      sub sp, sp, #stack_size
 */

/* Number of instructions contributed by the optional BTI / PAC entries */
#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)

/* Offset of nop instruction in bpf prog entry to be poked */
#define POKE_OFFSET (BTI_INSNS + 1)

/*
 * Tail call offset to jump into, in instructions from the program entry:
 * bti + (mov x9, nop) + paciasp + the 8 instructions from
 * "stp x29, lr" up to and including "mov tcc, #0".
 */
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)
287 
288 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
289 {
290 	const struct bpf_prog *prog = ctx->prog;
291 	const bool is_main_prog = prog->aux->func_idx == 0;
292 	const u8 r6 = bpf2a64[BPF_REG_6];
293 	const u8 r7 = bpf2a64[BPF_REG_7];
294 	const u8 r8 = bpf2a64[BPF_REG_8];
295 	const u8 r9 = bpf2a64[BPF_REG_9];
296 	const u8 fp = bpf2a64[BPF_REG_FP];
297 	const u8 tcc = bpf2a64[TCALL_CNT];
298 	const u8 fpb = bpf2a64[FP_BOTTOM];
299 	const int idx0 = ctx->idx;
300 	int cur_offset;
301 
302 	/*
303 	 * BPF prog stack layout
304 	 *
305 	 *                         high
306 	 * original A64_SP =>   0:+-----+ BPF prologue
307 	 *                        |FP/LR|
308 	 * current A64_FP =>  -16:+-----+
309 	 *                        | ... | callee saved registers
310 	 * BPF fp register => -64:+-----+ <= (BPF_FP)
311 	 *                        |     |
312 	 *                        | ... | BPF prog stack
313 	 *                        |     |
314 	 *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
315 	 *                        |RSVD | padding
316 	 * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
317 	 *                        |     |
318 	 *                        | ... | Function call stack
319 	 *                        |     |
320 	 *                        +-----+
321 	 *                          low
322 	 *
323 	 */
324 
325 	/* bpf function may be invoked by 3 instruction types:
326 	 * 1. bl, attached via freplace to bpf prog via short jump
327 	 * 2. br, attached via freplace to bpf prog via long jump
328 	 * 3. blr, working as a function pointer, used by emit_call.
329 	 * So BTI_JC should used here to support both br and blr.
330 	 */
331 	emit_bti(A64_BTI_JC, ctx);
332 
333 	emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
334 	emit(A64_NOP, ctx);
335 
336 	/* Sign lr */
337 	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
338 		emit(A64_PACIASP, ctx);
339 
340 	/* Save FP and LR registers to stay align with ARM64 AAPCS */
341 	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
342 	emit(A64_MOV(1, A64_FP, A64_SP), ctx);
343 
344 	/* Save callee-saved registers */
345 	emit(A64_PUSH(r6, r7, A64_SP), ctx);
346 	emit(A64_PUSH(r8, r9, A64_SP), ctx);
347 	emit(A64_PUSH(fp, tcc, A64_SP), ctx);
348 	emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
349 
350 	/* Set up BPF prog stack base register */
351 	emit(A64_MOV(1, fp, A64_SP), ctx);
352 
353 	if (!ebpf_from_cbpf && is_main_prog) {
354 		/* Initialize tail_call_cnt */
355 		emit(A64_MOVZ(1, tcc, 0, 0), ctx);
356 
357 		cur_offset = ctx->idx - idx0;
358 		if (cur_offset != PROLOGUE_OFFSET) {
359 			pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
360 				    cur_offset, PROLOGUE_OFFSET);
361 			return -1;
362 		}
363 
364 		/* BTI landing pad for the tail call, done with a BR */
365 		emit_bti(A64_BTI_J, ctx);
366 	}
367 
368 	emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx);
369 
370 	/* Stack must be multiples of 16B */
371 	ctx->stack_size = round_up(prog->aux->stack_depth, 16);
372 
373 	/* Set up function call stack */
374 	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
375 	return 0;
376 }
377 
378 static int out_offset = -1; /* initialized on the first pass of build_body() */
379 static int emit_bpf_tail_call(struct jit_ctx *ctx)
380 {
381 	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
382 	const u8 r2 = bpf2a64[BPF_REG_2];
383 	const u8 r3 = bpf2a64[BPF_REG_3];
384 
385 	const u8 tmp = bpf2a64[TMP_REG_1];
386 	const u8 prg = bpf2a64[TMP_REG_2];
387 	const u8 tcc = bpf2a64[TCALL_CNT];
388 	const int idx0 = ctx->idx;
389 #define cur_offset (ctx->idx - idx0)
390 #define jmp_offset (out_offset - (cur_offset))
391 	size_t off;
392 
393 	/* if (index >= array->map.max_entries)
394 	 *     goto out;
395 	 */
396 	off = offsetof(struct bpf_array, map.max_entries);
397 	emit_a64_mov_i64(tmp, off, ctx);
398 	emit(A64_LDR32(tmp, r2, tmp), ctx);
399 	emit(A64_MOV(0, r3, r3), ctx);
400 	emit(A64_CMP(0, r3, tmp), ctx);
401 	emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
402 
403 	/*
404 	 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
405 	 *     goto out;
406 	 * tail_call_cnt++;
407 	 */
408 	emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
409 	emit(A64_CMP(1, tcc, tmp), ctx);
410 	emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
411 	emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
412 
413 	/* prog = array->ptrs[index];
414 	 * if (prog == NULL)
415 	 *     goto out;
416 	 */
417 	off = offsetof(struct bpf_array, ptrs);
418 	emit_a64_mov_i64(tmp, off, ctx);
419 	emit(A64_ADD(1, tmp, r2, tmp), ctx);
420 	emit(A64_LSL(1, prg, r3, 3), ctx);
421 	emit(A64_LDR64(prg, tmp, prg), ctx);
422 	emit(A64_CBZ(1, prg, jmp_offset), ctx);
423 
424 	/* goto *(prog->bpf_func + prologue_offset); */
425 	off = offsetof(struct bpf_prog, bpf_func);
426 	emit_a64_mov_i64(tmp, off, ctx);
427 	emit(A64_LDR64(tmp, prg, tmp), ctx);
428 	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
429 	emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
430 	emit(A64_BR(tmp), ctx);
431 
432 	/* out: */
433 	if (out_offset == -1)
434 		out_offset = cur_offset;
435 	if (cur_offset != out_offset) {
436 		pr_err_once("tail_call out_offset = %d, expected %d!\n",
437 			    cur_offset, out_offset);
438 		return -1;
439 	}
440 	return 0;
441 #undef cur_offset
442 #undef jmp_offset
443 }
444 
445 #ifdef CONFIG_ARM64_LSE_ATOMICS
446 static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
447 {
448 	const u8 code = insn->code;
449 	const u8 dst = bpf2a64[insn->dst_reg];
450 	const u8 src = bpf2a64[insn->src_reg];
451 	const u8 tmp = bpf2a64[TMP_REG_1];
452 	const u8 tmp2 = bpf2a64[TMP_REG_2];
453 	const bool isdw = BPF_SIZE(code) == BPF_DW;
454 	const s16 off = insn->off;
455 	u8 reg;
456 
457 	if (!off) {
458 		reg = dst;
459 	} else {
460 		emit_a64_mov_i(1, tmp, off, ctx);
461 		emit(A64_ADD(1, tmp, tmp, dst), ctx);
462 		reg = tmp;
463 	}
464 
465 	switch (insn->imm) {
466 	/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
467 	case BPF_ADD:
468 		emit(A64_STADD(isdw, reg, src), ctx);
469 		break;
470 	case BPF_AND:
471 		emit(A64_MVN(isdw, tmp2, src), ctx);
472 		emit(A64_STCLR(isdw, reg, tmp2), ctx);
473 		break;
474 	case BPF_OR:
475 		emit(A64_STSET(isdw, reg, src), ctx);
476 		break;
477 	case BPF_XOR:
478 		emit(A64_STEOR(isdw, reg, src), ctx);
479 		break;
480 	/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
481 	case BPF_ADD | BPF_FETCH:
482 		emit(A64_LDADDAL(isdw, src, reg, src), ctx);
483 		break;
484 	case BPF_AND | BPF_FETCH:
485 		emit(A64_MVN(isdw, tmp2, src), ctx);
486 		emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
487 		break;
488 	case BPF_OR | BPF_FETCH:
489 		emit(A64_LDSETAL(isdw, src, reg, src), ctx);
490 		break;
491 	case BPF_XOR | BPF_FETCH:
492 		emit(A64_LDEORAL(isdw, src, reg, src), ctx);
493 		break;
494 	/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
495 	case BPF_XCHG:
496 		emit(A64_SWPAL(isdw, src, reg, src), ctx);
497 		break;
498 	/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
499 	case BPF_CMPXCHG:
500 		emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
501 		break;
502 	default:
503 		pr_err_once("unknown atomic op code %02x\n", insn->imm);
504 		return -EINVAL;
505 	}
506 
507 	return 0;
508 }
509 #else
510 static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
511 {
512 	return -EINVAL;
513 }
514 #endif
515 
/*
 * Emit a BPF atomic operation as a load-exclusive/store-exclusive loop.
 *
 * The memory operand is dst_reg + off; a non-zero off is folded into
 * TMP_REG_1 first. Every sequence retries from its load-exclusive while
 * the store-exclusive status register (TMP_REG_3) is non-zero. Branch
 * offsets are counted in instructions relative to the branch itself.
 *
 * Returns 0 on success, or -EINVAL for an unknown operation code or a
 * branch offset rejected by check_imm19().
 */
static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 tmp3 = bpf2a64[TMP_REG_3];
	/* index of this BPF instruction; used by check_imm19() diagnostics */
	const int i = insn - ctx->prog->insnsi;
	const s32 imm = insn->imm;
	const s16 off = insn->off;
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	u8 reg;
	s32 jmp_offset;

	/* reg = address register: dst itself, or tmp = dst + off */
	if (!off) {
		reg = dst;
	} else {
		emit_a64_mov_i(1, tmp, off, ctx);
		emit(A64_ADD(1, tmp, tmp, dst), ctx);
		reg = tmp;
	}

	if (imm == BPF_ADD || imm == BPF_AND ||
	    imm == BPF_OR || imm == BPF_XOR) {
		/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
		emit(A64_LDXR(isdw, tmp2, reg), ctx);
		if (imm == BPF_ADD)
			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_AND)
			emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_OR)
			emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
		else
			emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
		emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
		/* retry from the LDXR, three instructions before the CBNZ */
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
	} else if (imm == (BPF_ADD | BPF_FETCH) ||
		   imm == (BPF_AND | BPF_FETCH) ||
		   imm == (BPF_OR | BPF_FETCH) ||
		   imm == (BPF_XOR | BPF_FETCH)) {
		/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
		const u8 ax = bpf2a64[BPF_REG_AX];

		/* keep the operand in ax: src is overwritten by the load */
		emit(A64_MOV(isdw, ax, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		if (imm == (BPF_ADD | BPF_FETCH))
			emit(A64_ADD(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_AND | BPF_FETCH))
			emit(A64_AND(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_OR | BPF_FETCH))
			emit(A64_ORR(isdw, tmp2, src, ax), ctx);
		else
			emit(A64_EOR(isdw, tmp2, src, ax), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		/* retry from the LDXR, three instructions before the CBNZ */
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_XCHG) {
		/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
		/* keep the new value in tmp2: src is overwritten by the load */
		emit(A64_MOV(isdw, tmp2, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		/* retry from the LDXR, two instructions before the CBNZ */
		jmp_offset = -2;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_CMPXCHG) {
		/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
		const u8 r0 = bpf2a64[BPF_REG_0];

		/* keep the expected value in tmp2: r0 receives the old value */
		emit(A64_MOV(isdw, tmp2, r0), ctx);
		emit(A64_LDXR(isdw, r0, reg), ctx);
		/* tmp3 != 0 means the loaded value differs from the expected one */
		emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
		/* on mismatch skip STLXR, CBNZ and DMB: land after the sequence */
		jmp_offset = 4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
		emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
		/* retry from the LDXR, four instructions before this CBNZ */
		jmp_offset = -4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else {
		pr_err_once("unknown atomic op code %02x\n", imm);
		return -EINVAL;
	}

	return 0;
}
608 
/*
 * Default PLT target, installed by build_plt().
 *
 * It is entered with the program's original lr in x9 (saved by the
 * prologue's "mov x9, lr"). It returns to the address held in x30 on
 * entry, with x30 restored from x9.
 */
void dummy_tramp(void);

asm (
"	.pushsection .text, \"ax\", @progbits\n"
"	.global dummy_tramp\n"
"	.type dummy_tramp, %function\n"
"dummy_tramp:"
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
"	bti j\n" /* dummy_tramp is called via "br x10" */
#endif
"	mov x10, x30\n" /* x10 = address to return to */
"	mov x30, x9\n"  /* put the saved lr back */
"	ret x10\n"
"	.size dummy_tramp, .-dummy_tramp\n"
"	.popsection\n"
);
625 
626 /* build a plt initialized like this:
627  *
628  * plt:
629  *      ldr tmp, target
630  *      br tmp
631  * target:
632  *      .quad dummy_tramp
633  *
634  * when a long jump trampoline is attached, target is filled with the
635  * trampoline address, and when the trampoline is removed, target is
636  * restored to dummy_tramp address.
637  */
638 static void build_plt(struct jit_ctx *ctx)
639 {
640 	const u8 tmp = bpf2a64[TMP_REG_1];
641 	struct bpf_plt *plt = NULL;
642 
643 	/* make sure target is 64-bit aligned */
644 	if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
645 		emit(A64_NOP, ctx);
646 
647 	plt = (struct bpf_plt *)(ctx->image + ctx->idx);
648 	/* plt is called via bl, no BTI needed here */
649 	emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
650 	emit(A64_BR(tmp), ctx);
651 
652 	if (ctx->image)
653 		plt->target = (u64)&dummy_tramp;
654 }
655 
656 static void build_epilogue(struct jit_ctx *ctx)
657 {
658 	const u8 r0 = bpf2a64[BPF_REG_0];
659 	const u8 r6 = bpf2a64[BPF_REG_6];
660 	const u8 r7 = bpf2a64[BPF_REG_7];
661 	const u8 r8 = bpf2a64[BPF_REG_8];
662 	const u8 r9 = bpf2a64[BPF_REG_9];
663 	const u8 fp = bpf2a64[BPF_REG_FP];
664 	const u8 fpb = bpf2a64[FP_BOTTOM];
665 
666 	/* We're done with BPF stack */
667 	emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
668 
669 	/* Restore x27 and x28 */
670 	emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
671 	/* Restore fs (x25) and x26 */
672 	emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
673 
674 	/* Restore callee-saved register */
675 	emit(A64_POP(r8, r9, A64_SP), ctx);
676 	emit(A64_POP(r6, r7, A64_SP), ctx);
677 
678 	/* Restore FP/LR registers */
679 	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
680 
681 	/* Set return value */
682 	emit(A64_MOV(1, A64_R(0), r0), ctx);
683 
684 	/* Authenticate lr */
685 	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
686 		emit(A64_AUTIASP, ctx);
687 
688 	emit(A64_RET(A64_LR), ctx);
689 }
690 
691 #define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
692 #define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
693 
694 bool ex_handler_bpf(const struct exception_table_entry *ex,
695 		    struct pt_regs *regs)
696 {
697 	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
698 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
699 
700 	regs->regs[dst_reg] = 0;
701 	regs->pc = (unsigned long)&ex->fixup - offset;
702 	return true;
703 }
704 
705 /* For accesses to BTF pointers, add an entry to the exception table */
706 static int add_exception_handler(const struct bpf_insn *insn,
707 				 struct jit_ctx *ctx,
708 				 int dst_reg)
709 {
710 	off_t offset;
711 	unsigned long pc;
712 	struct exception_table_entry *ex;
713 
714 	if (!ctx->image)
715 		/* First pass */
716 		return 0;
717 
718 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM)
719 		return 0;
720 
721 	if (!ctx->prog->aux->extable ||
722 	    WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
723 		return -EINVAL;
724 
725 	ex = &ctx->prog->aux->extable[ctx->exentry_idx];
726 	pc = (unsigned long)&ctx->image[ctx->idx - 1];
727 
728 	offset = pc - (long)&ex->insn;
729 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
730 		return -ERANGE;
731 	ex->insn = offset;
732 
733 	/*
734 	 * Since the extable follows the program, the fixup offset is always
735 	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
736 	 * to keep things simple, and put the destination register in the upper
737 	 * bits. We don't need to worry about buildtime or runtime sort
738 	 * modifying the upper bits because the table is already sorted, and
739 	 * isn't part of the main exception table.
740 	 */
741 	offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
742 	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
743 		return -ERANGE;
744 
745 	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
746 		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
747 
748 	ex->type = EX_TYPE_BPF;
749 
750 	ctx->exentry_idx++;
751 	return 0;
752 }
753 
754 /* JITs an eBPF instruction.
755  * Returns:
756  * 0  - successfully JITed an 8-byte eBPF instruction.
757  * >0 - successfully JITed a 16-byte eBPF instruction.
758  * <0 - failed to JIT.
759  */
760 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
761 		      bool extra_pass)
762 {
763 	const u8 code = insn->code;
764 	const u8 dst = bpf2a64[insn->dst_reg];
765 	const u8 src = bpf2a64[insn->src_reg];
766 	const u8 tmp = bpf2a64[TMP_REG_1];
767 	const u8 tmp2 = bpf2a64[TMP_REG_2];
768 	const u8 fp = bpf2a64[BPF_REG_FP];
769 	const u8 fpb = bpf2a64[FP_BOTTOM];
770 	const s16 off = insn->off;
771 	const s32 imm = insn->imm;
772 	const int i = insn - ctx->prog->insnsi;
773 	const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
774 			  BPF_CLASS(code) == BPF_JMP;
775 	u8 jmp_cond;
776 	s32 jmp_offset;
777 	u32 a64_insn;
778 	u8 src_adj;
779 	u8 dst_adj;
780 	int off_adj;
781 	int ret;
782 
783 	switch (code) {
784 	/* dst = src */
785 	case BPF_ALU | BPF_MOV | BPF_X:
786 	case BPF_ALU64 | BPF_MOV | BPF_X:
787 		emit(A64_MOV(is64, dst, src), ctx);
788 		break;
789 	/* dst = dst OP src */
790 	case BPF_ALU | BPF_ADD | BPF_X:
791 	case BPF_ALU64 | BPF_ADD | BPF_X:
792 		emit(A64_ADD(is64, dst, dst, src), ctx);
793 		break;
794 	case BPF_ALU | BPF_SUB | BPF_X:
795 	case BPF_ALU64 | BPF_SUB | BPF_X:
796 		emit(A64_SUB(is64, dst, dst, src), ctx);
797 		break;
798 	case BPF_ALU | BPF_AND | BPF_X:
799 	case BPF_ALU64 | BPF_AND | BPF_X:
800 		emit(A64_AND(is64, dst, dst, src), ctx);
801 		break;
802 	case BPF_ALU | BPF_OR | BPF_X:
803 	case BPF_ALU64 | BPF_OR | BPF_X:
804 		emit(A64_ORR(is64, dst, dst, src), ctx);
805 		break;
806 	case BPF_ALU | BPF_XOR | BPF_X:
807 	case BPF_ALU64 | BPF_XOR | BPF_X:
808 		emit(A64_EOR(is64, dst, dst, src), ctx);
809 		break;
810 	case BPF_ALU | BPF_MUL | BPF_X:
811 	case BPF_ALU64 | BPF_MUL | BPF_X:
812 		emit(A64_MUL(is64, dst, dst, src), ctx);
813 		break;
814 	case BPF_ALU | BPF_DIV | BPF_X:
815 	case BPF_ALU64 | BPF_DIV | BPF_X:
816 		emit(A64_UDIV(is64, dst, dst, src), ctx);
817 		break;
818 	case BPF_ALU | BPF_MOD | BPF_X:
819 	case BPF_ALU64 | BPF_MOD | BPF_X:
820 		emit(A64_UDIV(is64, tmp, dst, src), ctx);
821 		emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
822 		break;
823 	case BPF_ALU | BPF_LSH | BPF_X:
824 	case BPF_ALU64 | BPF_LSH | BPF_X:
825 		emit(A64_LSLV(is64, dst, dst, src), ctx);
826 		break;
827 	case BPF_ALU | BPF_RSH | BPF_X:
828 	case BPF_ALU64 | BPF_RSH | BPF_X:
829 		emit(A64_LSRV(is64, dst, dst, src), ctx);
830 		break;
831 	case BPF_ALU | BPF_ARSH | BPF_X:
832 	case BPF_ALU64 | BPF_ARSH | BPF_X:
833 		emit(A64_ASRV(is64, dst, dst, src), ctx);
834 		break;
835 	/* dst = -dst */
836 	case BPF_ALU | BPF_NEG:
837 	case BPF_ALU64 | BPF_NEG:
838 		emit(A64_NEG(is64, dst, dst), ctx);
839 		break;
840 	/* dst = BSWAP##imm(dst) */
841 	case BPF_ALU | BPF_END | BPF_FROM_LE:
842 	case BPF_ALU | BPF_END | BPF_FROM_BE:
843 #ifdef CONFIG_CPU_BIG_ENDIAN
844 		if (BPF_SRC(code) == BPF_FROM_BE)
845 			goto emit_bswap_uxt;
846 #else /* !CONFIG_CPU_BIG_ENDIAN */
847 		if (BPF_SRC(code) == BPF_FROM_LE)
848 			goto emit_bswap_uxt;
849 #endif
850 		switch (imm) {
851 		case 16:
852 			emit(A64_REV16(is64, dst, dst), ctx);
853 			/* zero-extend 16 bits into 64 bits */
854 			emit(A64_UXTH(is64, dst, dst), ctx);
855 			break;
856 		case 32:
857 			emit(A64_REV32(is64, dst, dst), ctx);
858 			/* upper 32 bits already cleared */
859 			break;
860 		case 64:
861 			emit(A64_REV64(dst, dst), ctx);
862 			break;
863 		}
864 		break;
865 emit_bswap_uxt:
866 		switch (imm) {
867 		case 16:
868 			/* zero-extend 16 bits into 64 bits */
869 			emit(A64_UXTH(is64, dst, dst), ctx);
870 			break;
871 		case 32:
872 			/* zero-extend 32 bits into 64 bits */
873 			emit(A64_UXTW(is64, dst, dst), ctx);
874 			break;
875 		case 64:
876 			/* nop */
877 			break;
878 		}
879 		break;
880 	/* dst = imm */
881 	case BPF_ALU | BPF_MOV | BPF_K:
882 	case BPF_ALU64 | BPF_MOV | BPF_K:
883 		emit_a64_mov_i(is64, dst, imm, ctx);
884 		break;
885 	/* dst = dst OP imm */
886 	case BPF_ALU | BPF_ADD | BPF_K:
887 	case BPF_ALU64 | BPF_ADD | BPF_K:
888 		if (is_addsub_imm(imm)) {
889 			emit(A64_ADD_I(is64, dst, dst, imm), ctx);
890 		} else if (is_addsub_imm(-imm)) {
891 			emit(A64_SUB_I(is64, dst, dst, -imm), ctx);
892 		} else {
893 			emit_a64_mov_i(is64, tmp, imm, ctx);
894 			emit(A64_ADD(is64, dst, dst, tmp), ctx);
895 		}
896 		break;
897 	case BPF_ALU | BPF_SUB | BPF_K:
898 	case BPF_ALU64 | BPF_SUB | BPF_K:
899 		if (is_addsub_imm(imm)) {
900 			emit(A64_SUB_I(is64, dst, dst, imm), ctx);
901 		} else if (is_addsub_imm(-imm)) {
902 			emit(A64_ADD_I(is64, dst, dst, -imm), ctx);
903 		} else {
904 			emit_a64_mov_i(is64, tmp, imm, ctx);
905 			emit(A64_SUB(is64, dst, dst, tmp), ctx);
906 		}
907 		break;
908 	case BPF_ALU | BPF_AND | BPF_K:
909 	case BPF_ALU64 | BPF_AND | BPF_K:
910 		a64_insn = A64_AND_I(is64, dst, dst, imm);
911 		if (a64_insn != AARCH64_BREAK_FAULT) {
912 			emit(a64_insn, ctx);
913 		} else {
914 			emit_a64_mov_i(is64, tmp, imm, ctx);
915 			emit(A64_AND(is64, dst, dst, tmp), ctx);
916 		}
917 		break;
918 	case BPF_ALU | BPF_OR | BPF_K:
919 	case BPF_ALU64 | BPF_OR | BPF_K:
920 		a64_insn = A64_ORR_I(is64, dst, dst, imm);
921 		if (a64_insn != AARCH64_BREAK_FAULT) {
922 			emit(a64_insn, ctx);
923 		} else {
924 			emit_a64_mov_i(is64, tmp, imm, ctx);
925 			emit(A64_ORR(is64, dst, dst, tmp), ctx);
926 		}
927 		break;
928 	case BPF_ALU | BPF_XOR | BPF_K:
929 	case BPF_ALU64 | BPF_XOR | BPF_K:
930 		a64_insn = A64_EOR_I(is64, dst, dst, imm);
931 		if (a64_insn != AARCH64_BREAK_FAULT) {
932 			emit(a64_insn, ctx);
933 		} else {
934 			emit_a64_mov_i(is64, tmp, imm, ctx);
935 			emit(A64_EOR(is64, dst, dst, tmp), ctx);
936 		}
937 		break;
938 	case BPF_ALU | BPF_MUL | BPF_K:
939 	case BPF_ALU64 | BPF_MUL | BPF_K:
940 		emit_a64_mov_i(is64, tmp, imm, ctx);
941 		emit(A64_MUL(is64, dst, dst, tmp), ctx);
942 		break;
943 	case BPF_ALU | BPF_DIV | BPF_K:
944 	case BPF_ALU64 | BPF_DIV | BPF_K:
945 		emit_a64_mov_i(is64, tmp, imm, ctx);
946 		emit(A64_UDIV(is64, dst, dst, tmp), ctx);
947 		break;
948 	case BPF_ALU | BPF_MOD | BPF_K:
949 	case BPF_ALU64 | BPF_MOD | BPF_K:
950 		emit_a64_mov_i(is64, tmp2, imm, ctx);
951 		emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
952 		emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
953 		break;
954 	case BPF_ALU | BPF_LSH | BPF_K:
955 	case BPF_ALU64 | BPF_LSH | BPF_K:
956 		emit(A64_LSL(is64, dst, dst, imm), ctx);
957 		break;
958 	case BPF_ALU | BPF_RSH | BPF_K:
959 	case BPF_ALU64 | BPF_RSH | BPF_K:
960 		emit(A64_LSR(is64, dst, dst, imm), ctx);
961 		break;
962 	case BPF_ALU | BPF_ARSH | BPF_K:
963 	case BPF_ALU64 | BPF_ARSH | BPF_K:
964 		emit(A64_ASR(is64, dst, dst, imm), ctx);
965 		break;
966 
967 	/* JUMP off */
968 	case BPF_JMP | BPF_JA:
969 		jmp_offset = bpf2a64_offset(i, off, ctx);
970 		check_imm26(jmp_offset);
971 		emit(A64_B(jmp_offset), ctx);
972 		break;
973 	/* IF (dst COND src) JUMP off */
974 	case BPF_JMP | BPF_JEQ | BPF_X:
975 	case BPF_JMP | BPF_JGT | BPF_X:
976 	case BPF_JMP | BPF_JLT | BPF_X:
977 	case BPF_JMP | BPF_JGE | BPF_X:
978 	case BPF_JMP | BPF_JLE | BPF_X:
979 	case BPF_JMP | BPF_JNE | BPF_X:
980 	case BPF_JMP | BPF_JSGT | BPF_X:
981 	case BPF_JMP | BPF_JSLT | BPF_X:
982 	case BPF_JMP | BPF_JSGE | BPF_X:
983 	case BPF_JMP | BPF_JSLE | BPF_X:
984 	case BPF_JMP32 | BPF_JEQ | BPF_X:
985 	case BPF_JMP32 | BPF_JGT | BPF_X:
986 	case BPF_JMP32 | BPF_JLT | BPF_X:
987 	case BPF_JMP32 | BPF_JGE | BPF_X:
988 	case BPF_JMP32 | BPF_JLE | BPF_X:
989 	case BPF_JMP32 | BPF_JNE | BPF_X:
990 	case BPF_JMP32 | BPF_JSGT | BPF_X:
991 	case BPF_JMP32 | BPF_JSLT | BPF_X:
992 	case BPF_JMP32 | BPF_JSGE | BPF_X:
993 	case BPF_JMP32 | BPF_JSLE | BPF_X:
994 		emit(A64_CMP(is64, dst, src), ctx);
995 emit_cond_jmp:
996 		jmp_offset = bpf2a64_offset(i, off, ctx);
997 		check_imm19(jmp_offset);
998 		switch (BPF_OP(code)) {
999 		case BPF_JEQ:
1000 			jmp_cond = A64_COND_EQ;
1001 			break;
1002 		case BPF_JGT:
1003 			jmp_cond = A64_COND_HI;
1004 			break;
1005 		case BPF_JLT:
1006 			jmp_cond = A64_COND_CC;
1007 			break;
1008 		case BPF_JGE:
1009 			jmp_cond = A64_COND_CS;
1010 			break;
1011 		case BPF_JLE:
1012 			jmp_cond = A64_COND_LS;
1013 			break;
1014 		case BPF_JSET:
1015 		case BPF_JNE:
1016 			jmp_cond = A64_COND_NE;
1017 			break;
1018 		case BPF_JSGT:
1019 			jmp_cond = A64_COND_GT;
1020 			break;
1021 		case BPF_JSLT:
1022 			jmp_cond = A64_COND_LT;
1023 			break;
1024 		case BPF_JSGE:
1025 			jmp_cond = A64_COND_GE;
1026 			break;
1027 		case BPF_JSLE:
1028 			jmp_cond = A64_COND_LE;
1029 			break;
1030 		default:
1031 			return -EFAULT;
1032 		}
1033 		emit(A64_B_(jmp_cond, jmp_offset), ctx);
1034 		break;
1035 	case BPF_JMP | BPF_JSET | BPF_X:
1036 	case BPF_JMP32 | BPF_JSET | BPF_X:
1037 		emit(A64_TST(is64, dst, src), ctx);
1038 		goto emit_cond_jmp;
1039 	/* IF (dst COND imm) JUMP off */
1040 	case BPF_JMP | BPF_JEQ | BPF_K:
1041 	case BPF_JMP | BPF_JGT | BPF_K:
1042 	case BPF_JMP | BPF_JLT | BPF_K:
1043 	case BPF_JMP | BPF_JGE | BPF_K:
1044 	case BPF_JMP | BPF_JLE | BPF_K:
1045 	case BPF_JMP | BPF_JNE | BPF_K:
1046 	case BPF_JMP | BPF_JSGT | BPF_K:
1047 	case BPF_JMP | BPF_JSLT | BPF_K:
1048 	case BPF_JMP | BPF_JSGE | BPF_K:
1049 	case BPF_JMP | BPF_JSLE | BPF_K:
1050 	case BPF_JMP32 | BPF_JEQ | BPF_K:
1051 	case BPF_JMP32 | BPF_JGT | BPF_K:
1052 	case BPF_JMP32 | BPF_JLT | BPF_K:
1053 	case BPF_JMP32 | BPF_JGE | BPF_K:
1054 	case BPF_JMP32 | BPF_JLE | BPF_K:
1055 	case BPF_JMP32 | BPF_JNE | BPF_K:
1056 	case BPF_JMP32 | BPF_JSGT | BPF_K:
1057 	case BPF_JMP32 | BPF_JSLT | BPF_K:
1058 	case BPF_JMP32 | BPF_JSGE | BPF_K:
1059 	case BPF_JMP32 | BPF_JSLE | BPF_K:
1060 		if (is_addsub_imm(imm)) {
1061 			emit(A64_CMP_I(is64, dst, imm), ctx);
1062 		} else if (is_addsub_imm(-imm)) {
1063 			emit(A64_CMN_I(is64, dst, -imm), ctx);
1064 		} else {
1065 			emit_a64_mov_i(is64, tmp, imm, ctx);
1066 			emit(A64_CMP(is64, dst, tmp), ctx);
1067 		}
1068 		goto emit_cond_jmp;
1069 	case BPF_JMP | BPF_JSET | BPF_K:
1070 	case BPF_JMP32 | BPF_JSET | BPF_K:
1071 		a64_insn = A64_TST_I(is64, dst, imm);
1072 		if (a64_insn != AARCH64_BREAK_FAULT) {
1073 			emit(a64_insn, ctx);
1074 		} else {
1075 			emit_a64_mov_i(is64, tmp, imm, ctx);
1076 			emit(A64_TST(is64, dst, tmp), ctx);
1077 		}
1078 		goto emit_cond_jmp;
1079 	/* function call */
1080 	case BPF_JMP | BPF_CALL:
1081 	{
1082 		const u8 r0 = bpf2a64[BPF_REG_0];
1083 		bool func_addr_fixed;
1084 		u64 func_addr;
1085 
1086 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
1087 					    &func_addr, &func_addr_fixed);
1088 		if (ret < 0)
1089 			return ret;
1090 		emit_call(func_addr, ctx);
1091 		emit(A64_MOV(1, r0, A64_R(0)), ctx);
1092 		break;
1093 	}
1094 	/* tail call */
1095 	case BPF_JMP | BPF_TAIL_CALL:
1096 		if (emit_bpf_tail_call(ctx))
1097 			return -EFAULT;
1098 		break;
1099 	/* function return */
1100 	case BPF_JMP | BPF_EXIT:
1101 		/* Optimization: when last instruction is EXIT,
1102 		   simply fallthrough to epilogue. */
1103 		if (i == ctx->prog->len - 1)
1104 			break;
1105 		jmp_offset = epilogue_offset(ctx);
1106 		check_imm26(jmp_offset);
1107 		emit(A64_B(jmp_offset), ctx);
1108 		break;
1109 
1110 	/* dst = imm64 */
1111 	case BPF_LD | BPF_IMM | BPF_DW:
1112 	{
1113 		const struct bpf_insn insn1 = insn[1];
1114 		u64 imm64;
1115 
1116 		imm64 = (u64)insn1.imm << 32 | (u32)imm;
1117 		if (bpf_pseudo_func(insn))
1118 			emit_addr_mov_i64(dst, imm64, ctx);
1119 		else
1120 			emit_a64_mov_i64(dst, imm64, ctx);
1121 
1122 		return 1;
1123 	}
1124 
1125 	/* LDX: dst = *(size *)(src + off) */
1126 	case BPF_LDX | BPF_MEM | BPF_W:
1127 	case BPF_LDX | BPF_MEM | BPF_H:
1128 	case BPF_LDX | BPF_MEM | BPF_B:
1129 	case BPF_LDX | BPF_MEM | BPF_DW:
1130 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1131 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1132 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1133 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1134 		if (ctx->fpb_offset > 0 && src == fp) {
1135 			src_adj = fpb;
1136 			off_adj = off + ctx->fpb_offset;
1137 		} else {
1138 			src_adj = src;
1139 			off_adj = off;
1140 		}
1141 		switch (BPF_SIZE(code)) {
1142 		case BPF_W:
1143 			if (is_lsi_offset(off_adj, 2)) {
1144 				emit(A64_LDR32I(dst, src_adj, off_adj), ctx);
1145 			} else {
1146 				emit_a64_mov_i(1, tmp, off, ctx);
1147 				emit(A64_LDR32(dst, src, tmp), ctx);
1148 			}
1149 			break;
1150 		case BPF_H:
1151 			if (is_lsi_offset(off_adj, 1)) {
1152 				emit(A64_LDRHI(dst, src_adj, off_adj), ctx);
1153 			} else {
1154 				emit_a64_mov_i(1, tmp, off, ctx);
1155 				emit(A64_LDRH(dst, src, tmp), ctx);
1156 			}
1157 			break;
1158 		case BPF_B:
1159 			if (is_lsi_offset(off_adj, 0)) {
1160 				emit(A64_LDRBI(dst, src_adj, off_adj), ctx);
1161 			} else {
1162 				emit_a64_mov_i(1, tmp, off, ctx);
1163 				emit(A64_LDRB(dst, src, tmp), ctx);
1164 			}
1165 			break;
1166 		case BPF_DW:
1167 			if (is_lsi_offset(off_adj, 3)) {
1168 				emit(A64_LDR64I(dst, src_adj, off_adj), ctx);
1169 			} else {
1170 				emit_a64_mov_i(1, tmp, off, ctx);
1171 				emit(A64_LDR64(dst, src, tmp), ctx);
1172 			}
1173 			break;
1174 		}
1175 
1176 		ret = add_exception_handler(insn, ctx, dst);
1177 		if (ret)
1178 			return ret;
1179 		break;
1180 
1181 	/* speculation barrier */
1182 	case BPF_ST | BPF_NOSPEC:
1183 		/*
1184 		 * Nothing required here.
1185 		 *
1186 		 * In case of arm64, we rely on the firmware mitigation of
1187 		 * Speculative Store Bypass as controlled via the ssbd kernel
1188 		 * parameter. Whenever the mitigation is enabled, it works
1189 		 * for all of the kernel code with no need to provide any
1190 		 * additional instructions.
1191 		 */
1192 		break;
1193 
1194 	/* ST: *(size *)(dst + off) = imm */
1195 	case BPF_ST | BPF_MEM | BPF_W:
1196 	case BPF_ST | BPF_MEM | BPF_H:
1197 	case BPF_ST | BPF_MEM | BPF_B:
1198 	case BPF_ST | BPF_MEM | BPF_DW:
1199 		if (ctx->fpb_offset > 0 && dst == fp) {
1200 			dst_adj = fpb;
1201 			off_adj = off + ctx->fpb_offset;
1202 		} else {
1203 			dst_adj = dst;
1204 			off_adj = off;
1205 		}
1206 		/* Load imm to a register then store it */
1207 		emit_a64_mov_i(1, tmp, imm, ctx);
1208 		switch (BPF_SIZE(code)) {
1209 		case BPF_W:
1210 			if (is_lsi_offset(off_adj, 2)) {
1211 				emit(A64_STR32I(tmp, dst_adj, off_adj), ctx);
1212 			} else {
1213 				emit_a64_mov_i(1, tmp2, off, ctx);
1214 				emit(A64_STR32(tmp, dst, tmp2), ctx);
1215 			}
1216 			break;
1217 		case BPF_H:
1218 			if (is_lsi_offset(off_adj, 1)) {
1219 				emit(A64_STRHI(tmp, dst_adj, off_adj), ctx);
1220 			} else {
1221 				emit_a64_mov_i(1, tmp2, off, ctx);
1222 				emit(A64_STRH(tmp, dst, tmp2), ctx);
1223 			}
1224 			break;
1225 		case BPF_B:
1226 			if (is_lsi_offset(off_adj, 0)) {
1227 				emit(A64_STRBI(tmp, dst_adj, off_adj), ctx);
1228 			} else {
1229 				emit_a64_mov_i(1, tmp2, off, ctx);
1230 				emit(A64_STRB(tmp, dst, tmp2), ctx);
1231 			}
1232 			break;
1233 		case BPF_DW:
1234 			if (is_lsi_offset(off_adj, 3)) {
1235 				emit(A64_STR64I(tmp, dst_adj, off_adj), ctx);
1236 			} else {
1237 				emit_a64_mov_i(1, tmp2, off, ctx);
1238 				emit(A64_STR64(tmp, dst, tmp2), ctx);
1239 			}
1240 			break;
1241 		}
1242 		break;
1243 
1244 	/* STX: *(size *)(dst + off) = src */
1245 	case BPF_STX | BPF_MEM | BPF_W:
1246 	case BPF_STX | BPF_MEM | BPF_H:
1247 	case BPF_STX | BPF_MEM | BPF_B:
1248 	case BPF_STX | BPF_MEM | BPF_DW:
1249 		if (ctx->fpb_offset > 0 && dst == fp) {
1250 			dst_adj = fpb;
1251 			off_adj = off + ctx->fpb_offset;
1252 		} else {
1253 			dst_adj = dst;
1254 			off_adj = off;
1255 		}
1256 		switch (BPF_SIZE(code)) {
1257 		case BPF_W:
1258 			if (is_lsi_offset(off_adj, 2)) {
1259 				emit(A64_STR32I(src, dst_adj, off_adj), ctx);
1260 			} else {
1261 				emit_a64_mov_i(1, tmp, off, ctx);
1262 				emit(A64_STR32(src, dst, tmp), ctx);
1263 			}
1264 			break;
1265 		case BPF_H:
1266 			if (is_lsi_offset(off_adj, 1)) {
1267 				emit(A64_STRHI(src, dst_adj, off_adj), ctx);
1268 			} else {
1269 				emit_a64_mov_i(1, tmp, off, ctx);
1270 				emit(A64_STRH(src, dst, tmp), ctx);
1271 			}
1272 			break;
1273 		case BPF_B:
1274 			if (is_lsi_offset(off_adj, 0)) {
1275 				emit(A64_STRBI(src, dst_adj, off_adj), ctx);
1276 			} else {
1277 				emit_a64_mov_i(1, tmp, off, ctx);
1278 				emit(A64_STRB(src, dst, tmp), ctx);
1279 			}
1280 			break;
1281 		case BPF_DW:
1282 			if (is_lsi_offset(off_adj, 3)) {
1283 				emit(A64_STR64I(src, dst_adj, off_adj), ctx);
1284 			} else {
1285 				emit_a64_mov_i(1, tmp, off, ctx);
1286 				emit(A64_STR64(src, dst, tmp), ctx);
1287 			}
1288 			break;
1289 		}
1290 		break;
1291 
1292 	case BPF_STX | BPF_ATOMIC | BPF_W:
1293 	case BPF_STX | BPF_ATOMIC | BPF_DW:
1294 		if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
1295 			ret = emit_lse_atomic(insn, ctx);
1296 		else
1297 			ret = emit_ll_sc_atomic(insn, ctx);
1298 		if (ret)
1299 			return ret;
1300 		break;
1301 
1302 	default:
1303 		pr_err_once("unknown opcode %02x\n", code);
1304 		return -EINVAL;
1305 	}
1306 
1307 	return 0;
1308 }
1309 
1310 /*
1311  * Return 0 if FP may change at runtime, otherwise find the minimum negative
1312  * offset to FP, converts it to positive number, and align down to 8 bytes.
1313  */
1314 static int find_fpb_offset(struct bpf_prog *prog)
1315 {
1316 	int i;
1317 	int offset = 0;
1318 
1319 	for (i = 0; i < prog->len; i++) {
1320 		const struct bpf_insn *insn = &prog->insnsi[i];
1321 		const u8 class = BPF_CLASS(insn->code);
1322 		const u8 mode = BPF_MODE(insn->code);
1323 		const u8 src = insn->src_reg;
1324 		const u8 dst = insn->dst_reg;
1325 		const s32 imm = insn->imm;
1326 		const s16 off = insn->off;
1327 
1328 		switch (class) {
1329 		case BPF_STX:
1330 		case BPF_ST:
1331 			/* fp holds atomic operation result */
1332 			if (class == BPF_STX && mode == BPF_ATOMIC &&
1333 			    ((imm == BPF_XCHG ||
1334 			      imm == (BPF_FETCH | BPF_ADD) ||
1335 			      imm == (BPF_FETCH | BPF_AND) ||
1336 			      imm == (BPF_FETCH | BPF_XOR) ||
1337 			      imm == (BPF_FETCH | BPF_OR)) &&
1338 			     src == BPF_REG_FP))
1339 				return 0;
1340 
1341 			if (mode == BPF_MEM && dst == BPF_REG_FP &&
1342 			    off < offset)
1343 				offset = insn->off;
1344 			break;
1345 
1346 		case BPF_JMP32:
1347 		case BPF_JMP:
1348 			break;
1349 
1350 		case BPF_LDX:
1351 		case BPF_LD:
1352 			/* fp holds load result */
1353 			if (dst == BPF_REG_FP)
1354 				return 0;
1355 
1356 			if (class == BPF_LDX && mode == BPF_MEM &&
1357 			    src == BPF_REG_FP && off < offset)
1358 				offset = off;
1359 			break;
1360 
1361 		case BPF_ALU:
1362 		case BPF_ALU64:
1363 		default:
1364 			/* fp holds ALU result */
1365 			if (dst == BPF_REG_FP)
1366 				return 0;
1367 		}
1368 	}
1369 
1370 	if (offset < 0) {
1371 		/*
1372 		 * safely be converted to a positive 'int', since insn->off
1373 		 * is 's16'
1374 		 */
1375 		offset = -offset;
1376 		/* align down to 8 bytes */
1377 		offset = ALIGN_DOWN(offset, 8);
1378 	}
1379 
1380 	return offset;
1381 }
1382 
1383 static int build_body(struct jit_ctx *ctx, bool extra_pass)
1384 {
1385 	const struct bpf_prog *prog = ctx->prog;
1386 	int i;
1387 
1388 	/*
1389 	 * - offset[0] offset of the end of prologue,
1390 	 *   start of the 1st instruction.
1391 	 * - offset[1] - offset of the end of 1st instruction,
1392 	 *   start of the 2nd instruction
1393 	 * [....]
1394 	 * - offset[3] - offset of the end of 3rd instruction,
1395 	 *   start of 4th instruction
1396 	 */
1397 	for (i = 0; i < prog->len; i++) {
1398 		const struct bpf_insn *insn = &prog->insnsi[i];
1399 		int ret;
1400 
1401 		if (ctx->image == NULL)
1402 			ctx->offset[i] = ctx->idx;
1403 		ret = build_insn(insn, ctx, extra_pass);
1404 		if (ret > 0) {
1405 			i++;
1406 			if (ctx->image == NULL)
1407 				ctx->offset[i] = ctx->idx;
1408 			continue;
1409 		}
1410 		if (ret)
1411 			return ret;
1412 	}
1413 	/*
1414 	 * offset is allocated with prog->len + 1 so fill in
1415 	 * the last element with the offset after the last
1416 	 * instruction (end of program)
1417 	 */
1418 	if (ctx->image == NULL)
1419 		ctx->offset[i] = ctx->idx;
1420 
1421 	return 0;
1422 }
1423 
1424 static int validate_code(struct jit_ctx *ctx)
1425 {
1426 	int i;
1427 
1428 	for (i = 0; i < ctx->idx; i++) {
1429 		u32 a64_insn = le32_to_cpu(ctx->image[i]);
1430 
1431 		if (a64_insn == AARCH64_BREAK_FAULT)
1432 			return -1;
1433 	}
1434 	return 0;
1435 }
1436 
1437 static int validate_ctx(struct jit_ctx *ctx)
1438 {
1439 	if (validate_code(ctx))
1440 		return -1;
1441 
1442 	if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
1443 		return -1;
1444 
1445 	return 0;
1446 }
1447 
/* Pointer-typed convenience wrapper around flush_icache_range(). */
static inline void bpf_flush_icache(void *start, void *end)
{
	const unsigned long from = (unsigned long)start;
	const unsigned long to = (unsigned long)end;

	flush_icache_range(from, to);
}
1452 
1453 struct arm64_jit_data {
1454 	struct bpf_binary_header *header;
1455 	u8 *image;
1456 	struct jit_ctx ctx;
1457 };
1458 
1459 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1460 {
1461 	int image_size, prog_size, extable_size, extable_align, extable_offset;
1462 	struct bpf_prog *tmp, *orig_prog = prog;
1463 	struct bpf_binary_header *header;
1464 	struct arm64_jit_data *jit_data;
1465 	bool was_classic = bpf_prog_was_classic(prog);
1466 	bool tmp_blinded = false;
1467 	bool extra_pass = false;
1468 	struct jit_ctx ctx;
1469 	u8 *image_ptr;
1470 
1471 	if (!prog->jit_requested)
1472 		return orig_prog;
1473 
1474 	tmp = bpf_jit_blind_constants(prog);
1475 	/* If blinding was requested and we failed during blinding,
1476 	 * we must fall back to the interpreter.
1477 	 */
1478 	if (IS_ERR(tmp))
1479 		return orig_prog;
1480 	if (tmp != prog) {
1481 		tmp_blinded = true;
1482 		prog = tmp;
1483 	}
1484 
1485 	jit_data = prog->aux->jit_data;
1486 	if (!jit_data) {
1487 		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1488 		if (!jit_data) {
1489 			prog = orig_prog;
1490 			goto out;
1491 		}
1492 		prog->aux->jit_data = jit_data;
1493 	}
1494 	if (jit_data->ctx.offset) {
1495 		ctx = jit_data->ctx;
1496 		image_ptr = jit_data->image;
1497 		header = jit_data->header;
1498 		extra_pass = true;
1499 		prog_size = sizeof(u32) * ctx.idx;
1500 		goto skip_init_ctx;
1501 	}
1502 	memset(&ctx, 0, sizeof(ctx));
1503 	ctx.prog = prog;
1504 
1505 	ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
1506 	if (ctx.offset == NULL) {
1507 		prog = orig_prog;
1508 		goto out_off;
1509 	}
1510 
1511 	ctx.fpb_offset = find_fpb_offset(prog);
1512 
1513 	/*
1514 	 * 1. Initial fake pass to compute ctx->idx and ctx->offset.
1515 	 *
1516 	 * BPF line info needs ctx->offset[i] to be the offset of
1517 	 * instruction[i] in jited image, so build prologue first.
1518 	 */
1519 	if (build_prologue(&ctx, was_classic)) {
1520 		prog = orig_prog;
1521 		goto out_off;
1522 	}
1523 
1524 	if (build_body(&ctx, extra_pass)) {
1525 		prog = orig_prog;
1526 		goto out_off;
1527 	}
1528 
1529 	ctx.epilogue_offset = ctx.idx;
1530 	build_epilogue(&ctx);
1531 	build_plt(&ctx);
1532 
1533 	extable_align = __alignof__(struct exception_table_entry);
1534 	extable_size = prog->aux->num_exentries *
1535 		sizeof(struct exception_table_entry);
1536 
1537 	/* Now we know the actual image size. */
1538 	prog_size = sizeof(u32) * ctx.idx;
1539 	/* also allocate space for plt target */
1540 	extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
1541 	image_size = extable_offset + extable_size;
1542 	header = bpf_jit_binary_alloc(image_size, &image_ptr,
1543 				      sizeof(u32), jit_fill_hole);
1544 	if (header == NULL) {
1545 		prog = orig_prog;
1546 		goto out_off;
1547 	}
1548 
1549 	/* 2. Now, the actual pass. */
1550 
1551 	ctx.image = (__le32 *)image_ptr;
1552 	if (extable_size)
1553 		prog->aux->extable = (void *)image_ptr + extable_offset;
1554 skip_init_ctx:
1555 	ctx.idx = 0;
1556 	ctx.exentry_idx = 0;
1557 
1558 	build_prologue(&ctx, was_classic);
1559 
1560 	if (build_body(&ctx, extra_pass)) {
1561 		bpf_jit_binary_free(header);
1562 		prog = orig_prog;
1563 		goto out_off;
1564 	}
1565 
1566 	build_epilogue(&ctx);
1567 	build_plt(&ctx);
1568 
1569 	/* 3. Extra pass to validate JITed code. */
1570 	if (validate_ctx(&ctx)) {
1571 		bpf_jit_binary_free(header);
1572 		prog = orig_prog;
1573 		goto out_off;
1574 	}
1575 
1576 	/* And we're done. */
1577 	if (bpf_jit_enable > 1)
1578 		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
1579 
1580 	bpf_flush_icache(header, ctx.image + ctx.idx);
1581 
1582 	if (!prog->is_func || extra_pass) {
1583 		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
1584 			pr_err_once("multi-func JIT bug %d != %d\n",
1585 				    ctx.idx, jit_data->ctx.idx);
1586 			bpf_jit_binary_free(header);
1587 			prog->bpf_func = NULL;
1588 			prog->jited = 0;
1589 			prog->jited_len = 0;
1590 			goto out_off;
1591 		}
1592 		bpf_jit_binary_lock_ro(header);
1593 	} else {
1594 		jit_data->ctx = ctx;
1595 		jit_data->image = image_ptr;
1596 		jit_data->header = header;
1597 	}
1598 	prog->bpf_func = (void *)ctx.image;
1599 	prog->jited = 1;
1600 	prog->jited_len = prog_size;
1601 
1602 	if (!prog->is_func || extra_pass) {
1603 		int i;
1604 
1605 		/* offset[prog->len] is the size of program */
1606 		for (i = 0; i <= prog->len; i++)
1607 			ctx.offset[i] *= AARCH64_INSN_SIZE;
1608 		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
1609 out_off:
1610 		kvfree(ctx.offset);
1611 		kfree(jit_data);
1612 		prog->aux->jit_data = NULL;
1613 	}
1614 out:
1615 	if (tmp_blinded)
1616 		bpf_jit_prog_release_other(prog, prog == orig_prog ?
1617 					   tmp : orig_prog);
1618 	return prog;
1619 }
1620 
/* Capability query: this JIT always reports kfunc call support. */
bool bpf_jit_supports_kfunc_call(void)
{
	const bool supported = true;

	return supported;
}
1625 
1626 u64 bpf_jit_alloc_exec_limit(void)
1627 {
1628 	return VMALLOC_END - VMALLOC_START;
1629 }
1630 
/*
 * Allocate @size bytes for JITed code with vmalloc(). The memory is
 * intended to be executable, so the KASAN pointer tag is reset before
 * the pointer is handed back.
 */
void *bpf_jit_alloc_exec(unsigned long size)
{
	void *mem = vmalloc(size);

	return kasan_reset_tag(mem);
}
1636 
/* Release memory obtained from bpf_jit_alloc_exec(). */
void bpf_jit_free_exec(void *addr)
{
	vfree(addr);
}
1641 
/*
 * Capability query: tell the caller that this JIT backend supports mixing
 * bpf2bpf calls and tail calls.
 */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	const bool supported = true;

	return supported;
}
1647 
1648 static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
1649 			    int args_off, int retval_off, int run_ctx_off,
1650 			    bool save_ret)
1651 {
1652 	__le32 *branch;
1653 	u64 enter_prog;
1654 	u64 exit_prog;
1655 	struct bpf_prog *p = l->link.prog;
1656 	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
1657 
1658 	enter_prog = (u64)bpf_trampoline_enter(p);
1659 	exit_prog = (u64)bpf_trampoline_exit(p);
1660 
1661 	if (l->cookie == 0) {
1662 		/* if cookie is zero, one instruction is enough to store it */
1663 		emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
1664 	} else {
1665 		emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
1666 		emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
1667 		     ctx);
1668 	}
1669 
1670 	/* save p to callee saved register x19 to avoid loading p with mov_i64
1671 	 * each time.
1672 	 */
1673 	emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);
1674 
1675 	/* arg1: prog */
1676 	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
1677 	/* arg2: &run_ctx */
1678 	emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx);
1679 
1680 	emit_call(enter_prog, ctx);
1681 
1682 	/* if (__bpf_prog_enter(prog) == 0)
1683 	 *         goto skip_exec_of_prog;
1684 	 */
1685 	branch = ctx->image + ctx->idx;
1686 	emit(A64_NOP, ctx);
1687 
1688 	/* save return value to callee saved register x20 */
1689 	emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
1690 
1691 	emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
1692 	if (!p->jited)
1693 		emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
1694 
1695 	emit_call((const u64)p->bpf_func, ctx);
1696 
1697 	if (save_ret)
1698 		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
1699 
1700 	if (ctx->image) {
1701 		int offset = &ctx->image[ctx->idx] - branch;
1702 		*branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset));
1703 	}
1704 
1705 	/* arg1: prog */
1706 	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
1707 	/* arg2: start time */
1708 	emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx);
1709 	/* arg3: &run_ctx */
1710 	emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx);
1711 
1712 	emit_call(exit_prog, ctx);
1713 }
1714 
1715 static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
1716 			       int args_off, int retval_off, int run_ctx_off,
1717 			       __le32 **branches)
1718 {
1719 	int i;
1720 
1721 	/* The first fmod_ret program will receive a garbage return value.
1722 	 * Set this to 0 to avoid confusing the program.
1723 	 */
1724 	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
1725 	for (i = 0; i < tl->nr_links; i++) {
1726 		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
1727 				run_ctx_off, true);
1728 		/* if (*(u64 *)(sp + retval_off) !=  0)
1729 		 *	goto do_fexit;
1730 		 */
1731 		emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);
1732 		/* Save the location of branch, and generate a nop.
1733 		 * This nop will be replaced with a cbnz later.
1734 		 */
1735 		branches[i] = ctx->image + ctx->idx;
1736 		emit(A64_NOP, ctx);
1737 	}
1738 }
1739 
1740 static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
1741 {
1742 	int i;
1743 
1744 	for (i = 0; i < nregs; i++) {
1745 		emit(A64_STR64I(i, A64_SP, args_off), ctx);
1746 		args_off += 8;
1747 	}
1748 }
1749 
1750 static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
1751 {
1752 	int i;
1753 
1754 	for (i = 0; i < nregs; i++) {
1755 		emit(A64_LDR64I(i, A64_SP, args_off), ctx);
1756 		args_off += 8;
1757 	}
1758 }
1759 
1760 /* Based on the x86's implementation of arch_prepare_bpf_trampoline().
1761  *
1762  * bpf prog and function entry before bpf trampoline hooked:
1763  *   mov x9, lr
1764  *   nop
1765  *
1766  * bpf prog and function entry after bpf trampoline hooked:
1767  *   mov x9, lr
1768  *   bl  <bpf_trampoline or plt>
1769  *
1770  */
1771 static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
1772 			      struct bpf_tramp_links *tlinks, void *orig_call,
1773 			      int nregs, u32 flags)
1774 {
1775 	int i;
1776 	int stack_size;
1777 	int retaddr_off;
1778 	int regs_off;
1779 	int retval_off;
1780 	int args_off;
1781 	int nregs_off;
1782 	int ip_off;
1783 	int run_ctx_off;
1784 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
1785 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
1786 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
1787 	bool save_ret;
1788 	__le32 **branches = NULL;
1789 
1790 	/* trampoline stack layout:
1791 	 *                  [ parent ip         ]
1792 	 *                  [ FP                ]
1793 	 * SP + retaddr_off [ self ip           ]
1794 	 *                  [ FP                ]
1795 	 *
1796 	 *                  [ padding           ] align SP to multiples of 16
1797 	 *
1798 	 *                  [ x20               ] callee saved reg x20
1799 	 * SP + regs_off    [ x19               ] callee saved reg x19
1800 	 *
1801 	 * SP + retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
1802 	 *                                        BPF_TRAMP_F_RET_FENTRY_RET
1803 	 *
1804 	 *                  [ arg reg N         ]
1805 	 *                  [ ...               ]
1806 	 * SP + args_off    [ arg reg 1         ]
1807 	 *
1808 	 * SP + nregs_off   [ arg regs count    ]
1809 	 *
1810 	 * SP + ip_off      [ traced function   ] BPF_TRAMP_F_IP_ARG flag
1811 	 *
1812 	 * SP + run_ctx_off [ bpf_tramp_run_ctx ]
1813 	 */
1814 
1815 	stack_size = 0;
1816 	run_ctx_off = stack_size;
1817 	/* room for bpf_tramp_run_ctx */
1818 	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
1819 
1820 	ip_off = stack_size;
1821 	/* room for IP address argument */
1822 	if (flags & BPF_TRAMP_F_IP_ARG)
1823 		stack_size += 8;
1824 
1825 	nregs_off = stack_size;
1826 	/* room for args count */
1827 	stack_size += 8;
1828 
1829 	args_off = stack_size;
1830 	/* room for args */
1831 	stack_size += nregs * 8;
1832 
1833 	/* room for return value */
1834 	retval_off = stack_size;
1835 	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
1836 	if (save_ret)
1837 		stack_size += 8;
1838 
1839 	/* room for callee saved registers, currently x19 and x20 are used */
1840 	regs_off = stack_size;
1841 	stack_size += 16;
1842 
1843 	/* round up to multiples of 16 to avoid SPAlignmentFault */
1844 	stack_size = round_up(stack_size, 16);
1845 
1846 	/* return address locates above FP */
1847 	retaddr_off = stack_size + 8;
1848 
1849 	/* bpf trampoline may be invoked by 3 instruction types:
1850 	 * 1. bl, attached to bpf prog or kernel function via short jump
1851 	 * 2. br, attached to bpf prog or kernel function via long jump
1852 	 * 3. blr, working as a function pointer, used by struct_ops.
1853 	 * So BTI_JC should used here to support both br and blr.
1854 	 */
1855 	emit_bti(A64_BTI_JC, ctx);
1856 
1857 	/* frame for parent function */
1858 	emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
1859 	emit(A64_MOV(1, A64_FP, A64_SP), ctx);
1860 
1861 	/* frame for patched function */
1862 	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
1863 	emit(A64_MOV(1, A64_FP, A64_SP), ctx);
1864 
1865 	/* allocate stack space */
1866 	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
1867 
1868 	if (flags & BPF_TRAMP_F_IP_ARG) {
1869 		/* save ip address of the traced function */
1870 		emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx);
1871 		emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
1872 	}
1873 
1874 	/* save arg regs count*/
1875 	emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
1876 	emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);
1877 
1878 	/* save arg regs */
1879 	save_args(ctx, args_off, nregs);
1880 
1881 	/* save callee saved registers */
1882 	emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
1883 	emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
1884 
1885 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
1886 		emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
1887 		emit_call((const u64)__bpf_tramp_enter, ctx);
1888 	}
1889 
1890 	for (i = 0; i < fentry->nr_links; i++)
1891 		invoke_bpf_prog(ctx, fentry->links[i], args_off,
1892 				retval_off, run_ctx_off,
1893 				flags & BPF_TRAMP_F_RET_FENTRY_RET);
1894 
1895 	if (fmod_ret->nr_links) {
1896 		branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
1897 				   GFP_KERNEL);
1898 		if (!branches)
1899 			return -ENOMEM;
1900 
1901 		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
1902 				   run_ctx_off, branches);
1903 	}
1904 
1905 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
1906 		restore_args(ctx, args_off, nregs);
1907 		/* call original func */
1908 		emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
1909 		emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
1910 		emit(A64_RET(A64_R(10)), ctx);
1911 		/* store return value */
1912 		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
1913 		/* reserve a nop for bpf_tramp_image_put */
1914 		im->ip_after_call = ctx->image + ctx->idx;
1915 		emit(A64_NOP, ctx);
1916 	}
1917 
1918 	/* update the branches saved in invoke_bpf_mod_ret with cbnz */
1919 	for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
1920 		int offset = &ctx->image[ctx->idx] - branches[i];
1921 		*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
1922 	}
1923 
1924 	for (i = 0; i < fexit->nr_links; i++)
1925 		invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
1926 				run_ctx_off, false);
1927 
1928 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
1929 		im->ip_epilogue = ctx->image + ctx->idx;
1930 		emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
1931 		emit_call((const u64)__bpf_tramp_exit, ctx);
1932 	}
1933 
1934 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
1935 		restore_args(ctx, args_off, nregs);
1936 
1937 	/* restore callee saved register x19 and x20 */
1938 	emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
1939 	emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
1940 
1941 	if (save_ret)
1942 		emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx);
1943 
1944 	/* reset SP  */
1945 	emit(A64_MOV(1, A64_SP, A64_FP), ctx);
1946 
1947 	/* pop frames  */
1948 	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
1949 	emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx);
1950 
1951 	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
1952 		/* skip patched function, return to parent */
1953 		emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
1954 		emit(A64_RET(A64_R(9)), ctx);
1955 	} else {
1956 		/* return to patched function */
1957 		emit(A64_MOV(1, A64_R(10), A64_LR), ctx);
1958 		emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
1959 		emit(A64_RET(A64_R(10)), ctx);
1960 	}
1961 
1962 	if (ctx->image)
1963 		bpf_flush_icache(ctx->image, ctx->image + ctx->idx);
1964 
1965 	kfree(branches);
1966 
1967 	return ctx->idx;
1968 }
1969 
1970 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
1971 				void *image_end, const struct btf_func_model *m,
1972 				u32 flags, struct bpf_tramp_links *tlinks,
1973 				void *orig_call)
1974 {
1975 	int i, ret;
1976 	int nregs = m->nr_args;
1977 	int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
1978 	struct jit_ctx ctx = {
1979 		.image = NULL,
1980 		.idx = 0,
1981 	};
1982 
1983 	/* extra registers needed for struct argument */
1984 	for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
1985 		/* The arg_size is at most 16 bytes, enforced by the verifier. */
1986 		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
1987 			nregs += (m->arg_size[i] + 7) / 8 - 1;
1988 	}
1989 
1990 	/* the first 8 registers are used for arguments */
1991 	if (nregs > 8)
1992 		return -ENOTSUPP;
1993 
1994 	ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
1995 	if (ret < 0)
1996 		return ret;
1997 
1998 	if (ret > max_insns)
1999 		return -EFBIG;
2000 
2001 	ctx.image = image;
2002 	ctx.idx = 0;
2003 
2004 	jit_fill_hole(image, (unsigned int)(image_end - image));
2005 	ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
2006 
2007 	if (ret > 0 && validate_code(&ctx) < 0)
2008 		ret = -EINVAL;
2009 
2010 	if (ret > 0)
2011 		ret *= AARCH64_INSN_SIZE;
2012 
2013 	return ret;
2014 }
2015 
2016 static bool is_long_jump(void *ip, void *target)
2017 {
2018 	long offset;
2019 
2020 	/* NULL target means this is a NOP */
2021 	if (!target)
2022 		return false;
2023 
2024 	offset = (long)target - (long)ip;
2025 	return offset < -SZ_128M || offset >= SZ_128M;
2026 }
2027 
2028 static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
2029 			     void *addr, void *plt, u32 *insn)
2030 {
2031 	void *target;
2032 
2033 	if (!addr) {
2034 		*insn = aarch64_insn_gen_nop();
2035 		return 0;
2036 	}
2037 
2038 	if (is_long_jump(ip, addr))
2039 		target = plt;
2040 	else
2041 		target = addr;
2042 
2043 	*insn = aarch64_insn_gen_branch_imm((unsigned long)ip,
2044 					    (unsigned long)target,
2045 					    type);
2046 
2047 	return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT;
2048 }
2049 
2050 /* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf
2051  * trampoline with the branch instruction from @ip to @new_addr. If @old_addr
2052  * or @new_addr is NULL, the old or new instruction is NOP.
2053  *
2054  * When @ip is the bpf prog entry, a bpf trampoline is being attached or
2055  * detached. Since bpf trampoline and bpf prog are allocated separately with
2056  * vmalloc, the address distance may exceed 128MB, the maximum branch range.
2057  * So long jump should be handled.
2058  *
2059  * When a bpf prog is constructed, a plt pointing to empty trampoline
2060  * dummy_tramp is placed at the end:
2061  *
2062  *      bpf_prog:
2063  *              mov x9, lr
2064  *              nop // patchsite
2065  *              ...
2066  *              ret
2067  *
2068  *      plt:
2069  *              ldr x10, target
2070  *              br x10
2071  *      target:
2072  *              .quad dummy_tramp // plt target
2073  *
2074  * This is also the state when no trampoline is attached.
2075  *
2076  * When a short-jump bpf trampoline is attached, the patchsite is patched
2077  * to a bl instruction to the trampoline directly:
2078  *
2079  *      bpf_prog:
2080  *              mov x9, lr
2081  *              bl <short-jump bpf trampoline address> // patchsite
2082  *              ...
2083  *              ret
2084  *
2085  *      plt:
2086  *              ldr x10, target
2087  *              br x10
2088  *      target:
2089  *              .quad dummy_tramp // plt target
2090  *
2091  * When a long-jump bpf trampoline is attached, the plt target is filled with
2092  * the trampoline address and the patchsite is patched to a bl instruction to
2093  * the plt:
2094  *
2095  *      bpf_prog:
2096  *              mov x9, lr
2097  *              bl plt // patchsite
2098  *              ...
2099  *              ret
2100  *
2101  *      plt:
2102  *              ldr x10, target
2103  *              br x10
2104  *      target:
2105  *              .quad <long-jump bpf trampoline address> // plt target
2106  *
2107  * The dummy_tramp is used to prevent another CPU from jumping to unknown
2108  * locations during the patching process, making the patching process easier.
2109  */
2110 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
2111 		       void *old_addr, void *new_addr)
2112 {
2113 	int ret;
2114 	u32 old_insn;
2115 	u32 new_insn;
2116 	u32 replaced;
2117 	struct bpf_plt *plt = NULL;
2118 	unsigned long size = 0UL;
2119 	unsigned long offset = ~0UL;
2120 	enum aarch64_insn_branch_type branch_type;
2121 	char namebuf[KSYM_NAME_LEN];
2122 	void *image = NULL;
2123 	u64 plt_target = 0ULL;
2124 	bool poking_bpf_entry;
2125 
2126 	if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
2127 		/* Only poking bpf text is supported. Since kernel function
2128 		 * entry is set up by ftrace, we reply on ftrace to poke kernel
2129 		 * functions.
2130 		 */
2131 		return -ENOTSUPP;
2132 
2133 	image = ip - offset;
2134 	/* zero offset means we're poking bpf prog entry */
2135 	poking_bpf_entry = (offset == 0UL);
2136 
2137 	/* bpf prog entry, find plt and the real patchsite */
2138 	if (poking_bpf_entry) {
2139 		/* plt locates at the end of bpf prog */
2140 		plt = image + size - PLT_TARGET_OFFSET;
2141 
2142 		/* skip to the nop instruction in bpf prog entry:
2143 		 * bti c // if BTI enabled
2144 		 * mov x9, x30
2145 		 * nop
2146 		 */
2147 		ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
2148 	}
2149 
2150 	/* long jump is only possible at bpf prog entry */
2151 	if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
2152 		    !poking_bpf_entry))
2153 		return -EINVAL;
2154 
2155 	if (poke_type == BPF_MOD_CALL)
2156 		branch_type = AARCH64_INSN_BRANCH_LINK;
2157 	else
2158 		branch_type = AARCH64_INSN_BRANCH_NOLINK;
2159 
2160 	if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
2161 		return -EFAULT;
2162 
2163 	if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
2164 		return -EFAULT;
2165 
2166 	if (is_long_jump(ip, new_addr))
2167 		plt_target = (u64)new_addr;
2168 	else if (is_long_jump(ip, old_addr))
2169 		/* if the old target is a long jump and the new target is not,
2170 		 * restore the plt target to dummy_tramp, so there is always a
2171 		 * legal and harmless address stored in plt target, and we'll
2172 		 * never jump from plt to an unknown place.
2173 		 */
2174 		plt_target = (u64)&dummy_tramp;
2175 
2176 	if (plt_target) {
2177 		/* non-zero plt_target indicates we're patching a bpf prog,
2178 		 * which is read only.
2179 		 */
2180 		if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1))
2181 			return -EFAULT;
2182 		WRITE_ONCE(plt->target, plt_target);
2183 		set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1);
2184 		/* since plt target points to either the new trampoline
2185 		 * or dummy_tramp, even if another CPU reads the old plt
2186 		 * target value before fetching the bl instruction to plt,
2187 		 * it will be brought back by dummy_tramp, so no barrier is
2188 		 * required here.
2189 		 */
2190 	}
2191 
2192 	/* if the old target and the new target are both long jumps, no
2193 	 * patching is required
2194 	 */
2195 	if (old_insn == new_insn)
2196 		return 0;
2197 
2198 	mutex_lock(&text_mutex);
2199 	if (aarch64_insn_read(ip, &replaced)) {
2200 		ret = -EFAULT;
2201 		goto out;
2202 	}
2203 
2204 	if (replaced != old_insn) {
2205 		ret = -EFAULT;
2206 		goto out;
2207 	}
2208 
2209 	/* We call aarch64_insn_patch_text_nosync() to replace instruction
2210 	 * atomically, so no other CPUs will fetch a half-new and half-old
2211 	 * instruction. But there is chance that another CPU executes the
2212 	 * old instruction after the patching operation finishes (e.g.,
2213 	 * pipeline not flushed, or icache not synchronized yet).
2214 	 *
2215 	 * 1. when a new trampoline is attached, it is not a problem for
2216 	 *    different CPUs to jump to different trampolines temporarily.
2217 	 *
2218 	 * 2. when an old trampoline is freed, we should wait for all other
2219 	 *    CPUs to exit the trampoline and make sure the trampoline is no
2220 	 *    longer reachable, since bpf_tramp_image_put() function already
2221 	 *    uses percpu_ref and task-based rcu to do the sync, no need to call
2222 	 *    the sync version here, see bpf_tramp_image_put() for details.
2223 	 */
2224 	ret = aarch64_insn_patch_text_nosync(ip, new_insn);
2225 out:
2226 	mutex_unlock(&text_mutex);
2227 
2228 	return ret;
2229 }
2230