1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * BPF JIT compiler for LoongArch
4  *
5  * Copyright (C) 2022 Loongson Technology Corporation Limited
6  */
7 #include <linux/memory.h>
8 #include "bpf_jit.h"
9 
10 #define LOONGARCH_MAX_REG_ARGS 8
11 
12 #define LOONGARCH_LONG_JUMP_NINSNS 5
13 #define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
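/*
 * A "long jump" site is move_imm() of the 64-bit target address (which
 * may need up to four instructions: lu12i.w/ori/lu32i.d/lu52i.d)
 * followed by a jirl, so five instruction slots are reserved at each
 * patchable location; shorter sequences leave the remaining slots as nops.
 */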
14 
15 #define LOONGARCH_FENTRY_NINSNS 2
16 #define LOONGARCH_FENTRY_NBYTES (LOONGARCH_FENTRY_NINSNS * 4)
17 #define LOONGARCH_BPF_FENTRY_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
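/*
 * Kernel and module functions carry a 2-instruction ftrace pad at their
 * entry, while BPF programs reserve a full 5-instruction long-jump site
 * (see the nop block emitted in build_prologue() and the orig_call
 * adjustment in __arch_prepare_bpf_trampoline()).
 */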
18 
19 #define REG_TCC		LOONGARCH_GPR_A6
20 #define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (round_up(stack, 16) - 80)
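/*
 * build_prologue() saves ra, fp and s0-s5 (8 * 8 bytes) plus tcc and
 * tcc_ptr (2 * 8 bytes) above the BPF stack area, 80 bytes in total;
 * tcc_ptr is the lowest of those save slots, hence the "- 80" above.
 */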
21 
22 static const int regmap[] = {
23 	/* return value from in-kernel function, and exit value for eBPF program */
24 	[BPF_REG_0] = LOONGARCH_GPR_A5,
25 	/* arguments from eBPF program to in-kernel function */
26 	[BPF_REG_1] = LOONGARCH_GPR_A0,
27 	[BPF_REG_2] = LOONGARCH_GPR_A1,
28 	[BPF_REG_3] = LOONGARCH_GPR_A2,
29 	[BPF_REG_4] = LOONGARCH_GPR_A3,
30 	[BPF_REG_5] = LOONGARCH_GPR_A4,
31 	/* callee saved registers that in-kernel function will preserve */
32 	[BPF_REG_6] = LOONGARCH_GPR_S0,
33 	[BPF_REG_7] = LOONGARCH_GPR_S1,
34 	[BPF_REG_8] = LOONGARCH_GPR_S2,
35 	[BPF_REG_9] = LOONGARCH_GPR_S3,
36 	/* read-only frame pointer to access stack */
37 	[BPF_REG_FP] = LOONGARCH_GPR_S4,
38 	/* temporary register for blinding constants */
39 	[BPF_REG_AX] = LOONGARCH_GPR_T0,
40 };
41 
42 static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx, int *store_offset)
43 {
44 	const struct bpf_prog *prog = ctx->prog;
45 	const bool is_main_prog = !bpf_is_subprog(prog);
46 
47 	if (is_main_prog) {
48 		/*
49 		 * LOONGARCH_GPR_T3 = MAX_TAIL_CALL_CNT
50 		 * if (REG_TCC > T3)
51 		 *	std REG_TCC -> LOONGARCH_GPR_SP + store_offset
52 		 * else
53 		 *	std REG_TCC -> LOONGARCH_GPR_SP + store_offset
54 		 *	REG_TCC = LOONGARCH_GPR_SP + store_offset
55 		 *
56 		 * std REG_TCC -> LOONGARCH_GPR_SP + store_offset
57 		 *
58 		 * The purpose of this code is to first push the TCC onto the
59 		 * stack, and then push the address of the TCC onto the stack.
60 		 * When bpf2bpf calls and tail calls are used in combination,
61 		 * the value in REG_TCC may be either a count or an address,
62 		 * so the two cases must be distinguished and handled separately.
63 		 */
64 		emit_insn(ctx, addid, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
65 		*store_offset -= sizeof(long);
66 
67 		emit_cond_jmp(ctx, BPF_JGT, REG_TCC, LOONGARCH_GPR_T3, 4);
68 
69 		/*
70 		 * If REG_TCC <= MAX_TAIL_CALL_CNT, the value in REG_TCC is a count,
71 		 * so push tcc itself onto the stack
72 		 */
73 		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
74 
75 		/* Load the address of the TCC slot into REG_TCC */
76 		emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
77 
78 		emit_uncond_jmp(ctx, 2);
79 
80 		/*
81 		 * If REG_TCC > MAX_TAIL_CALL_CNT, the value in REG_TCC is an address,
82 		 * push tcc_ptr into stack
83 		 */
84 		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
85 	} else {
86 		*store_offset -= sizeof(long);
87 		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
88 	}
89 
90 	/* Push tcc_ptr into stack */
91 	*store_offset -= sizeof(long);
92 	emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
93 }
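/*
 * After the prologue has run, the tcc_ptr slot always holds a pointer to
 * the live tail call counter: either the main program's own stack slot,
 * or the pointer a subprog inherited from its caller in REG_TCC.
 */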
94 
95 /*
96  * eBPF prog stack layout:
97  *
98  *                                        high
99  * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
100  *                            |           $ra           |
101  *                            +-------------------------+
102  *                            |           $fp           |
103  *                            +-------------------------+
104  *                            |           $s0           |
105  *                            +-------------------------+
106  *                            |           $s1           |
107  *                            +-------------------------+
108  *                            |           $s2           |
109  *                            +-------------------------+
110  *                            |           $s3           |
111  *                            +-------------------------+
112  *                            |           $s4           |
113  *                            +-------------------------+
114  *                            |           $s5           |
115  *                            +-------------------------+
116  *                            |           tcc           |
117  *                            +-------------------------+
118  *                            |           tcc_ptr       |
119  *                            +-------------------------+ <--BPF_REG_FP
120  *                            |  prog->aux->stack_depth |
121  *                            |        (optional)       |
122  * current $sp -------------> +-------------------------+
123  *                                        low
124  */
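/*
 * For example, a main program with stack_depth = 16 gets stack_adjust = 96:
 * $ra is saved at sp + 88, $fp at sp + 80, ..., $s5 at sp + 32, tcc at
 * sp + 24, tcc_ptr at sp + 16, and BPF_REG_FP is set to sp + 16, the top
 * of the 16-byte program stack area.
 */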
125 static void build_prologue(struct jit_ctx *ctx)
126 {
127 	int i, stack_adjust = 0, store_offset, bpf_stack_adjust;
128 	const struct bpf_prog *prog = ctx->prog;
129 	const bool is_main_prog = !bpf_is_subprog(prog);
130 
131 	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
132 
133 	/* To store ra, fp, s0, s1, s2, s3, s4, s5 */
134 	stack_adjust += sizeof(long) * 8;
135 
136 	/* To store tcc and tcc_ptr */
137 	stack_adjust += sizeof(long) * 2;
138 
139 	stack_adjust = round_up(stack_adjust, 16);
140 	stack_adjust += bpf_stack_adjust;
141 
142 	move_reg(ctx, LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);
143 	/* Reserve space for the move_imm + jirl instruction */
144 	for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
145 		emit_insn(ctx, nop);
146 
147 	/*
148 	 * First instruction initializes the tail call count (TCC)
149 	 * register to zero. On tail call we skip this instruction,
150 	 * and the TCC is passed in REG_TCC from the caller.
151 	 */
152 	if (is_main_prog)
153 		emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, 0);
154 
155 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
156 
157 	store_offset = stack_adjust - sizeof(long);
158 	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);
159 
160 	store_offset -= sizeof(long);
161 	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);
162 
163 	store_offset -= sizeof(long);
164 	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);
165 
166 	store_offset -= sizeof(long);
167 	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);
168 
169 	store_offset -= sizeof(long);
170 	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);
171 
172 	store_offset -= sizeof(long);
173 	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);
174 
175 	store_offset -= sizeof(long);
176 	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);
177 
178 	store_offset -= sizeof(long);
179 	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
180 
181 	prepare_bpf_tail_call_cnt(ctx, &store_offset);
182 
183 	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
184 
185 	if (bpf_stack_adjust)
186 		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
187 
188 	ctx->stack_size = stack_adjust;
189 }
190 
191 static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
192 {
193 	int stack_adjust = ctx->stack_size;
194 	int load_offset;
195 
196 	load_offset = stack_adjust - sizeof(long);
197 	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);
198 
199 	load_offset -= sizeof(long);
200 	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);
201 
202 	load_offset -= sizeof(long);
203 	emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);
204 
205 	load_offset -= sizeof(long);
206 	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);
207 
208 	load_offset -= sizeof(long);
209 	emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);
210 
211 	load_offset -= sizeof(long);
212 	emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);
213 
214 	load_offset -= sizeof(long);
215 	emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);
216 
217 	load_offset -= sizeof(long);
218 	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
219 
220 	/*
221 	 * When pushing onto the stack, tcc is stored first, then tcc_ptr.
222 	 * When popping from the stack, pop tcc_ptr first, then tcc.
223 	 */
224 	load_offset -= 2 * sizeof(long);
225 	emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
226 
227 	load_offset += sizeof(long);
228 	emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
229 
230 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
231 
232 	if (!is_tail_call) {
233 		/* Set return value */
234 		emit_insn(ctx, addiw, LOONGARCH_GPR_A0, regmap[BPF_REG_0], 0);
235 		/* Return to the caller */
236 		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
237 	} else {
238 		/*
239 		 * Call the next bpf prog and skip the first instruction
240 		 * of TCC initialization.
241 		 */
242 		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 7);
243 	}
244 }
245 
246 static void build_epilogue(struct jit_ctx *ctx)
247 {
248 	__build_epilogue(ctx, false);
249 }
250 
251 bool bpf_jit_supports_kfunc_call(void)
252 {
253 	return true;
254 }
255 
256 bool bpf_jit_supports_far_kfunc_call(void)
257 {
258 	return true;
259 }
260 
261 static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
262 {
263 	int off, tc_ninsn = 0;
264 	int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
265 	u8 a1 = LOONGARCH_GPR_A1;
266 	u8 a2 = LOONGARCH_GPR_A2;
267 	u8 t1 = LOONGARCH_GPR_T1;
268 	u8 t2 = LOONGARCH_GPR_T2;
269 	u8 t3 = LOONGARCH_GPR_T3;
270 	const int idx0 = ctx->idx;
271 
272 #define cur_offset (ctx->idx - idx0)
273 #define jmp_offset (tc_ninsn - (cur_offset))
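/*
 * tc_ninsn is the number of native instructions emitted for this BPF
 * instruction (known from the first pass), so jmp_offset always targets
 * the end of the tail-call sequence, i.e. the "out" fall-through path.
 */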
274 
275 	/*
276 	 * a0: &ctx
277 	 * a1: &array
278 	 * a2: index
279 	 *
280 	 * if (index >= array->map.max_entries)
281 	 *	 goto out;
282 	 */
283 	tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] : ctx->offset[0];
284 	emit_zext_32(ctx, a2, true);
285 
286 	off = offsetof(struct bpf_array, map.max_entries);
287 	emit_insn(ctx, ldwu, t1, a1, off);
288 	/* bgeu $a2, $t1, jmp_offset */
289 	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
290 		goto toofar;
291 
292 	/*
293 	 * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
294 	 *      goto out;
295 	 */
296 	emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
297 	emit_insn(ctx, ldd, t3, REG_TCC, 0);
298 	emit_insn(ctx, addid, t3, t3, 1);
299 	emit_insn(ctx, std, t3, REG_TCC, 0);
300 	emit_insn(ctx, addid, t2, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
301 	if (emit_tailcall_jmp(ctx, BPF_JSGT, t3, t2, jmp_offset) < 0)
302 		goto toofar;
303 
304 	/*
305 	 * prog = array->ptrs[index];
306 	 * if (!prog)
307 	 *	 goto out;
308 	 */
309 	emit_insn(ctx, alsld, t2, a2, a1, 2);
310 	off = offsetof(struct bpf_array, ptrs);
311 	emit_insn(ctx, ldd, t2, t2, off);
312 	/* beq $t2, $zero, jmp_offset */
313 	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
314 		goto toofar;
315 
316 	/* goto *(prog->bpf_func + 7 * LOONGARCH_INSN_SIZE); */
317 	off = offsetof(struct bpf_prog, bpf_func);
318 	emit_insn(ctx, ldd, t3, t2, off);
319 	__build_epilogue(ctx, true);
320 
321 	return 0;
322 
323 toofar:
324 	pr_info_once("tail_call: jump too far\n");
325 	return -1;
326 #undef cur_offset
327 #undef jmp_offset
328 }
329 
330 static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
331 {
332 	const u8 t1 = LOONGARCH_GPR_T1;
333 	const u8 t2 = LOONGARCH_GPR_T2;
334 	const u8 t3 = LOONGARCH_GPR_T3;
335 	const u8 r0 = regmap[BPF_REG_0];
336 	const u8 src = regmap[insn->src_reg];
337 	const u8 dst = regmap[insn->dst_reg];
338 	const s16 off = insn->off;
339 	const s32 imm = insn->imm;
340 	const bool isdw = BPF_SIZE(insn->code) == BPF_DW;
341 
342 	move_imm(ctx, t1, off, false);
343 	emit_insn(ctx, addd, t1, dst, t1);
344 	move_reg(ctx, t3, src);
345 
346 	switch (imm) {
347 	/* lock *(size *)(dst + off) <op>= src */
348 	case BPF_ADD:
349 		if (isdw)
350 			emit_insn(ctx, amaddd, t2, t1, src);
351 		else
352 			emit_insn(ctx, amaddw, t2, t1, src);
353 		break;
354 	case BPF_AND:
355 		if (isdw)
356 			emit_insn(ctx, amandd, t2, t1, src);
357 		else
358 			emit_insn(ctx, amandw, t2, t1, src);
359 		break;
360 	case BPF_OR:
361 		if (isdw)
362 			emit_insn(ctx, amord, t2, t1, src);
363 		else
364 			emit_insn(ctx, amorw, t2, t1, src);
365 		break;
366 	case BPF_XOR:
367 		if (isdw)
368 			emit_insn(ctx, amxord, t2, t1, src);
369 		else
370 			emit_insn(ctx, amxorw, t2, t1, src);
371 		break;
372 	/* src = atomic_fetch_<op>(dst + off, src) */
373 	case BPF_ADD | BPF_FETCH:
374 		if (isdw) {
375 			emit_insn(ctx, amaddd, src, t1, t3);
376 		} else {
377 			emit_insn(ctx, amaddw, src, t1, t3);
378 			emit_zext_32(ctx, src, true);
379 		}
380 		break;
381 	case BPF_AND | BPF_FETCH:
382 		if (isdw) {
383 			emit_insn(ctx, amandd, src, t1, t3);
384 		} else {
385 			emit_insn(ctx, amandw, src, t1, t3);
386 			emit_zext_32(ctx, src, true);
387 		}
388 		break;
389 	case BPF_OR | BPF_FETCH:
390 		if (isdw) {
391 			emit_insn(ctx, amord, src, t1, t3);
392 		} else {
393 			emit_insn(ctx, amorw, src, t1, t3);
394 			emit_zext_32(ctx, src, true);
395 		}
396 		break;
397 	case BPF_XOR | BPF_FETCH:
398 		if (isdw) {
399 			emit_insn(ctx, amxord, src, t1, t3);
400 		} else {
401 			emit_insn(ctx, amxorw, src, t1, t3);
402 			emit_zext_32(ctx, src, true);
403 		}
404 		break;
405 	/* src = atomic_xchg(dst + off, src); */
406 	case BPF_XCHG:
407 		if (isdw) {
408 			emit_insn(ctx, amswapd, src, t1, t3);
409 		} else {
410 			emit_insn(ctx, amswapw, src, t1, t3);
411 			emit_zext_32(ctx, src, true);
412 		}
413 		break;
414 	/* r0 = atomic_cmpxchg(dst + off, r0, src); */
415 	case BPF_CMPXCHG:
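		/*
		 * Emitted as an LL/SC retry loop: load-linked the current
		 * value into r0, compare it with the expected value saved in
		 * t2, conditionally store src and retry until the sc.[wd]
		 * succeeds.  The 32-bit variant zero-extends both operands
		 * before the comparison and zero-extends the final result.
		 */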
416 		move_reg(ctx, t2, r0);
417 		if (isdw) {
418 			emit_insn(ctx, lld, r0, t1, 0);
419 			emit_insn(ctx, bne, t2, r0, 4);
420 			move_reg(ctx, t3, src);
421 			emit_insn(ctx, scd, t3, t1, 0);
422 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
423 		} else {
424 			emit_insn(ctx, llw, r0, t1, 0);
425 			emit_zext_32(ctx, t2, true);
426 			emit_zext_32(ctx, r0, true);
427 			emit_insn(ctx, bne, t2, r0, 4);
428 			move_reg(ctx, t3, src);
429 			emit_insn(ctx, scw, t3, t1, 0);
430 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
431 			emit_zext_32(ctx, r0, true);
432 		}
433 		break;
434 	}
435 }
436 
437 static bool is_signed_bpf_cond(u8 cond)
438 {
439 	return cond == BPF_JSGT || cond == BPF_JSLT ||
440 	       cond == BPF_JSGE || cond == BPF_JSLE;
441 }
442 
443 #define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
444 #define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
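/*
 * ex->fixup packs the destination register to clear on a fault into
 * bits 31:27, and the (positive) distance from &ex->fixup back to the
 * instruction following the faulting load into bits 26:0, so that
 * ex_handler_bpf() can resume execution at "&ex->fixup - offset".
 */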
445 
446 bool ex_handler_bpf(const struct exception_table_entry *ex,
447 		    struct pt_regs *regs)
448 {
449 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
450 	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
451 
452 	regs->regs[dst_reg] = 0;
453 	regs->csr_era = (unsigned long)&ex->fixup - offset;
454 
455 	return true;
456 }
457 
458 /* For accesses to BTF pointers, add an entry to the exception table */
459 static int add_exception_handler(const struct bpf_insn *insn,
460 				 struct jit_ctx *ctx,
461 				 int dst_reg)
462 {
463 	unsigned long pc;
464 	off_t offset;
465 	struct exception_table_entry *ex;
466 
467 	if (!ctx->image || !ctx->prog->aux->extable)
468 		return 0;
469 
470 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
471 	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
472 		return 0;
473 
474 	if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
475 		return -EINVAL;
476 
477 	ex = &ctx->prog->aux->extable[ctx->num_exentries];
478 	pc = (unsigned long)&ctx->image[ctx->idx - 1];
479 
480 	offset = pc - (long)&ex->insn;
481 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
482 		return -ERANGE;
483 
484 	ex->insn = offset;
485 
486 	/*
487 	 * Since the extable follows the program, the fixup offset is always
488 	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
489 	 * to keep things simple, and put the destination register in the upper
490 	 * bits. We don't need to worry about buildtime or runtime sort
491 	 * modifying the upper bits because the table is already sorted, and
492 	 * isn't part of the main exception table.
493 	 */
494 	offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
495 	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
496 		return -ERANGE;
497 
498 	ex->type = EX_TYPE_BPF;
499 	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
500 
501 	ctx->num_exentries++;
502 
503 	return 0;
504 }
505 
506 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
507 {
508 	u8 tm = -1;
509 	u64 func_addr;
510 	bool func_addr_fixed, sign_extend;
511 	int i = insn - ctx->prog->insnsi;
512 	int ret, jmp_offset, tcc_ptr_off;
513 	const u8 code = insn->code;
514 	const u8 cond = BPF_OP(code);
515 	const u8 t1 = LOONGARCH_GPR_T1;
516 	const u8 t2 = LOONGARCH_GPR_T2;
517 	const u8 src = regmap[insn->src_reg];
518 	const u8 dst = regmap[insn->dst_reg];
519 	const s16 off = insn->off;
520 	const s32 imm = insn->imm;
521 	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
522 
523 	switch (code) {
524 	/* dst = src */
525 	case BPF_ALU | BPF_MOV | BPF_X:
526 	case BPF_ALU64 | BPF_MOV | BPF_X:
527 		switch (off) {
528 		case 0:
529 			move_reg(ctx, dst, src);
530 			emit_zext_32(ctx, dst, is32);
531 			break;
532 		case 8:
533 			emit_insn(ctx, extwb, dst, src);
534 			emit_zext_32(ctx, dst, is32);
535 			break;
536 		case 16:
537 			emit_insn(ctx, extwh, dst, src);
538 			emit_zext_32(ctx, dst, is32);
539 			break;
540 		case 32:
541 			emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
542 			break;
543 		}
544 		break;
545 
546 	/* dst = imm */
547 	case BPF_ALU | BPF_MOV | BPF_K:
548 	case BPF_ALU64 | BPF_MOV | BPF_K:
549 		move_imm(ctx, dst, imm, is32);
550 		break;
551 
552 	/* dst = dst + src */
553 	case BPF_ALU | BPF_ADD | BPF_X:
554 	case BPF_ALU64 | BPF_ADD | BPF_X:
555 		emit_insn(ctx, addd, dst, dst, src);
556 		emit_zext_32(ctx, dst, is32);
557 		break;
558 
559 	/* dst = dst + imm */
560 	case BPF_ALU | BPF_ADD | BPF_K:
561 	case BPF_ALU64 | BPF_ADD | BPF_K:
562 		if (is_signed_imm12(imm)) {
563 			emit_insn(ctx, addid, dst, dst, imm);
564 		} else {
565 			move_imm(ctx, t1, imm, is32);
566 			emit_insn(ctx, addd, dst, dst, t1);
567 		}
568 		emit_zext_32(ctx, dst, is32);
569 		break;
570 
571 	/* dst = dst - src */
572 	case BPF_ALU | BPF_SUB | BPF_X:
573 	case BPF_ALU64 | BPF_SUB | BPF_X:
574 		emit_insn(ctx, subd, dst, dst, src);
575 		emit_zext_32(ctx, dst, is32);
576 		break;
577 
578 	/* dst = dst - imm */
579 	case BPF_ALU | BPF_SUB | BPF_K:
580 	case BPF_ALU64 | BPF_SUB | BPF_K:
581 		if (is_signed_imm12(-imm)) {
582 			emit_insn(ctx, addid, dst, dst, -imm);
583 		} else {
584 			move_imm(ctx, t1, imm, is32);
585 			emit_insn(ctx, subd, dst, dst, t1);
586 		}
587 		emit_zext_32(ctx, dst, is32);
588 		break;
589 
590 	/* dst = dst * src */
591 	case BPF_ALU | BPF_MUL | BPF_X:
592 	case BPF_ALU64 | BPF_MUL | BPF_X:
593 		emit_insn(ctx, muld, dst, dst, src);
594 		emit_zext_32(ctx, dst, is32);
595 		break;
596 
597 	/* dst = dst * imm */
598 	case BPF_ALU | BPF_MUL | BPF_K:
599 	case BPF_ALU64 | BPF_MUL | BPF_K:
600 		move_imm(ctx, t1, imm, is32);
601 		emit_insn(ctx, muld, dst, dst, t1);
602 		emit_zext_32(ctx, dst, is32);
603 		break;
604 
605 	/* dst = dst / src */
606 	case BPF_ALU | BPF_DIV | BPF_X:
607 	case BPF_ALU64 | BPF_DIV | BPF_X:
608 		if (!off) {
609 			emit_zext_32(ctx, dst, is32);
610 			move_reg(ctx, t1, src);
611 			emit_zext_32(ctx, t1, is32);
612 			emit_insn(ctx, divdu, dst, dst, t1);
613 			emit_zext_32(ctx, dst, is32);
614 		} else {
615 			emit_sext_32(ctx, dst, is32);
616 			move_reg(ctx, t1, src);
617 			emit_sext_32(ctx, t1, is32);
618 			emit_insn(ctx, divd, dst, dst, t1);
619 			emit_sext_32(ctx, dst, is32);
620 		}
621 		break;
622 
623 	/* dst = dst / imm */
624 	case BPF_ALU | BPF_DIV | BPF_K:
625 	case BPF_ALU64 | BPF_DIV | BPF_K:
626 		if (!off) {
627 			move_imm(ctx, t1, imm, is32);
628 			emit_zext_32(ctx, dst, is32);
629 			emit_insn(ctx, divdu, dst, dst, t1);
630 			emit_zext_32(ctx, dst, is32);
631 		} else {
632 			move_imm(ctx, t1, imm, false);
633 			emit_sext_32(ctx, t1, is32);
634 			emit_sext_32(ctx, dst, is32);
635 			emit_insn(ctx, divd, dst, dst, t1);
636 			emit_sext_32(ctx, dst, is32);
637 		}
638 		break;
639 
640 	/* dst = dst % src */
641 	case BPF_ALU | BPF_MOD | BPF_X:
642 	case BPF_ALU64 | BPF_MOD | BPF_X:
643 		if (!off) {
644 			emit_zext_32(ctx, dst, is32);
645 			move_reg(ctx, t1, src);
646 			emit_zext_32(ctx, t1, is32);
647 			emit_insn(ctx, moddu, dst, dst, t1);
648 			emit_zext_32(ctx, dst, is32);
649 		} else {
650 			emit_sext_32(ctx, dst, is32);
651 			move_reg(ctx, t1, src);
652 			emit_sext_32(ctx, t1, is32);
653 			emit_insn(ctx, modd, dst, dst, t1);
654 			emit_sext_32(ctx, dst, is32);
655 		}
656 		break;
657 
658 	/* dst = dst % imm */
659 	case BPF_ALU | BPF_MOD | BPF_K:
660 	case BPF_ALU64 | BPF_MOD | BPF_K:
661 		if (!off) {
662 			move_imm(ctx, t1, imm, is32);
663 			emit_zext_32(ctx, dst, is32);
664 			emit_insn(ctx, moddu, dst, dst, t1);
665 			emit_zext_32(ctx, dst, is32);
666 		} else {
667 			move_imm(ctx, t1, imm, false);
668 			emit_sext_32(ctx, t1, is32);
669 			emit_sext_32(ctx, dst, is32);
670 			emit_insn(ctx, modd, dst, dst, t1);
671 			emit_sext_32(ctx, dst, is32);
672 		}
673 		break;
674 
675 	/* dst = -dst */
676 	case BPF_ALU | BPF_NEG:
677 	case BPF_ALU64 | BPF_NEG:
678 		move_imm(ctx, t1, imm, is32);
679 		emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
680 		emit_zext_32(ctx, dst, is32);
681 		break;
682 
683 	/* dst = dst & src */
684 	case BPF_ALU | BPF_AND | BPF_X:
685 	case BPF_ALU64 | BPF_AND | BPF_X:
686 		emit_insn(ctx, and, dst, dst, src);
687 		emit_zext_32(ctx, dst, is32);
688 		break;
689 
690 	/* dst = dst & imm */
691 	case BPF_ALU | BPF_AND | BPF_K:
692 	case BPF_ALU64 | BPF_AND | BPF_K:
693 		if (is_unsigned_imm12(imm)) {
694 			emit_insn(ctx, andi, dst, dst, imm);
695 		} else {
696 			move_imm(ctx, t1, imm, is32);
697 			emit_insn(ctx, and, dst, dst, t1);
698 		}
699 		emit_zext_32(ctx, dst, is32);
700 		break;
701 
702 	/* dst = dst | src */
703 	case BPF_ALU | BPF_OR | BPF_X:
704 	case BPF_ALU64 | BPF_OR | BPF_X:
705 		emit_insn(ctx, or, dst, dst, src);
706 		emit_zext_32(ctx, dst, is32);
707 		break;
708 
709 	/* dst = dst | imm */
710 	case BPF_ALU | BPF_OR | BPF_K:
711 	case BPF_ALU64 | BPF_OR | BPF_K:
712 		if (is_unsigned_imm12(imm)) {
713 			emit_insn(ctx, ori, dst, dst, imm);
714 		} else {
715 			move_imm(ctx, t1, imm, is32);
716 			emit_insn(ctx, or, dst, dst, t1);
717 		}
718 		emit_zext_32(ctx, dst, is32);
719 		break;
720 
721 	/* dst = dst ^ src */
722 	case BPF_ALU | BPF_XOR | BPF_X:
723 	case BPF_ALU64 | BPF_XOR | BPF_X:
724 		emit_insn(ctx, xor, dst, dst, src);
725 		emit_zext_32(ctx, dst, is32);
726 		break;
727 
728 	/* dst = dst ^ imm */
729 	case BPF_ALU | BPF_XOR | BPF_K:
730 	case BPF_ALU64 | BPF_XOR | BPF_K:
731 		if (is_unsigned_imm12(imm)) {
732 			emit_insn(ctx, xori, dst, dst, imm);
733 		} else {
734 			move_imm(ctx, t1, imm, is32);
735 			emit_insn(ctx, xor, dst, dst, t1);
736 		}
737 		emit_zext_32(ctx, dst, is32);
738 		break;
739 
740 	/* dst = dst << src (logical) */
741 	case BPF_ALU | BPF_LSH | BPF_X:
742 		emit_insn(ctx, sllw, dst, dst, src);
743 		emit_zext_32(ctx, dst, is32);
744 		break;
745 
746 	case BPF_ALU64 | BPF_LSH | BPF_X:
747 		emit_insn(ctx, slld, dst, dst, src);
748 		break;
749 
750 	/* dst = dst << imm (logical) */
751 	case BPF_ALU | BPF_LSH | BPF_K:
752 		emit_insn(ctx, slliw, dst, dst, imm);
753 		emit_zext_32(ctx, dst, is32);
754 		break;
755 
756 	case BPF_ALU64 | BPF_LSH | BPF_K:
757 		emit_insn(ctx, sllid, dst, dst, imm);
758 		break;
759 
760 	/* dst = dst >> src (logical) */
761 	case BPF_ALU | BPF_RSH | BPF_X:
762 		emit_insn(ctx, srlw, dst, dst, src);
763 		emit_zext_32(ctx, dst, is32);
764 		break;
765 
766 	case BPF_ALU64 | BPF_RSH | BPF_X:
767 		emit_insn(ctx, srld, dst, dst, src);
768 		break;
769 
770 	/* dst = dst >> imm (logical) */
771 	case BPF_ALU | BPF_RSH | BPF_K:
772 		emit_insn(ctx, srliw, dst, dst, imm);
773 		emit_zext_32(ctx, dst, is32);
774 		break;
775 
776 	case BPF_ALU64 | BPF_RSH | BPF_K:
777 		emit_insn(ctx, srlid, dst, dst, imm);
778 		break;
779 
780 	/* dst = dst >> src (arithmetic) */
781 	case BPF_ALU | BPF_ARSH | BPF_X:
782 		emit_insn(ctx, sraw, dst, dst, src);
783 		emit_zext_32(ctx, dst, is32);
784 		break;
785 
786 	case BPF_ALU64 | BPF_ARSH | BPF_X:
787 		emit_insn(ctx, srad, dst, dst, src);
788 		break;
789 
790 	/* dst = dst >> imm (arithmetic) */
791 	case BPF_ALU | BPF_ARSH | BPF_K:
792 		emit_insn(ctx, sraiw, dst, dst, imm);
793 		emit_zext_32(ctx, dst, is32);
794 		break;
795 
796 	case BPF_ALU64 | BPF_ARSH | BPF_K:
797 		emit_insn(ctx, sraid, dst, dst, imm);
798 		break;
799 
800 	/* dst = BSWAP##imm(dst) */
801 	case BPF_ALU | BPF_END | BPF_FROM_LE:
802 		switch (imm) {
803 		case 16:
804 			/* zero-extend 16 bits into 64 bits */
805 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
806 			break;
807 		case 32:
808 			/* zero-extend 32 bits into 64 bits */
809 			emit_zext_32(ctx, dst, is32);
810 			break;
811 		case 64:
812 			/* do nothing */
813 			break;
814 		}
815 		break;
816 
817 	case BPF_ALU | BPF_END | BPF_FROM_BE:
818 	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
819 		switch (imm) {
820 		case 16:
821 			emit_insn(ctx, revb2h, dst, dst);
822 			/* zero-extend 16 bits into 64 bits */
823 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
824 			break;
825 		case 32:
826 			emit_insn(ctx, revb2w, dst, dst);
827 			/* clear the upper 32 bits */
828 			emit_zext_32(ctx, dst, true);
829 			break;
830 		case 64:
831 			emit_insn(ctx, revbd, dst, dst);
832 			break;
833 		}
834 		break;
835 
836 	/* PC += off if dst cond src */
837 	case BPF_JMP | BPF_JEQ | BPF_X:
838 	case BPF_JMP | BPF_JNE | BPF_X:
839 	case BPF_JMP | BPF_JGT | BPF_X:
840 	case BPF_JMP | BPF_JGE | BPF_X:
841 	case BPF_JMP | BPF_JLT | BPF_X:
842 	case BPF_JMP | BPF_JLE | BPF_X:
843 	case BPF_JMP | BPF_JSGT | BPF_X:
844 	case BPF_JMP | BPF_JSGE | BPF_X:
845 	case BPF_JMP | BPF_JSLT | BPF_X:
846 	case BPF_JMP | BPF_JSLE | BPF_X:
847 	case BPF_JMP32 | BPF_JEQ | BPF_X:
848 	case BPF_JMP32 | BPF_JNE | BPF_X:
849 	case BPF_JMP32 | BPF_JGT | BPF_X:
850 	case BPF_JMP32 | BPF_JGE | BPF_X:
851 	case BPF_JMP32 | BPF_JLT | BPF_X:
852 	case BPF_JMP32 | BPF_JLE | BPF_X:
853 	case BPF_JMP32 | BPF_JSGT | BPF_X:
854 	case BPF_JMP32 | BPF_JSGE | BPF_X:
855 	case BPF_JMP32 | BPF_JSLT | BPF_X:
856 	case BPF_JMP32 | BPF_JSLE | BPF_X:
857 		jmp_offset = bpf2la_offset(i, off, ctx);
858 		move_reg(ctx, t1, dst);
859 		move_reg(ctx, t2, src);
860 		if (is_signed_bpf_cond(BPF_OP(code))) {
861 			emit_sext_32(ctx, t1, is32);
862 			emit_sext_32(ctx, t2, is32);
863 		} else {
864 			emit_zext_32(ctx, t1, is32);
865 			emit_zext_32(ctx, t2, is32);
866 		}
867 		if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
868 			goto toofar;
869 		break;
870 
871 	/* PC += off if dst cond imm */
872 	case BPF_JMP | BPF_JEQ | BPF_K:
873 	case BPF_JMP | BPF_JNE | BPF_K:
874 	case BPF_JMP | BPF_JGT | BPF_K:
875 	case BPF_JMP | BPF_JGE | BPF_K:
876 	case BPF_JMP | BPF_JLT | BPF_K:
877 	case BPF_JMP | BPF_JLE | BPF_K:
878 	case BPF_JMP | BPF_JSGT | BPF_K:
879 	case BPF_JMP | BPF_JSGE | BPF_K:
880 	case BPF_JMP | BPF_JSLT | BPF_K:
881 	case BPF_JMP | BPF_JSLE | BPF_K:
882 	case BPF_JMP32 | BPF_JEQ | BPF_K:
883 	case BPF_JMP32 | BPF_JNE | BPF_K:
884 	case BPF_JMP32 | BPF_JGT | BPF_K:
885 	case BPF_JMP32 | BPF_JGE | BPF_K:
886 	case BPF_JMP32 | BPF_JLT | BPF_K:
887 	case BPF_JMP32 | BPF_JLE | BPF_K:
888 	case BPF_JMP32 | BPF_JSGT | BPF_K:
889 	case BPF_JMP32 | BPF_JSGE | BPF_K:
890 	case BPF_JMP32 | BPF_JSLT | BPF_K:
891 	case BPF_JMP32 | BPF_JSLE | BPF_K:
892 		jmp_offset = bpf2la_offset(i, off, ctx);
893 		if (imm) {
894 			move_imm(ctx, t1, imm, false);
895 			tm = t1;
896 		} else {
897 			/* If imm is 0, simply use zero register. */
898 			tm = LOONGARCH_GPR_ZERO;
899 		}
900 		move_reg(ctx, t2, dst);
901 		if (is_signed_bpf_cond(BPF_OP(code))) {
902 			emit_sext_32(ctx, tm, is32);
903 			emit_sext_32(ctx, t2, is32);
904 		} else {
905 			emit_zext_32(ctx, tm, is32);
906 			emit_zext_32(ctx, t2, is32);
907 		}
908 		if (emit_cond_jmp(ctx, cond, t2, tm, jmp_offset) < 0)
909 			goto toofar;
910 		break;
911 
912 	/* PC += off if dst & src */
913 	case BPF_JMP | BPF_JSET | BPF_X:
914 	case BPF_JMP32 | BPF_JSET | BPF_X:
915 		jmp_offset = bpf2la_offset(i, off, ctx);
916 		emit_insn(ctx, and, t1, dst, src);
917 		emit_zext_32(ctx, t1, is32);
918 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
919 			goto toofar;
920 		break;
921 
922 	/* PC += off if dst & imm */
923 	case BPF_JMP | BPF_JSET | BPF_K:
924 	case BPF_JMP32 | BPF_JSET | BPF_K:
925 		jmp_offset = bpf2la_offset(i, off, ctx);
926 		move_imm(ctx, t1, imm, is32);
927 		emit_insn(ctx, and, t1, dst, t1);
928 		emit_zext_32(ctx, t1, is32);
929 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
930 			goto toofar;
931 		break;
932 
933 	/* PC += off */
934 	case BPF_JMP | BPF_JA:
935 	case BPF_JMP32 | BPF_JA:
936 		if (BPF_CLASS(code) == BPF_JMP)
937 			jmp_offset = bpf2la_offset(i, off, ctx);
938 		else
939 			jmp_offset = bpf2la_offset(i, imm, ctx);
940 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
941 			goto toofar;
942 		break;
943 
944 	/* function call */
945 	case BPF_JMP | BPF_CALL:
946 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
947 					    &func_addr, &func_addr_fixed);
948 		if (ret < 0)
949 			return ret;
950 
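		/*
		 * For bpf2bpf calls, reload the tail call count pointer from
		 * its stack slot so the callee receives it in REG_TCC.
		 */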
951 		if (insn->src_reg == BPF_PSEUDO_CALL) {
952 			tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
953 			emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
954 		}
955 
956 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
957 			const struct btf_func_model *m;
958 			int i;
959 
960 			m = bpf_jit_find_kfunc_model(ctx->prog, insn);
961 			if (!m)
962 				return -EINVAL;
963 
964 			for (i = 0; i < m->nr_args; i++) {
965 				u8 reg = regmap[BPF_REG_1 + i];
966 				bool sign = m->arg_flags[i] & BTF_FMODEL_SIGNED_ARG;
967 
968 				emit_abi_ext(ctx, reg, m->arg_size[i], sign);
969 			}
970 		}
971 
972 		move_addr(ctx, t1, func_addr);
973 		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0);
974 
975 		if (insn->src_reg != BPF_PSEUDO_CALL)
976 			move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
977 
978 		break;
979 
980 	/* tail call */
981 	case BPF_JMP | BPF_TAIL_CALL:
982 		if (emit_bpf_tail_call(ctx, i) < 0)
983 			return -EINVAL;
984 		break;
985 
986 	/* function return */
987 	case BPF_JMP | BPF_EXIT:
988 		if (i == ctx->prog->len - 1)
989 			break;
990 
991 		jmp_offset = epilogue_offset(ctx);
992 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
993 			goto toofar;
994 		break;
995 
996 	/* dst = imm64 */
997 	case BPF_LD | BPF_IMM | BPF_DW:
998 	{
999 		const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
1000 
1001 		if (bpf_pseudo_func(insn))
1002 			move_addr(ctx, dst, imm64);
1003 		else
1004 			move_imm(ctx, dst, imm64, is32);
1005 		return 1;
1006 	}
1007 
1008 	/* dst = *(size *)(src + off) */
1009 	case BPF_LDX | BPF_MEM | BPF_B:
1010 	case BPF_LDX | BPF_MEM | BPF_H:
1011 	case BPF_LDX | BPF_MEM | BPF_W:
1012 	case BPF_LDX | BPF_MEM | BPF_DW:
1013 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1014 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1015 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1016 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1017 	/* dst_reg = (s64)*(signed size *)(src_reg + off) */
1018 	case BPF_LDX | BPF_MEMSX | BPF_B:
1019 	case BPF_LDX | BPF_MEMSX | BPF_H:
1020 	case BPF_LDX | BPF_MEMSX | BPF_W:
1021 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
1022 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
1023 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
1024 		sign_extend = BPF_MODE(insn->code) == BPF_MEMSX ||
1025 			      BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
1026 		switch (BPF_SIZE(code)) {
1027 		case BPF_B:
1028 			if (is_signed_imm12(off)) {
1029 				if (sign_extend)
1030 					emit_insn(ctx, ldb, dst, src, off);
1031 				else
1032 					emit_insn(ctx, ldbu, dst, src, off);
1033 			} else {
1034 				move_imm(ctx, t1, off, is32);
1035 				if (sign_extend)
1036 					emit_insn(ctx, ldxb, dst, src, t1);
1037 				else
1038 					emit_insn(ctx, ldxbu, dst, src, t1);
1039 			}
1040 			break;
1041 		case BPF_H:
1042 			if (is_signed_imm12(off)) {
1043 				if (sign_extend)
1044 					emit_insn(ctx, ldh, dst, src, off);
1045 				else
1046 					emit_insn(ctx, ldhu, dst, src, off);
1047 			} else {
1048 				move_imm(ctx, t1, off, is32);
1049 				if (sign_extend)
1050 					emit_insn(ctx, ldxh, dst, src, t1);
1051 				else
1052 					emit_insn(ctx, ldxhu, dst, src, t1);
1053 			}
1054 			break;
1055 		case BPF_W:
1056 			if (is_signed_imm12(off)) {
1057 				if (sign_extend)
1058 					emit_insn(ctx, ldw, dst, src, off);
1059 				else
1060 					emit_insn(ctx, ldwu, dst, src, off);
1061 			} else {
1062 				move_imm(ctx, t1, off, is32);
1063 				if (sign_extend)
1064 					emit_insn(ctx, ldxw, dst, src, t1);
1065 				else
1066 					emit_insn(ctx, ldxwu, dst, src, t1);
1067 			}
1068 			break;
1069 		case BPF_DW:
1070 			move_imm(ctx, t1, off, is32);
1071 			emit_insn(ctx, ldxd, dst, src, t1);
1072 			break;
1073 		}
1074 
1075 		ret = add_exception_handler(insn, ctx, dst);
1076 		if (ret)
1077 			return ret;
1078 		break;
1079 
1080 	/* *(size *)(dst + off) = imm */
1081 	case BPF_ST | BPF_MEM | BPF_B:
1082 	case BPF_ST | BPF_MEM | BPF_H:
1083 	case BPF_ST | BPF_MEM | BPF_W:
1084 	case BPF_ST | BPF_MEM | BPF_DW:
1085 		switch (BPF_SIZE(code)) {
1086 		case BPF_B:
1087 			move_imm(ctx, t1, imm, is32);
1088 			if (is_signed_imm12(off)) {
1089 				emit_insn(ctx, stb, t1, dst, off);
1090 			} else {
1091 				move_imm(ctx, t2, off, is32);
1092 				emit_insn(ctx, stxb, t1, dst, t2);
1093 			}
1094 			break;
1095 		case BPF_H:
1096 			move_imm(ctx, t1, imm, is32);
1097 			if (is_signed_imm12(off)) {
1098 				emit_insn(ctx, sth, t1, dst, off);
1099 			} else {
1100 				move_imm(ctx, t2, off, is32);
1101 				emit_insn(ctx, stxh, t1, dst, t2);
1102 			}
1103 			break;
1104 		case BPF_W:
1105 			move_imm(ctx, t1, imm, is32);
1106 			if (is_signed_imm12(off)) {
1107 				emit_insn(ctx, stw, t1, dst, off);
1108 			} else if (is_signed_imm14(off)) {
1109 				emit_insn(ctx, stptrw, t1, dst, off);
1110 			} else {
1111 				move_imm(ctx, t2, off, is32);
1112 				emit_insn(ctx, stxw, t1, dst, t2);
1113 			}
1114 			break;
1115 		case BPF_DW:
1116 			move_imm(ctx, t1, imm, is32);
1117 			if (is_signed_imm12(off)) {
1118 				emit_insn(ctx, std, t1, dst, off);
1119 			} else if (is_signed_imm14(off)) {
1120 				emit_insn(ctx, stptrd, t1, dst, off);
1121 			} else {
1122 				move_imm(ctx, t2, off, is32);
1123 				emit_insn(ctx, stxd, t1, dst, t2);
1124 			}
1125 			break;
1126 		}
1127 		break;
1128 
1129 	/* *(size *)(dst + off) = src */
1130 	case BPF_STX | BPF_MEM | BPF_B:
1131 	case BPF_STX | BPF_MEM | BPF_H:
1132 	case BPF_STX | BPF_MEM | BPF_W:
1133 	case BPF_STX | BPF_MEM | BPF_DW:
1134 		switch (BPF_SIZE(code)) {
1135 		case BPF_B:
1136 			if (is_signed_imm12(off)) {
1137 				emit_insn(ctx, stb, src, dst, off);
1138 			} else {
1139 				move_imm(ctx, t1, off, is32);
1140 				emit_insn(ctx, stxb, src, dst, t1);
1141 			}
1142 			break;
1143 		case BPF_H:
1144 			if (is_signed_imm12(off)) {
1145 				emit_insn(ctx, sth, src, dst, off);
1146 			} else {
1147 				move_imm(ctx, t1, off, is32);
1148 				emit_insn(ctx, stxh, src, dst, t1);
1149 			}
1150 			break;
1151 		case BPF_W:
1152 			if (is_signed_imm12(off)) {
1153 				emit_insn(ctx, stw, src, dst, off);
1154 			} else if (is_signed_imm14(off)) {
1155 				emit_insn(ctx, stptrw, src, dst, off);
1156 			} else {
1157 				move_imm(ctx, t1, off, is32);
1158 				emit_insn(ctx, stxw, src, dst, t1);
1159 			}
1160 			break;
1161 		case BPF_DW:
1162 			if (is_signed_imm12(off)) {
1163 				emit_insn(ctx, std, src, dst, off);
1164 			} else if (is_signed_imm14(off)) {
1165 				emit_insn(ctx, stptrd, src, dst, off);
1166 			} else {
1167 				move_imm(ctx, t1, off, is32);
1168 				emit_insn(ctx, stxd, src, dst, t1);
1169 			}
1170 			break;
1171 		}
1172 		break;
1173 
1174 	case BPF_STX | BPF_ATOMIC | BPF_W:
1175 	case BPF_STX | BPF_ATOMIC | BPF_DW:
1176 		emit_atomic(insn, ctx);
1177 		break;
1178 
1179 	/* Speculation barrier */
1180 	case BPF_ST | BPF_NOSPEC:
1181 		break;
1182 
1183 	default:
1184 		pr_err("bpf_jit: unknown opcode %02x\n", code);
1185 		return -EINVAL;
1186 	}
1187 
1188 	return 0;
1189 
1190 toofar:
1191 	pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
1192 	return -E2BIG;
1193 }
1194 
1195 static int build_body(struct jit_ctx *ctx, bool extra_pass)
1196 {
1197 	int i;
1198 	const struct bpf_prog *prog = ctx->prog;
1199 
1200 	for (i = 0; i < prog->len; i++) {
1201 		const struct bpf_insn *insn = &prog->insnsi[i];
1202 		int ret;
1203 
1204 		if (ctx->image == NULL)
1205 			ctx->offset[i] = ctx->idx;
1206 
1207 		ret = build_insn(insn, ctx, extra_pass);
1208 		if (ret > 0) {
1209 			i++;
1210 			if (ctx->image == NULL)
1211 				ctx->offset[i] = ctx->idx;
1212 			continue;
1213 		}
1214 		if (ret)
1215 			return ret;
1216 	}
1217 
1218 	if (ctx->image == NULL)
1219 		ctx->offset[i] = ctx->idx;
1220 
1221 	return 0;
1222 }
1223 
1224 /* Fill space with break instructions */
1225 static void jit_fill_hole(void *area, unsigned int size)
1226 {
1227 	u32 *ptr;
1228 
1229 	/* We are guaranteed to have aligned memory */
1230 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
1231 		*ptr++ = INSN_BREAK;
1232 }
1233 
1234 static int validate_code(struct jit_ctx *ctx)
1235 {
1236 	int i;
1237 	union loongarch_instruction insn;
1238 
1239 	for (i = 0; i < ctx->idx; i++) {
1240 		insn = ctx->image[i];
1241 		/* Check INSN_BREAK */
1242 		if (insn.word == INSN_BREAK)
1243 			return -1;
1244 	}
1245 
1246 	return 0;
1247 }
1248 
1249 static int validate_ctx(struct jit_ctx *ctx)
1250 {
1251 	if (validate_code(ctx))
1252 		return -1;
1253 
1254 	if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
1255 		return -1;
1256 
1257 	return 0;
1258 }
1259 
1260 static int emit_jump_and_link(struct jit_ctx *ctx, u8 rd, u64 target)
1261 {
1262 	if (!target) {
1263 		pr_err("bpf_jit: invalid jump target address\n");
1264 		return -EFAULT;
1265 	}
1266 
1267 	move_imm(ctx, LOONGARCH_GPR_T1, target, false);
1268 	emit_insn(ctx, jirl, rd, LOONGARCH_GPR_T1, 0);
1269 
1270 	return 0;
1271 }
1272 
1273 static int emit_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
1274 {
1275 	int i;
1276 	struct jit_ctx ctx;
1277 
1278 	ctx.idx = 0;
1279 	ctx.image = (union loongarch_instruction *)insns;
1280 
1281 	if (!target) {
1282 		for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
1283 			emit_insn((&ctx), nop);
1284 		return 0;
1285 	}
1286 
1287 	return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_RA : LOONGARCH_GPR_ZERO, (u64)target);
1288 }
1289 
1290 static int emit_call(struct jit_ctx *ctx, u64 addr)
1291 {
1292 	return emit_jump_and_link(ctx, LOONGARCH_GPR_RA, addr);
1293 }
1294 
1295 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
1296 {
1297 	int ret;
1298 
1299 	mutex_lock(&text_mutex);
1300 	ret = larch_insn_text_copy(dst, src, len);
1301 	mutex_unlock(&text_mutex);
1302 
1303 	return ret ? ERR_PTR(-EINVAL) : dst;
1304 }
1305 
1306 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
1307 		       enum bpf_text_poke_type new_t, void *old_addr,
1308 		       void *new_addr)
1309 {
1310 	int ret;
1311 	bool is_call;
1312 	unsigned long size = 0;
1313 	unsigned long offset = 0;
1314 	void *image = NULL;
1315 	char namebuf[KSYM_NAME_LEN];
1316 	u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
1317 	u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
1318 
1319 	/* Only poking bpf text is supported. Since kernel function entry
1320 	 * is set up by ftrace, we rely on ftrace to poke kernel functions.
1321 	 */
1322 	if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
1323 		return -ENOTSUPP;
1324 
1325 	image = ip - offset;
1326 
1327 	/* zero offset means we're poking bpf prog entry */
1328 	if (offset == 0) {
1329 		/* skip to the nop instruction in bpf prog entry:
1330 		 * move t0, ra
1331 		 * nop
1332 		 */
1333 		ip = image + LOONGARCH_INSN_SIZE;
1334 	}
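	/*
	 * Regenerate the sequence expected at the patch site from old_addr,
	 * verify it matches what is currently there, then build the new
	 * sequence and write it under text_mutex; if the new sequence is
	 * already in place, the copy is skipped.
	 */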
1335 
1336 	is_call = old_t == BPF_MOD_CALL;
1337 	ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call);
1338 	if (ret)
1339 		return ret;
1340 
1341 	if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES))
1342 		return -EFAULT;
1343 
1344 	is_call = new_t == BPF_MOD_CALL;
1345 	ret = emit_jump_or_nops(new_addr, ip, new_insns, is_call);
1346 	if (ret)
1347 		return ret;
1348 
1349 	mutex_lock(&text_mutex);
1350 	if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES))
1351 		ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES);
1352 	mutex_unlock(&text_mutex);
1353 
1354 	return ret;
1355 }
1356 
1357 int bpf_arch_text_invalidate(void *dst, size_t len)
1358 {
1359 	int i;
1360 	int ret = 0;
1361 	u32 *inst;
1362 
1363 	inst = kvmalloc(len, GFP_KERNEL);
1364 	if (!inst)
1365 		return -ENOMEM;
1366 
1367 	for (i = 0; i < (len / sizeof(u32)); i++)
1368 		inst[i] = INSN_BREAK;
1369 
1370 	mutex_lock(&text_mutex);
1371 	if (larch_insn_text_copy(dst, inst, len))
1372 		ret = -EINVAL;
1373 	mutex_unlock(&text_mutex);
1374 
1375 	kvfree(inst);
1376 
1377 	return ret;
1378 }
1379 
1380 static void store_args(struct jit_ctx *ctx, int nargs, int args_off)
1381 {
1382 	int i;
1383 
1384 	for (i = 0; i < nargs; i++) {
1385 		emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
1386 		args_off -= 8;
1387 	}
1388 }
1389 
1390 static void restore_args(struct jit_ctx *ctx, int nargs, int args_off)
1391 {
1392 	int i;
1393 
1394 	for (i = 0; i < nargs; i++) {
1395 		emit_insn(ctx, ldd, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
1396 		args_off -= 8;
1397 	}
1398 }
1399 
1400 static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
1401 			   int args_off, int retval_off, int run_ctx_off, bool save_ret)
1402 {
1403 	int ret;
1404 	u32 *branch;
1405 	struct bpf_prog *p = l->link.prog;
1406 	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
1407 
1408 	if (l->cookie) {
1409 		move_imm(ctx, LOONGARCH_GPR_T1, l->cookie, false);
1410 		emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
1411 	} else {
1412 		emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
1413 	}
1414 
1415 	/* arg1: prog */
1416 	move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
1417 	/* arg2: &run_ctx */
1418 	emit_insn(ctx, addid, LOONGARCH_GPR_A1, LOONGARCH_GPR_FP, -run_ctx_off);
1419 	ret = emit_call(ctx, (const u64)bpf_trampoline_enter(p));
1420 	if (ret)
1421 		return ret;
1422 
1423 	/* store prog start time */
1424 	move_reg(ctx, LOONGARCH_GPR_S1, LOONGARCH_GPR_A0);
1425 
1426 	/*
1427 	 * if (__bpf_prog_enter(prog) == 0)
1428 	 *      goto skip_exec_of_prog;
1429 	 */
1430 	branch = (u32 *)ctx->image + ctx->idx;
1431 	/* nop reserved for conditional jump */
1432 	emit_insn(ctx, nop);
1433 
1434 	/* arg1: &args_off */
1435 	emit_insn(ctx, addid, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -args_off);
1436 	if (!p->jited)
1437 		move_imm(ctx, LOONGARCH_GPR_A1, (const s64)p->insnsi, false);
1438 	ret = emit_call(ctx, (const u64)p->bpf_func);
1439 	if (ret)
1440 		return ret;
1441 
1442 	if (save_ret) {
1443 		emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
1444 		emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
1445 	}
1446 
1447 	/* update branch with beqz */
1448 	if (ctx->image) {
1449 		int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branch;
1450 		*branch = larch_insn_gen_beq(LOONGARCH_GPR_A0, LOONGARCH_GPR_ZERO, offset);
1451 	}
1452 
1453 	/* arg1: prog */
1454 	move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
1455 	/* arg2: prog start time */
1456 	move_reg(ctx, LOONGARCH_GPR_A1, LOONGARCH_GPR_S1);
1457 	/* arg3: &run_ctx */
1458 	emit_insn(ctx, addid, LOONGARCH_GPR_A2, LOONGARCH_GPR_FP, -run_ctx_off);
1459 	ret = emit_call(ctx, (const u64)bpf_trampoline_exit(p));
1460 
1461 	return ret;
1462 }
1463 
1464 static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
1465 			       int args_off, int retval_off, int run_ctx_off, u32 **branches)
1466 {
1467 	int i;
1468 
1469 	emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
1470 	for (i = 0; i < tl->nr_links; i++) {
1471 		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, true);
1472 		emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off);
1473 		branches[i] = (u32 *)ctx->image + ctx->idx;
1474 		emit_insn(ctx, nop);
1475 	}
1476 }
1477 
1478 void *arch_alloc_bpf_trampoline(unsigned int size)
1479 {
1480 	return bpf_prog_pack_alloc(size, jit_fill_hole);
1481 }
1482 
1483 void arch_free_bpf_trampoline(void *image, unsigned int size)
1484 {
1485 	bpf_prog_pack_free(image, size);
1486 }
1487 
1488 /*
1489  * Sign-extend the register if necessary
1490  */
1491 static void sign_extend(struct jit_ctx *ctx, int rd, int rj, u8 size, bool sign)
1492 {
1493 	/* ABI requires unsigned char/short to be zero-extended */
1494 	if (!sign && (size == 1 || size == 2)) {
1495 		if (rd != rj)
1496 			move_reg(ctx, rd, rj);
1497 		return;
1498 	}
1499 
1500 	switch (size) {
1501 	case 1:
1502 		emit_insn(ctx, extwb, rd, rj);
1503 		break;
1504 	case 2:
1505 		emit_insn(ctx, extwh, rd, rj);
1506 		break;
1507 	case 4:
1508 		emit_insn(ctx, addiw, rd, rj, 0);
1509 		break;
1510 	case 8:
1511 		if (rd != rj)
1512 			move_reg(ctx, rd, rj);
1513 		break;
1514 	default:
1515 		pr_warn("bpf_jit: invalid size %d for sign_extend\n", size);
1516 	}
1517 }
1518 
1519 static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
1520 					 const struct btf_func_model *m, struct bpf_tramp_links *tlinks,
1521 					 void *func_addr, u32 flags)
1522 {
1523 	int i, ret, save_ret;
1524 	int stack_size, nargs;
1525 	int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off;
1526 	bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
1527 	void *orig_call = func_addr;
1528 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
1529 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
1530 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
1531 	u32 **branches = NULL;
1532 
1533 	/*
1534 	 * FP + 8       [ RA to parent func ] return address to parent
1535 	 *                    function
1536 	 * FP + 0       [ FP of parent func ] frame pointer of parent
1537 	 *                    function
1538 	 * FP - 8       [ T0 to traced func ] return address of traced
1539 	 *                    function
1540 	 * FP - 16      [ FP of traced func ] frame pointer of traced
1541 	 *                    function
1542 	 *
1543 	 * FP - retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
1544 	 *                    BPF_TRAMP_F_RET_FENTRY_RET
1545 	 *                  [ argN              ]
1546 	 *                  [ ...               ]
1547 	 * FP - args_off    [ arg1              ]
1548 	 *
1549 	 * FP - nargs_off   [ regs count        ]
1550 	 *
1551 	 * FP - ip_off      [ traced func   ] BPF_TRAMP_F_IP_ARG
1552 	 *
1553 	 * FP - run_ctx_off [ bpf_tramp_run_ctx ]
1554 	 *
1555 	 * FP - sreg_off    [ callee saved reg  ]
1556 	 *
1557 	 * FP - tcc_ptr_off [ tail_call_cnt_ptr ]
1558 	 */
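	/*
	 * All of the *_off values below are positive distances below the
	 * trampoline frame pointer; stack_size grows in the order shown in
	 * the diagram above and is finally rounded up to 16 bytes.
	 */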
1559 
1560 	if (m->nr_args > LOONGARCH_MAX_REG_ARGS)
1561 		return -ENOTSUPP;
1562 
1563 	/* FIXME: No support of struct argument */
1564 	for (i = 0; i < m->nr_args; i++) {
1565 		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
1566 			return -ENOTSUPP;
1567 	}
1568 
1569 	if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
1570 		return -ENOTSUPP;
1571 
1572 	/* Room of trampoline frame to store return address and frame pointer */
1573 	stack_size = 16;
1574 
1575 	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
1576 	if (save_ret)
1577 		stack_size += 16; /* Save BPF R0 and A0 */
1578 
1579 	retval_off = stack_size;
1580 
1581 	/* Room of trampoline frame to store args */
1582 	nargs = m->nr_args;
1583 	stack_size += nargs * 8;
1584 	args_off = stack_size;
1585 
1586 	/* Room of trampoline frame to store args number */
1587 	stack_size += 8;
1588 	nargs_off = stack_size;
1589 
1590 	/* Room of trampoline frame to store ip address */
1591 	if (flags & BPF_TRAMP_F_IP_ARG) {
1592 		stack_size += 8;
1593 		ip_off = stack_size;
1594 	}
1595 
1596 	/* Room of trampoline frame to store struct bpf_tramp_run_ctx */
1597 	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
1598 	run_ctx_off = stack_size;
1599 
1600 	stack_size += 8;
1601 	sreg_off = stack_size;
1602 
1603 	/* Room of trampoline frame to store tail_call_cnt_ptr */
1604 	if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
1605 		stack_size += 8;
1606 		tcc_ptr_off = stack_size;
1607 	}
1608 
1609 	stack_size = round_up(stack_size, 16);
1610 
1611 	if (is_struct_ops) {
1612 		/*
1613 		 * For the trampoline called directly, just handle
1614 		 * the frame of trampoline.
1615 		 */
1616 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
1617 		emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8);
1618 		emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
1619 		emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
1620 	} else {
1621 		/*
1622 		 * For the trampoline called from function entry,
1623 		 * the frame of traced function and the frame of
1624 		 * trampoline need to be considered.
1625 		 */
1626 		/* RA and FP for parent function */
1627 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16);
1628 		emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
1629 		emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
1630 		emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16);
1631 
1632 		/* RA and FP for traced function */
1633 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
1634 		emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
1635 		emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
1636 		emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
1637 	}
1638 
1639 	if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
1640 		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off);
1641 
1642 	/* callee saved register S1 to pass start time */
1643 	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
1644 
1645 	/* store ip address of the traced function */
1646 	if (flags & BPF_TRAMP_F_IP_ARG) {
1647 		move_imm(ctx, LOONGARCH_GPR_T1, (const s64)func_addr, false);
1648 		emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -ip_off);
1649 	}
1650 
1651 	/* store nargs number */
1652 	move_imm(ctx, LOONGARCH_GPR_T1, nargs, false);
1653 	emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -nargs_off);
1654 
1655 	store_args(ctx, nargs, args_off);
1656 
1657 	/* To traced function */
1658 	/* Ftrace jump skips 2 NOP instructions */
1659 	if (is_kernel_text((unsigned long)orig_call) ||
1660 	    is_module_text_address((unsigned long)orig_call))
1661 		orig_call += LOONGARCH_FENTRY_NBYTES;
1662 	/* Direct jump skips 5 NOP instructions */
1663 	else if (is_bpf_text_address((unsigned long)orig_call))
1664 		orig_call += LOONGARCH_BPF_FENTRY_NBYTES;
1665 
1666 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
1667 		move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
1668 		ret = emit_call(ctx, (const u64)__bpf_tramp_enter);
1669 		if (ret)
1670 			return ret;
1671 	}
1672 
1673 	for (i = 0; i < fentry->nr_links; i++) {
1674 		ret = invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off,
1675 				      run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET);
1676 		if (ret)
1677 			return ret;
1678 	}
1679 	if (fmod_ret->nr_links) {
1680 		branches  = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL);
1681 		if (!branches)
1682 			return -ENOMEM;
1683 
1684 		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, run_ctx_off, branches);
1685 	}
1686 
1687 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
1688 		restore_args(ctx, m->nr_args, args_off);
1689 
1690 		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
1691 			emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off);
1692 
1693 		ret = emit_call(ctx, (const u64)orig_call);
1694 		if (ret)
1695 			goto out;
1696 		emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
1697 		emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
1698 		im->ip_after_call = ctx->ro_image + ctx->idx;
1699 		/* Reserve space for the move_imm + jirl instruction */
1700 		for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
1701 			emit_insn(ctx, nop);
1702 	}
1703 
1704 	for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) {
1705 		int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i];
1706 		*branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset);
1707 	}
1708 
1709 	for (i = 0; i < fexit->nr_links; i++) {
1710 		ret = invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, run_ctx_off, false);
1711 		if (ret)
1712 			goto out;
1713 	}
1714 
1715 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
1716 		im->ip_epilogue = ctx->ro_image + ctx->idx;
1717 		move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
1718 		ret = emit_call(ctx, (const u64)__bpf_tramp_exit);
1719 		if (ret)
1720 			goto out;
1721 	}
1722 
1723 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
1724 		restore_args(ctx, m->nr_args, args_off);
1725 
1726 	if (save_ret) {
1727 		emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
1728 		if (is_struct_ops)
1729 			sign_extend(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0],
1730 				    m->ret_size, m->ret_flags & BTF_FMODEL_SIGNED_ARG);
1731 		else
1732 			emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
1733 	}
1734 
1735 	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
1736 
1737 	if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
1738 		emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off);
1739 
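	/*
	 * Epilogue: a struct_ops trampoline is called like a normal
	 * function, so it unwinds its own frame and returns via RA.  A
	 * trampoline entered from a patched function entry also pops an
	 * additional 16-byte RA/FP frame, then returns either to the
	 * parent (BPF_TRAMP_F_SKIP_FRAME) or back into the traced function.
	 */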
1740 	if (is_struct_ops) {
1741 		/* trampoline called directly */
1742 		emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8);
1743 		emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
1744 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
1745 
1746 		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
1747 	} else {
1748 		/* trampoline called from function entry */
1749 		emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
1750 		emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
1751 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
1752 
1753 		emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
1754 		emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
1755 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16);
1756 
1757 		if (flags & BPF_TRAMP_F_SKIP_FRAME) {
1758 			/* return to parent function */
1759 			move_reg(ctx, LOONGARCH_GPR_RA, LOONGARCH_GPR_T0);
1760 			emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0);
1761 		} else {
1762 			/* return to traced function */
1763 			move_reg(ctx, LOONGARCH_GPR_T1, LOONGARCH_GPR_RA);
1764 			move_reg(ctx, LOONGARCH_GPR_RA, LOONGARCH_GPR_T0);
1765 			emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T1, 0);
1766 		}
1767 	}
1768 
1769 	ret = ctx->idx;
1770 out:
1771 	kfree(branches);
1772 
1773 	return ret;
1774 }
1775 
1776 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
1777 				void *ro_image_end, const struct btf_func_model *m,
1778 				u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
1779 {
1780 	int ret, size;
1781 	void *image, *tmp;
1782 	struct jit_ctx ctx;
1783 
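	/*
	 * Generate the trampoline into a writable scratch buffer first;
	 * the finished code is copied into the read-only image below.
	 */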
1784 	size = ro_image_end - ro_image;
1785 	image = kvmalloc(size, GFP_KERNEL);
1786 	if (!image)
1787 		return -ENOMEM;
1788 
1789 	ctx.image = (union loongarch_instruction *)image;
1790 	ctx.ro_image = (union loongarch_instruction *)ro_image;
1791 	ctx.idx = 0;
1792 
1793 	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
1794 	ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags);
1795 	if (ret < 0)
1796 		goto out;
1797 
1798 	if (validate_code(&ctx) < 0) {
1799 		ret = -EINVAL;
1800 		goto out;
1801 	}
1802 
1803 	tmp = bpf_arch_text_copy(ro_image, image, size);
1804 	if (IS_ERR(tmp)) {
1805 		ret = PTR_ERR(tmp);
1806 		goto out;
1807 	}
1808 
1809 out:
1810 	kvfree(image);
1811 	return ret < 0 ? ret : size;
1812 }
1813 
1814 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
1815 			     struct bpf_tramp_links *tlinks, void *func_addr)
1816 {
1817 	int ret;
1818 	struct jit_ctx ctx;
1819 	struct bpf_tramp_image im;
1820 
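	/*
	 * With ctx.image set to NULL the trampoline builder only counts
	 * instructions, so the returned index just needs to be scaled by
	 * the instruction size.
	 */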
1821 	ctx.image = NULL;
1822 	ctx.idx = 0;
1823 
1824 	ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags);
1825 
1826 	return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE;
1827 }
1828 
1829 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1830 {
1831 	bool tmp_blinded = false, extra_pass = false;
1832 	u8 *image_ptr;
1833 	int image_size, prog_size, extable_size;
1834 	struct jit_ctx ctx;
1835 	struct jit_data *jit_data;
1836 	struct bpf_binary_header *header;
1837 	struct bpf_prog *tmp, *orig_prog = prog;
1838 
1839 	/*
1840 	 * If BPF JIT was not enabled then we must fall back to
1841 	 * the interpreter.
1842 	 */
1843 	if (!prog->jit_requested)
1844 		return orig_prog;
1845 
1846 	tmp = bpf_jit_blind_constants(prog);
1847 	/*
1848 	 * If blinding was requested and we failed during blinding,
1849 	 * we must fall back to the interpreter. Otherwise, we save
1850 	 * the new JITed code.
1851 	 */
1852 	if (IS_ERR(tmp))
1853 		return orig_prog;
1854 
1855 	if (tmp != prog) {
1856 		tmp_blinded = true;
1857 		prog = tmp;
1858 	}
1859 
1860 	jit_data = prog->aux->jit_data;
1861 	if (!jit_data) {
1862 		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1863 		if (!jit_data) {
1864 			prog = orig_prog;
1865 			goto out;
1866 		}
1867 		prog->aux->jit_data = jit_data;
1868 	}
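	/*
	 * A context saved in jit_data means this is the extra pass of a
	 * multi-function program: reuse the previously allocated image and
	 * just re-emit the code with the final call addresses resolved.
	 */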
1869 	if (jit_data->ctx.offset) {
1870 		ctx = jit_data->ctx;
1871 		image_ptr = jit_data->image;
1872 		header = jit_data->header;
1873 		extra_pass = true;
1874 		prog_size = sizeof(u32) * ctx.idx;
1875 		goto skip_init_ctx;
1876 	}
1877 
1878 	memset(&ctx, 0, sizeof(ctx));
1879 	ctx.prog = prog;
1880 
1881 	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
1882 	if (ctx.offset == NULL) {
1883 		prog = orig_prog;
1884 		goto out_offset;
1885 	}
1886 
1887 	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
1888 	build_prologue(&ctx);
1889 	if (build_body(&ctx, extra_pass)) {
1890 		prog = orig_prog;
1891 		goto out_offset;
1892 	}
1893 	ctx.epilogue_offset = ctx.idx;
1894 	build_epilogue(&ctx);
1895 
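	/* Exception table entries generated for BPF_PROBE_MEM accesses are appended right after the JITed image */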
1896 	extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry);
1897 
1898 	/* Now we know the actual image size.
1899 	 * As each LoongArch instruction is 32 bits long,
1900 	 * we translate the number of JITed instructions into
1901 	 * the size required to store the JITed code.
1902 	 */
1903 	prog_size = sizeof(u32) * ctx.idx;
1904 	image_size = prog_size + extable_size;
1905 	/* Now we know the size of the structure to make */
1906 	header = bpf_jit_binary_alloc(image_size, &image_ptr,
1907 				      sizeof(u32), jit_fill_hole);
1908 	if (header == NULL) {
1909 		prog = orig_prog;
1910 		goto out_offset;
1911 	}
1912 
1913 	/* 2. Now, the actual pass to generate final JIT code */
1914 	ctx.image = (union loongarch_instruction *)image_ptr;
1915 	if (extable_size)
1916 		prog->aux->extable = (void *)image_ptr + prog_size;
1917 
1918 skip_init_ctx:
1919 	ctx.idx = 0;
1920 	ctx.num_exentries = 0;
1921 
1922 	build_prologue(&ctx);
1923 	if (build_body(&ctx, extra_pass)) {
1924 		bpf_jit_binary_free(header);
1925 		prog = orig_prog;
1926 		goto out_offset;
1927 	}
1928 	build_epilogue(&ctx);
1929 
1930 	/* 3. Extra pass to validate JITed code */
1931 	if (validate_ctx(&ctx)) {
1932 		bpf_jit_binary_free(header);
1933 		prog = orig_prog;
1934 		goto out_offset;
1935 	}
1936 
1937 	/* And we're done */
1938 	if (bpf_jit_enable > 1)
1939 		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
1940 
1941 	/* Update the icache */
1942 	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));
1943 
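	/*
	 * For single-function programs, or on the extra pass of a
	 * multi-function program, the image is final: lock it read-only.
	 * Otherwise stash the context so the extra pass can pick it up.
	 */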
1944 	if (!prog->is_func || extra_pass) {
1945 		int err;
1946 
1947 		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
1948 			pr_err_once("multi-func JIT bug %d != %d\n",
1949 				    ctx.idx, jit_data->ctx.idx);
1950 			goto out_free;
1951 		}
1952 		err = bpf_jit_binary_lock_ro(header);
1953 		if (err) {
1954 			pr_err_once("bpf_jit_binary_lock_ro() returned %d\n",
1955 				    err);
1956 			goto out_free;
1957 		}
1958 	} else {
1959 		jit_data->ctx = ctx;
1960 		jit_data->image = image_ptr;
1961 		jit_data->header = header;
1962 	}
1963 	prog->jited = 1;
1964 	prog->jited_len = prog_size;
1965 	prog->bpf_func = (void *)ctx.image;
1966 
1967 	if (!prog->is_func || extra_pass) {
1968 		int i;
1969 
1970 		/* offset[prog->len] is the size of program */
1971 		for (i = 0; i <= prog->len; i++)
1972 			ctx.offset[i] *= LOONGARCH_INSN_SIZE;
1973 		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
1974 
1975 out_offset:
1976 		kvfree(ctx.offset);
1977 		kfree(jit_data);
1978 		prog->aux->jit_data = NULL;
1979 	}
1980 
1981 out:
1982 	if (tmp_blinded)
1983 		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
1984 
1986 	return prog;
1987 
1988 out_free:
1989 	bpf_jit_binary_free(header);
1990 	prog->bpf_func = NULL;
1991 	prog->jited = 0;
1992 	prog->jited_len = 0;
1993 	goto out_offset;
1994 }
1995 
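/* The two helpers below indicate that the Spectre v1/v4 mitigations are not required for this JIT. */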
1996 bool bpf_jit_bypass_spec_v1(void)
1997 {
1998 	return true;
1999 }
2000 
2001 bool bpf_jit_bypass_spec_v4(void)
2002 {
2003 	return true;
2004 }
2005 
2006 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
2007 bool bpf_jit_supports_subprog_tailcalls(void)
2008 {
2009 	return true;
2010 }
2011