xref: /linux/arch/loongarch/net/bpf_jit.c (revision ff57d59200baadfdb41f94a49fed7d161a9a8124)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * BPF JIT compiler for LoongArch
4  *
5  * Copyright (C) 2022 Loongson Technology Corporation Limited
6  */
7 #include <linux/memory.h>
8 #include "bpf_jit.h"
9 
10 #define LOONGARCH_MAX_REG_ARGS 8
11 
12 #define LOONGARCH_LONG_JUMP_NINSNS 5
13 #define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
14 
15 #define LOONGARCH_FENTRY_NINSNS 2
16 #define LOONGARCH_FENTRY_NBYTES (LOONGARCH_FENTRY_NINSNS * 4)
17 #define LOONGARCH_BPF_FENTRY_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
18 
19 #define REG_TCC		LOONGARCH_GPR_A6
20 #define REG_ARENA	LOONGARCH_GPR_S6 /* For storing arena_vm_start */
21 #define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (round_up(stack, 16) - 80)
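/*
 * REG_TCC carries the tail call count into a main program, or a pointer to
 * that count once it has been spilled and propagated through bpf2bpf calls.
 * The 80 in the macro above accounts for the ra, fp, s0-s5, tcc and tcc_ptr
 * slots (10 * 8 bytes) that the prologue pushes below the 16-byte-aligned
 * top of the frame.
 */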
22 
23 static const int regmap[] = {
24 	/* return value from in-kernel function, and exit value for eBPF program */
25 	[BPF_REG_0] = LOONGARCH_GPR_A5,
26 	/* arguments from eBPF program to in-kernel function */
27 	[BPF_REG_1] = LOONGARCH_GPR_A0,
28 	[BPF_REG_2] = LOONGARCH_GPR_A1,
29 	[BPF_REG_3] = LOONGARCH_GPR_A2,
30 	[BPF_REG_4] = LOONGARCH_GPR_A3,
31 	[BPF_REG_5] = LOONGARCH_GPR_A4,
32 	/* callee saved registers that in-kernel function will preserve */
33 	[BPF_REG_6] = LOONGARCH_GPR_S0,
34 	[BPF_REG_7] = LOONGARCH_GPR_S1,
35 	[BPF_REG_8] = LOONGARCH_GPR_S2,
36 	[BPF_REG_9] = LOONGARCH_GPR_S3,
37 	/* read-only frame pointer to access stack */
38 	[BPF_REG_FP] = LOONGARCH_GPR_S4,
39 	/* temporary register for blinding constants */
40 	[BPF_REG_AX] = LOONGARCH_GPR_T0,
41 };
42 
43 static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx, int *store_offset)
44 {
45 	const struct bpf_prog *prog = ctx->prog;
46 	const bool is_main_prog = !bpf_is_subprog(prog);
47 
48 	if (is_main_prog) {
49 		/*
50 		 * LOONGARCH_GPR_T3 = MAX_TAIL_CALL_CNT
51 		 * if (REG_TCC > T3)
52 		 *	std REG_TCC -> LOONGARCH_GPR_SP + store_offset
53 		 * else
54 		 *	std REG_TCC -> LOONGARCH_GPR_SP + store_offset
55 		 *	REG_TCC = LOONGARCH_GPR_SP + store_offset
56 		 *
57 		 * std REG_TCC -> LOONGARCH_GPR_SP + store_offset
58 		 *
59 		 * The purpose of this code is to first push the TCC onto the
60 		 * stack, and then push the address of the TCC onto the stack.
61 		 * When bpf2bpf calls and tail calls are used in combination,
62 		 * REG_TCC may hold either a count or an address, so the two
63 		 * cases must be distinguished and handled separately.
64 		 */
65 		emit_insn(ctx, addid, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
66 		*store_offset -= sizeof(long);
67 
68 		emit_cond_jmp(ctx, BPF_JGT, REG_TCC, LOONGARCH_GPR_T3, 4);
69 
70 		/*
71 		 * If REG_TCC <= MAX_TAIL_CALL_CNT, the value in REG_TCC is a count,
72 		 * so push the count onto the stack
73 		 */
74 		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
75 
76 		/* Load the address of the TCC slot into REG_TCC */
77 		emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
78 
79 		emit_uncond_jmp(ctx, 2);
80 
81 		/*
82 		 * If REG_TCC > MAX_TAIL_CALL_CNT, the value in REG_TCC is an address,
83 		 * so push the address (tcc_ptr) onto the stack
84 		 */
85 		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
86 	} else {
87 		*store_offset -= sizeof(long);
88 		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
89 	}
90 
91 	/* Push tcc_ptr onto the stack */
92 	*store_offset -= sizeof(long);
93 	emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
94 }
95 
96 /*
97  * eBPF prog stack layout:
98  *
99  *                                        high
100  * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
101  *                            |           $ra           |
102  *                            +-------------------------+
103  *                            |           $fp           |
104  *                            +-------------------------+
105  *                            |           $s0           |
106  *                            +-------------------------+
107  *                            |           $s1           |
108  *                            +-------------------------+
109  *                            |           $s2           |
110  *                            +-------------------------+
111  *                            |           $s3           |
112  *                            +-------------------------+
113  *                            |           $s4           |
114  *                            +-------------------------+
115  *                            |           $s5           |
116  *                            +-------------------------+
117  *                            |           tcc           |
118  *                            +-------------------------+
119  *                            |           tcc_ptr       |
120  *                            +-------------------------+ <--BPF_REG_FP
121  *                            |  prog->aux->stack_depth |
122  *                            |        (optional)       |
123  * current $sp -------------> +-------------------------+
124  *                                        low
125  */
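/*
 * Example (without an arena register spill): for prog->aux->stack_depth = 100,
 * bpf_stack_adjust = round_up(100, 16) = 112 and the register save area is
 * round_up(8 * 8 + 2 * 8, 16) = 80 bytes, so stack_adjust = 192. BPF_REG_FP is
 * then set to $sp + 112, the boundary between the save area and the BPF stack
 * shown above.
 */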
126 static void build_prologue(struct jit_ctx *ctx)
127 {
128 	int i, stack_adjust = 0, store_offset, bpf_stack_adjust;
129 	const struct bpf_prog *prog = ctx->prog;
130 	const bool is_main_prog = !bpf_is_subprog(prog);
131 
132 	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
133 
134 	/* To store ra, fp, s0, s1, s2, s3, s4, s5 */
135 	stack_adjust += sizeof(long) * 8;
136 
137 	/* To store tcc and tcc_ptr */
138 	stack_adjust += sizeof(long) * 2;
139 
140 	if (ctx->arena_vm_start)
141 		stack_adjust += 8;
142 
143 	stack_adjust = round_up(stack_adjust, 16);
144 	stack_adjust += bpf_stack_adjust;
145 
146 	move_reg(ctx, LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);
147 	/* Reserve space for the move_imm + jirl instructions */
148 	for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
149 		emit_insn(ctx, nop);
150 
151 	/*
152 	 * First instruction initializes the tail call count (TCC)
153 	 * register to zero. On tail call we skip this instruction,
154 	 * and the TCC is passed in REG_TCC from the caller.
155 	 */
156 	if (is_main_prog)
157 		emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, 0);
158 
159 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
160 
161 	store_offset = stack_adjust - sizeof(long);
162 	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);
163 
164 	store_offset -= sizeof(long);
165 	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);
166 
167 	store_offset -= sizeof(long);
168 	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);
169 
170 	store_offset -= sizeof(long);
171 	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);
172 
173 	store_offset -= sizeof(long);
174 	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);
175 
176 	store_offset -= sizeof(long);
177 	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);
178 
179 	store_offset -= sizeof(long);
180 	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);
181 
182 	store_offset -= sizeof(long);
183 	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
184 
185 	if (ctx->arena_vm_start) {
186 		store_offset -= sizeof(long);
187 		emit_insn(ctx, std, REG_ARENA, LOONGARCH_GPR_SP, store_offset);
188 	}
189 
190 	prepare_bpf_tail_call_cnt(ctx, &store_offset);
191 
192 	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
193 
194 	if (bpf_stack_adjust)
195 		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
196 
197 	ctx->stack_size = stack_adjust;
198 
199 	if (ctx->arena_vm_start)
200 		move_imm(ctx, REG_ARENA, ctx->arena_vm_start, false);
201 }
202 
203 static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
204 {
205 	int stack_adjust = ctx->stack_size;
206 	int load_offset;
207 
208 	load_offset = stack_adjust - sizeof(long);
209 	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);
210 
211 	load_offset -= sizeof(long);
212 	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);
213 
214 	load_offset -= sizeof(long);
215 	emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);
216 
217 	load_offset -= sizeof(long);
218 	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);
219 
220 	load_offset -= sizeof(long);
221 	emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);
222 
223 	load_offset -= sizeof(long);
224 	emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);
225 
226 	load_offset -= sizeof(long);
227 	emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);
228 
229 	load_offset -= sizeof(long);
230 	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
231 
232 	if (ctx->arena_vm_start) {
233 		load_offset -= sizeof(long);
234 		emit_insn(ctx, ldd, REG_ARENA, LOONGARCH_GPR_SP, load_offset);
235 	}
236 
237 	/*
238 	 * When pushing onto the stack, tcc is stored first and then tcc_ptr.
239 	 * When popping from the stack, tcc_ptr is restored first, followed by tcc.
240 	 */
241 	load_offset -= 2 * sizeof(long);
242 	emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
243 
244 	load_offset += sizeof(long);
245 	emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
246 
247 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
248 
249 	if (!is_tail_call) {
250 		/* Set return value */
251 		emit_insn(ctx, addiw, LOONGARCH_GPR_A0, regmap[BPF_REG_0], 0);
252 		/* Return to the caller */
253 		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
254 	} else {
255 		/*
256 		 * Call the next bpf prog, skipping the first seven instructions of
257 		 * its prologue (move t0, ra; the reserved nops; the TCC initialization).
258 		 */
259 		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 7);
260 	}
261 }
262 
263 static void build_epilogue(struct jit_ctx *ctx)
264 {
265 	__build_epilogue(ctx, false);
266 }
267 
268 bool bpf_jit_supports_kfunc_call(void)
269 {
270 	return true;
271 }
272 
273 bool bpf_jit_supports_far_kfunc_call(void)
274 {
275 	return true;
276 }
277 
278 static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
279 {
280 	int off, tc_ninsn = 0;
281 	int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
282 	u8 a1 = LOONGARCH_GPR_A1;
283 	u8 a2 = LOONGARCH_GPR_A2;
284 	u8 t1 = LOONGARCH_GPR_T1;
285 	u8 t2 = LOONGARCH_GPR_T2;
286 	u8 t3 = LOONGARCH_GPR_T3;
287 	const int idx0 = ctx->idx;
288 
289 #define cur_offset (ctx->idx - idx0)
290 #define jmp_offset (tc_ninsn - (cur_offset))
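	/*
	 * All bail-out branches below target the end of this tail-call sequence
	 * (the start of the next BPF instruction), so jmp_offset is the distance
	 * from the current emit position to that common "out" point.
	 */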
291 
292 	/*
293 	 * a0: &ctx
294 	 * a1: &array
295 	 * a2: index
296 	 *
297 	 * if (index >= array->map.max_entries)
298 	 *	 goto out;
299 	 */
300 	tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] : ctx->offset[0];
301 	emit_zext_32(ctx, a2, true);
302 
303 	off = offsetof(struct bpf_array, map.max_entries);
304 	emit_insn(ctx, ldwu, t1, a1, off);
305 	/* bgeu $a2, $t1, jmp_offset */
306 	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
307 		goto toofar;
308 
309 	/*
310 	 * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
311 	 *      goto out;
312 	 */
313 	emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
314 	emit_insn(ctx, ldd, t3, REG_TCC, 0);
315 	emit_insn(ctx, addid, t3, t3, 1);
316 	emit_insn(ctx, std, t3, REG_TCC, 0);
317 	emit_insn(ctx, addid, t2, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
318 	if (emit_tailcall_jmp(ctx, BPF_JSGT, t3, t2, jmp_offset) < 0)
319 		goto toofar;
320 
321 	/*
322 	 * prog = array->ptrs[index];
323 	 * if (!prog)
324 	 *	 goto out;
325 	 */
326 	emit_insn(ctx, alsld, t2, a2, a1, 2);
327 	off = offsetof(struct bpf_array, ptrs);
328 	emit_insn(ctx, ldd, t2, t2, off);
329 	/* beq $t2, $zero, jmp_offset */
330 	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
331 		goto toofar;
332 
333 	/* goto *(prog->bpf_func + 4); */
334 	off = offsetof(struct bpf_prog, bpf_func);
335 	emit_insn(ctx, ldd, t3, t2, off);
336 	__build_epilogue(ctx, true);
337 
338 	return 0;
339 
340 toofar:
341 	pr_info_once("tail_call: jump too far\n");
342 	return -1;
343 #undef cur_offset
344 #undef jmp_offset
345 }
346 
347 static void emit_store_stack_imm64(struct jit_ctx *ctx, int reg, int stack_off, u64 imm64)
348 {
349 	move_imm(ctx, reg, imm64, false);
350 	emit_insn(ctx, std, reg, LOONGARCH_GPR_FP, stack_off);
351 }
352 
353 static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx)
354 {
355 	const u8 t1 = LOONGARCH_GPR_T1;
356 	const u8 t2 = LOONGARCH_GPR_T2;
357 	const u8 t3 = LOONGARCH_GPR_T3;
358 	const u8 r0 = regmap[BPF_REG_0];
359 	const u8 src = regmap[insn->src_reg];
360 	const u8 dst = regmap[insn->dst_reg];
361 	const s16 off = insn->off;
362 	const s32 imm = insn->imm;
363 	const bool isdw = BPF_SIZE(insn->code) == BPF_DW;
364 
365 	move_imm(ctx, t1, off, false);
366 	emit_insn(ctx, addd, t1, dst, t1);
367 	move_reg(ctx, t3, src);
368 
369 	switch (imm) {
370 	/* lock *(size *)(dst + off) <op>= src */
371 	case BPF_ADD:
372 		switch (BPF_SIZE(insn->code)) {
373 		case BPF_B:
374 			if (!cpu_has_lam_bh) {
375 				pr_err_once("bpf-jit: amadd.b instruction is not supported\n");
376 				return -EINVAL;
377 			}
378 			emit_insn(ctx, amaddb, t2, t1, src);
379 			break;
380 		case BPF_H:
381 			if (!cpu_has_lam_bh) {
382 				pr_err_once("bpf-jit: amadd.h instruction is not supported\n");
383 				return -EINVAL;
384 			}
385 			emit_insn(ctx, amaddh, t2, t1, src);
386 			break;
387 		case BPF_W:
388 			emit_insn(ctx, amaddw, t2, t1, src);
389 			break;
390 		case BPF_DW:
391 			emit_insn(ctx, amaddd, t2, t1, src);
392 			break;
393 		}
394 		break;
395 	case BPF_AND:
396 		if (isdw)
397 			emit_insn(ctx, amandd, t2, t1, src);
398 		else
399 			emit_insn(ctx, amandw, t2, t1, src);
400 		break;
401 	case BPF_OR:
402 		if (isdw)
403 			emit_insn(ctx, amord, t2, t1, src);
404 		else
405 			emit_insn(ctx, amorw, t2, t1, src);
406 		break;
407 	case BPF_XOR:
408 		if (isdw)
409 			emit_insn(ctx, amxord, t2, t1, src);
410 		else
411 			emit_insn(ctx, amxorw, t2, t1, src);
412 		break;
413 	/* src = atomic_fetch_<op>(dst + off, src) */
414 	case BPF_ADD | BPF_FETCH:
415 		switch (BPF_SIZE(insn->code)) {
416 		case BPF_B:
417 			if (!cpu_has_lam_bh) {
418 				pr_err_once("bpf-jit: amadd.b instruction is not supported\n");
419 				return -EINVAL;
420 			}
421 			emit_insn(ctx, amaddb, src, t1, t3);
422 			emit_zext_32(ctx, src, true);
423 			break;
424 		case BPF_H:
425 			if (!cpu_has_lam_bh) {
426 				pr_err_once("bpf-jit: amadd.h instruction is not supported\n");
427 				return -EINVAL;
428 			}
429 			emit_insn(ctx, amaddh, src, t1, t3);
430 			emit_zext_32(ctx, src, true);
431 			break;
432 		case BPF_W:
433 			emit_insn(ctx, amaddw, src, t1, t3);
434 			emit_zext_32(ctx, src, true);
435 			break;
436 		case BPF_DW:
437 			emit_insn(ctx, amaddd, src, t1, t3);
438 			break;
439 		}
440 		break;
441 	case BPF_AND | BPF_FETCH:
442 		if (isdw) {
443 			emit_insn(ctx, amandd, src, t1, t3);
444 		} else {
445 			emit_insn(ctx, amandw, src, t1, t3);
446 			emit_zext_32(ctx, src, true);
447 		}
448 		break;
449 	case BPF_OR | BPF_FETCH:
450 		if (isdw) {
451 			emit_insn(ctx, amord, src, t1, t3);
452 		} else {
453 			emit_insn(ctx, amorw, src, t1, t3);
454 			emit_zext_32(ctx, src, true);
455 		}
456 		break;
457 	case BPF_XOR | BPF_FETCH:
458 		if (isdw) {
459 			emit_insn(ctx, amxord, src, t1, t3);
460 		} else {
461 			emit_insn(ctx, amxorw, src, t1, t3);
462 			emit_zext_32(ctx, src, true);
463 		}
464 		break;
465 	/* src = atomic_xchg(dst + off, src); */
466 	case BPF_XCHG:
467 		switch (BPF_SIZE(insn->code)) {
468 		case BPF_B:
469 			if (!cpu_has_lam_bh) {
470 				pr_err_once("bpf-jit: amswap.b instruction is not supported\n");
471 				return -EINVAL;
472 			}
473 			emit_insn(ctx, amswapb, src, t1, t3);
474 			emit_zext_32(ctx, src, true);
475 			break;
476 		case BPF_H:
477 			if (!cpu_has_lam_bh) {
478 				pr_err_once("bpf-jit: amswap.h instruction is not supported\n");
479 				return -EINVAL;
480 			}
481 			emit_insn(ctx, amswaph, src, t1, t3);
482 			emit_zext_32(ctx, src, true);
483 			break;
484 		case BPF_W:
485 			emit_insn(ctx, amswapw, src, t1, t3);
486 			emit_zext_32(ctx, src, true);
487 			break;
488 		case BPF_DW:
489 			emit_insn(ctx, amswapd, src, t1, t3);
490 			break;
491 		}
492 		break;
493 	/* r0 = atomic_cmpxchg(dst + off, r0, src); */
494 	case BPF_CMPXCHG:
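		/*
		 * LL/SC loop: t2 holds the expected old value (r0). Load-linked the
		 * current value into r0; if it differs from t2, fall through past
		 * the loop. Otherwise store-conditional src and retry from the
		 * ll/lld when the sc fails. The 32-bit variant zero-extends the
		 * comparands and the result for BPF's 32-bit cmpxchg semantics.
		 */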
495 		move_reg(ctx, t2, r0);
496 		if (isdw) {
497 			emit_insn(ctx, lld, r0, t1, 0);
498 			emit_insn(ctx, bne, t2, r0, 4);
499 			move_reg(ctx, t3, src);
500 			emit_insn(ctx, scd, t3, t1, 0);
501 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
502 		} else {
503 			emit_insn(ctx, llw, r0, t1, 0);
504 			emit_zext_32(ctx, t2, true);
505 			emit_zext_32(ctx, r0, true);
506 			emit_insn(ctx, bne, t2, r0, 4);
507 			move_reg(ctx, t3, src);
508 			emit_insn(ctx, scw, t3, t1, 0);
509 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
510 			emit_zext_32(ctx, r0, true);
511 		}
512 		break;
513 	default:
514 		pr_err_once("bpf-jit: invalid atomic read-modify-write opcode %02x\n", imm);
515 		return -EINVAL;
516 	}
517 
518 	return 0;
519 }
520 
521 static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx)
522 {
523 	const u8 t1 = LOONGARCH_GPR_T1;
524 	const u8 src = regmap[insn->src_reg];
525 	const u8 dst = regmap[insn->dst_reg];
526 	const s16 off = insn->off;
527 	const s32 imm = insn->imm;
528 
529 	switch (imm) {
530 	/* dst_reg = load_acquire(src_reg + off16) */
531 	case BPF_LOAD_ACQ:
532 		switch (BPF_SIZE(insn->code)) {
533 		case BPF_B:
534 			if (is_signed_imm12(off)) {
535 				emit_insn(ctx, ldbu, dst, src, off);
536 			} else {
537 				move_imm(ctx, t1, off, false);
538 				emit_insn(ctx, ldxbu, dst, src, t1);
539 			}
540 			break;
541 		case BPF_H:
542 			if (is_signed_imm12(off)) {
543 				emit_insn(ctx, ldhu, dst, src, off);
544 			} else {
545 				move_imm(ctx, t1, off, false);
546 				emit_insn(ctx, ldxhu, dst, src, t1);
547 			}
548 			break;
549 		case BPF_W:
550 			if (is_signed_imm12(off)) {
551 				emit_insn(ctx, ldwu, dst, src, off);
552 			} else {
553 				move_imm(ctx, t1, off, false);
554 				emit_insn(ctx, ldxwu, dst, src, t1);
555 			}
556 			break;
557 		case BPF_DW:
558 			if (is_signed_imm12(off)) {
559 				emit_insn(ctx, ldd, dst, src, off);
560 			} else {
561 				move_imm(ctx, t1, off, false);
562 				emit_insn(ctx, ldxd, dst, src, t1);
563 			}
564 			break;
565 		}
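		/* Acquire barrier: order the load above before later loads and stores */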
566 		emit_insn(ctx, dbar, 0b10100);
567 		break;
568 	/* store_release(dst_reg + off16, src_reg) */
569 	case BPF_STORE_REL:
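		/* Release barrier: order earlier loads and stores before the store below */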
570 		emit_insn(ctx, dbar, 0b10010);
571 		switch (BPF_SIZE(insn->code)) {
572 		case BPF_B:
573 			if (is_signed_imm12(off)) {
574 				emit_insn(ctx, stb, src, dst, off);
575 			} else {
576 				move_imm(ctx, t1, off, false);
577 				emit_insn(ctx, stxb, src, dst, t1);
578 			}
579 			break;
580 		case BPF_H:
581 			if (is_signed_imm12(off)) {
582 				emit_insn(ctx, sth, src, dst, off);
583 			} else {
584 				move_imm(ctx, t1, off, false);
585 				emit_insn(ctx, stxh, src, dst, t1);
586 			}
587 			break;
588 		case BPF_W:
589 			if (is_signed_imm12(off)) {
590 				emit_insn(ctx, stw, src, dst, off);
591 			} else {
592 				move_imm(ctx, t1, off, false);
593 				emit_insn(ctx, stxw, src, dst, t1);
594 			}
595 			break;
596 		case BPF_DW:
597 			if (is_signed_imm12(off)) {
598 				emit_insn(ctx, std, src, dst, off);
599 			} else {
600 				move_imm(ctx, t1, off, false);
601 				emit_insn(ctx, stxd, src, dst, t1);
602 			}
603 			break;
604 		}
605 		break;
606 	default:
607 		pr_err_once("bpf-jit: invalid atomic load/store opcode %02x\n", imm);
608 		return -EINVAL;
609 	}
610 
611 	return 0;
612 }
613 
614 static bool is_signed_bpf_cond(u8 cond)
615 {
616 	return cond == BPF_JSGT || cond == BPF_JSLT ||
617 	       cond == BPF_JSGE || cond == BPF_JSLE;
618 }
619 
620 #define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
621 #define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
622 #define REG_DONT_CLEAR_MARKER	0
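/*
 * ex->fixup packs two fields: bits 31-27 hold the destination register to
 * clear when a marked load faults (REG_DONT_CLEAR_MARKER, i.e. the zero
 * register, is used for stores), and bits 26-0 hold the distance from the
 * fixup entry back to the instruction where execution resumes after the
 * fault has been handled.
 */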
623 
624 bool ex_handler_bpf(const struct exception_table_entry *ex,
625 		    struct pt_regs *regs)
626 {
627 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
628 	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
629 
630 	if (dst_reg != REG_DONT_CLEAR_MARKER)
631 		regs->regs[dst_reg] = 0;
632 	regs->csr_era = (unsigned long)&ex->fixup - offset;
633 
634 	return true;
635 }
636 
637 /* For accesses to BTF pointers, add an entry to the exception table */
638 static int add_exception_handler(const struct bpf_insn *insn,
639 				 struct jit_ctx *ctx,
640 				 int dst_reg)
641 {
642 	unsigned long pc;
643 	off_t ins_offset, fixup_offset;
644 	struct exception_table_entry *ex;
645 
646 	if (!ctx->image || !ctx->ro_image || !ctx->prog->aux->extable)
647 		return 0;
648 
649 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
650 	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
651 	    BPF_MODE(insn->code) != BPF_PROBE_MEM32)
652 		return 0;
653 
654 	if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
655 		return -EINVAL;
656 
657 	ex = &ctx->prog->aux->extable[ctx->num_exentries];
658 	pc = (unsigned long)&ctx->ro_image[ctx->idx - 1];
659 
660 	/*
661 	 * This is the relative offset of the instruction that may fault from
662 	 * the exception table itself. This will be written to the exception
663 	 * table and if this instruction faults, the destination register will
664 	 * be set to '0' and the execution will jump to the next instruction.
665 	 */
666 	ins_offset = pc - (long)&ex->insn;
667 	if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
668 		return -ERANGE;
669 
670 	/*
671 	 * Since the extable follows the program, the fixup offset is always
672 	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
673 	 * to keep things simple, and put the destination register in the upper
674 	 * bits. We don't need to worry about buildtime or runtime sort
675 	 * modifying the upper bits because the table is already sorted, and
676 	 * isn't part of the main exception table.
677 	 *
678 	 * The fixup_offset is set to the next instruction from the instruction
679 	 * that may fault. The execution will jump to this after handling the fault.
680 	 */
681 	fixup_offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
682 	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
683 		return -ERANGE;
684 
685 	/*
686 	 * The offsets above have been calculated using the RO buffer but we
687 	 * need to use the R/W buffer for writes. Switch ex to rw buffer for writing.
688 	 */
689 	ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);
690 	ex->insn = ins_offset;
691 	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
692 		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
693 	ex->type = EX_TYPE_BPF;
694 
695 	ctx->num_exentries++;
696 
697 	return 0;
698 }
699 
700 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
701 {
702 	u8 tm = -1;
703 	u64 func_addr;
704 	bool func_addr_fixed, sign_extend;
705 	int i = insn - ctx->prog->insnsi;
706 	int ret, jmp_offset, tcc_ptr_off;
707 	const u8 code = insn->code;
708 	const u8 cond = BPF_OP(code);
709 	const u8 t1 = LOONGARCH_GPR_T1;
710 	const u8 t2 = LOONGARCH_GPR_T2;
711 	const u8 t3 = LOONGARCH_GPR_T3;
712 	u8 src = regmap[insn->src_reg];
713 	u8 dst = regmap[insn->dst_reg];
714 	const s16 off = insn->off;
715 	const s32 imm = insn->imm;
716 	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
717 
718 	switch (code) {
719 	/* dst = src */
720 	case BPF_ALU | BPF_MOV | BPF_X:
721 	case BPF_ALU64 | BPF_MOV | BPF_X:
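		/*
		 * cast_user: convert a 32-bit arena offset to a user virtual address
		 * by OR-ing in the upper 32 bits of user_vm_start; a NULL (zero)
		 * pointer is left as NULL.
		 */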
722 		if (insn_is_cast_user(insn)) {
723 			move_reg(ctx, t1, src);
724 			emit_zext_32(ctx, t1, true);
725 			move_imm(ctx, dst, (ctx->user_vm_start >> 32) << 32, false);
726 			emit_insn(ctx, beq, t1, LOONGARCH_GPR_ZERO, 1);
727 			emit_insn(ctx, or, t1, dst, t1);
728 			move_reg(ctx, dst, t1);
729 			break;
730 		}
731 		switch (off) {
732 		case 0:
733 			move_reg(ctx, dst, src);
734 			emit_zext_32(ctx, dst, is32);
735 			break;
736 		case 8:
737 			emit_insn(ctx, extwb, dst, src);
738 			emit_zext_32(ctx, dst, is32);
739 			break;
740 		case 16:
741 			emit_insn(ctx, extwh, dst, src);
742 			emit_zext_32(ctx, dst, is32);
743 			break;
744 		case 32:
745 			emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
746 			break;
747 		}
748 		break;
749 
750 	/* dst = imm */
751 	case BPF_ALU | BPF_MOV | BPF_K:
752 	case BPF_ALU64 | BPF_MOV | BPF_K:
753 		move_imm(ctx, dst, imm, is32);
754 		break;
755 
756 	/* dst = dst + src */
757 	case BPF_ALU | BPF_ADD | BPF_X:
758 	case BPF_ALU64 | BPF_ADD | BPF_X:
759 		emit_insn(ctx, addd, dst, dst, src);
760 		emit_zext_32(ctx, dst, is32);
761 		break;
762 
763 	/* dst = dst + imm */
764 	case BPF_ALU | BPF_ADD | BPF_K:
765 	case BPF_ALU64 | BPF_ADD | BPF_K:
766 		if (is_signed_imm12(imm)) {
767 			emit_insn(ctx, addid, dst, dst, imm);
768 		} else {
769 			move_imm(ctx, t1, imm, is32);
770 			emit_insn(ctx, addd, dst, dst, t1);
771 		}
772 		emit_zext_32(ctx, dst, is32);
773 		break;
774 
775 	/* dst = dst - src */
776 	case BPF_ALU | BPF_SUB | BPF_X:
777 	case BPF_ALU64 | BPF_SUB | BPF_X:
778 		emit_insn(ctx, subd, dst, dst, src);
779 		emit_zext_32(ctx, dst, is32);
780 		break;
781 
782 	/* dst = dst - imm */
783 	case BPF_ALU | BPF_SUB | BPF_K:
784 	case BPF_ALU64 | BPF_SUB | BPF_K:
785 		if (is_signed_imm12(-imm)) {
786 			emit_insn(ctx, addid, dst, dst, -imm);
787 		} else {
788 			move_imm(ctx, t1, imm, is32);
789 			emit_insn(ctx, subd, dst, dst, t1);
790 		}
791 		emit_zext_32(ctx, dst, is32);
792 		break;
793 
794 	/* dst = dst * src */
795 	case BPF_ALU | BPF_MUL | BPF_X:
796 	case BPF_ALU64 | BPF_MUL | BPF_X:
797 		emit_insn(ctx, muld, dst, dst, src);
798 		emit_zext_32(ctx, dst, is32);
799 		break;
800 
801 	/* dst = dst * imm */
802 	case BPF_ALU | BPF_MUL | BPF_K:
803 	case BPF_ALU64 | BPF_MUL | BPF_K:
804 		move_imm(ctx, t1, imm, is32);
805 		emit_insn(ctx, muld, dst, dst, t1);
806 		emit_zext_32(ctx, dst, is32);
807 		break;
808 
809 	/* dst = dst / src */
810 	case BPF_ALU | BPF_DIV | BPF_X:
811 	case BPF_ALU64 | BPF_DIV | BPF_X:
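		/* insn->off == 1 selects signed division (sdiv); off == 0 is the unsigned form */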
812 		if (!off) {
813 			emit_zext_32(ctx, dst, is32);
814 			move_reg(ctx, t1, src);
815 			emit_zext_32(ctx, t1, is32);
816 			emit_insn(ctx, divdu, dst, dst, t1);
817 			emit_zext_32(ctx, dst, is32);
818 		} else {
819 			emit_sext_32(ctx, dst, is32);
820 			move_reg(ctx, t1, src);
821 			emit_sext_32(ctx, t1, is32);
822 			emit_insn(ctx, divd, dst, dst, t1);
823 			emit_sext_32(ctx, dst, is32);
824 		}
825 		break;
826 
827 	/* dst = dst / imm */
828 	case BPF_ALU | BPF_DIV | BPF_K:
829 	case BPF_ALU64 | BPF_DIV | BPF_K:
830 		if (!off) {
831 			move_imm(ctx, t1, imm, is32);
832 			emit_zext_32(ctx, dst, is32);
833 			emit_insn(ctx, divdu, dst, dst, t1);
834 			emit_zext_32(ctx, dst, is32);
835 		} else {
836 			move_imm(ctx, t1, imm, false);
837 			emit_sext_32(ctx, t1, is32);
838 			emit_sext_32(ctx, dst, is32);
839 			emit_insn(ctx, divd, dst, dst, t1);
840 			emit_sext_32(ctx, dst, is32);
841 		}
842 		break;
843 
844 	/* dst = dst % src */
845 	case BPF_ALU | BPF_MOD | BPF_X:
846 	case BPF_ALU64 | BPF_MOD | BPF_X:
847 		if (!off) {
848 			emit_zext_32(ctx, dst, is32);
849 			move_reg(ctx, t1, src);
850 			emit_zext_32(ctx, t1, is32);
851 			emit_insn(ctx, moddu, dst, dst, t1);
852 			emit_zext_32(ctx, dst, is32);
853 		} else {
854 			emit_sext_32(ctx, dst, is32);
855 			move_reg(ctx, t1, src);
856 			emit_sext_32(ctx, t1, is32);
857 			emit_insn(ctx, modd, dst, dst, t1);
858 			emit_sext_32(ctx, dst, is32);
859 		}
860 		break;
861 
862 	/* dst = dst % imm */
863 	case BPF_ALU | BPF_MOD | BPF_K:
864 	case BPF_ALU64 | BPF_MOD | BPF_K:
865 		if (!off) {
866 			move_imm(ctx, t1, imm, is32);
867 			emit_zext_32(ctx, dst, is32);
868 			emit_insn(ctx, moddu, dst, dst, t1);
869 			emit_zext_32(ctx, dst, is32);
870 		} else {
871 			move_imm(ctx, t1, imm, false);
872 			emit_sext_32(ctx, t1, is32);
873 			emit_sext_32(ctx, dst, is32);
874 			emit_insn(ctx, modd, dst, dst, t1);
875 			emit_sext_32(ctx, dst, is32);
876 		}
877 		break;
878 
879 	/* dst = -dst */
880 	case BPF_ALU | BPF_NEG:
881 	case BPF_ALU64 | BPF_NEG:
882 		move_imm(ctx, t1, imm, is32);
883 		emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
884 		emit_zext_32(ctx, dst, is32);
885 		break;
886 
887 	/* dst = dst & src */
888 	case BPF_ALU | BPF_AND | BPF_X:
889 	case BPF_ALU64 | BPF_AND | BPF_X:
890 		emit_insn(ctx, and, dst, dst, src);
891 		emit_zext_32(ctx, dst, is32);
892 		break;
893 
894 	/* dst = dst & imm */
895 	case BPF_ALU | BPF_AND | BPF_K:
896 	case BPF_ALU64 | BPF_AND | BPF_K:
897 		if (is_unsigned_imm12(imm)) {
898 			emit_insn(ctx, andi, dst, dst, imm);
899 		} else {
900 			move_imm(ctx, t1, imm, is32);
901 			emit_insn(ctx, and, dst, dst, t1);
902 		}
903 		emit_zext_32(ctx, dst, is32);
904 		break;
905 
906 	/* dst = dst | src */
907 	case BPF_ALU | BPF_OR | BPF_X:
908 	case BPF_ALU64 | BPF_OR | BPF_X:
909 		emit_insn(ctx, or, dst, dst, src);
910 		emit_zext_32(ctx, dst, is32);
911 		break;
912 
913 	/* dst = dst | imm */
914 	case BPF_ALU | BPF_OR | BPF_K:
915 	case BPF_ALU64 | BPF_OR | BPF_K:
916 		if (is_unsigned_imm12(imm)) {
917 			emit_insn(ctx, ori, dst, dst, imm);
918 		} else {
919 			move_imm(ctx, t1, imm, is32);
920 			emit_insn(ctx, or, dst, dst, t1);
921 		}
922 		emit_zext_32(ctx, dst, is32);
923 		break;
924 
925 	/* dst = dst ^ src */
926 	case BPF_ALU | BPF_XOR | BPF_X:
927 	case BPF_ALU64 | BPF_XOR | BPF_X:
928 		emit_insn(ctx, xor, dst, dst, src);
929 		emit_zext_32(ctx, dst, is32);
930 		break;
931 
932 	/* dst = dst ^ imm */
933 	case BPF_ALU | BPF_XOR | BPF_K:
934 	case BPF_ALU64 | BPF_XOR | BPF_K:
935 		if (is_unsigned_imm12(imm)) {
936 			emit_insn(ctx, xori, dst, dst, imm);
937 		} else {
938 			move_imm(ctx, t1, imm, is32);
939 			emit_insn(ctx, xor, dst, dst, t1);
940 		}
941 		emit_zext_32(ctx, dst, is32);
942 		break;
943 
944 	/* dst = dst << src (logical) */
945 	case BPF_ALU | BPF_LSH | BPF_X:
946 		emit_insn(ctx, sllw, dst, dst, src);
947 		emit_zext_32(ctx, dst, is32);
948 		break;
949 
950 	case BPF_ALU64 | BPF_LSH | BPF_X:
951 		emit_insn(ctx, slld, dst, dst, src);
952 		break;
953 
954 	/* dst = dst << imm (logical) */
955 	case BPF_ALU | BPF_LSH | BPF_K:
956 		emit_insn(ctx, slliw, dst, dst, imm);
957 		emit_zext_32(ctx, dst, is32);
958 		break;
959 
960 	case BPF_ALU64 | BPF_LSH | BPF_K:
961 		emit_insn(ctx, sllid, dst, dst, imm);
962 		break;
963 
964 	/* dst = dst >> src (logical) */
965 	case BPF_ALU | BPF_RSH | BPF_X:
966 		emit_insn(ctx, srlw, dst, dst, src);
967 		emit_zext_32(ctx, dst, is32);
968 		break;
969 
970 	case BPF_ALU64 | BPF_RSH | BPF_X:
971 		emit_insn(ctx, srld, dst, dst, src);
972 		break;
973 
974 	/* dst = dst >> imm (logical) */
975 	case BPF_ALU | BPF_RSH | BPF_K:
976 		emit_insn(ctx, srliw, dst, dst, imm);
977 		emit_zext_32(ctx, dst, is32);
978 		break;
979 
980 	case BPF_ALU64 | BPF_RSH | BPF_K:
981 		emit_insn(ctx, srlid, dst, dst, imm);
982 		break;
983 
984 	/* dst = dst >> src (arithmetic) */
985 	case BPF_ALU | BPF_ARSH | BPF_X:
986 		emit_insn(ctx, sraw, dst, dst, src);
987 		emit_zext_32(ctx, dst, is32);
988 		break;
989 
990 	case BPF_ALU64 | BPF_ARSH | BPF_X:
991 		emit_insn(ctx, srad, dst, dst, src);
992 		break;
993 
994 	/* dst = dst >> imm (arithmetic) */
995 	case BPF_ALU | BPF_ARSH | BPF_K:
996 		emit_insn(ctx, sraiw, dst, dst, imm);
997 		emit_zext_32(ctx, dst, is32);
998 		break;
999 
1000 	case BPF_ALU64 | BPF_ARSH | BPF_K:
1001 		emit_insn(ctx, sraid, dst, dst, imm);
1002 		break;
1003 
1004 	/* dst = BSWAP##imm(dst) */
1005 	case BPF_ALU | BPF_END | BPF_FROM_LE:
1006 		switch (imm) {
1007 		case 16:
1008 			/* zero-extend 16 bits into 64 bits */
1009 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
1010 			break;
1011 		case 32:
1012 			/* zero-extend 32 bits into 64 bits */
1013 			emit_zext_32(ctx, dst, is32);
1014 			break;
1015 		case 64:
1016 			/* do nothing */
1017 			break;
1018 		}
1019 		break;
1020 
1021 	case BPF_ALU | BPF_END | BPF_FROM_BE:
1022 	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
1023 		switch (imm) {
1024 		case 16:
1025 			emit_insn(ctx, revb2h, dst, dst);
1026 			/* zero-extend 16 bits into 64 bits */
1027 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
1028 			break;
1029 		case 32:
1030 			emit_insn(ctx, revb2w, dst, dst);
1031 			/* clear the upper 32 bits */
1032 			emit_zext_32(ctx, dst, true);
1033 			break;
1034 		case 64:
1035 			emit_insn(ctx, revbd, dst, dst);
1036 			break;
1037 		}
1038 		break;
1039 
1040 	/* PC += off if dst cond src */
1041 	case BPF_JMP | BPF_JEQ | BPF_X:
1042 	case BPF_JMP | BPF_JNE | BPF_X:
1043 	case BPF_JMP | BPF_JGT | BPF_X:
1044 	case BPF_JMP | BPF_JGE | BPF_X:
1045 	case BPF_JMP | BPF_JLT | BPF_X:
1046 	case BPF_JMP | BPF_JLE | BPF_X:
1047 	case BPF_JMP | BPF_JSGT | BPF_X:
1048 	case BPF_JMP | BPF_JSGE | BPF_X:
1049 	case BPF_JMP | BPF_JSLT | BPF_X:
1050 	case BPF_JMP | BPF_JSLE | BPF_X:
1051 	case BPF_JMP32 | BPF_JEQ | BPF_X:
1052 	case BPF_JMP32 | BPF_JNE | BPF_X:
1053 	case BPF_JMP32 | BPF_JGT | BPF_X:
1054 	case BPF_JMP32 | BPF_JGE | BPF_X:
1055 	case BPF_JMP32 | BPF_JLT | BPF_X:
1056 	case BPF_JMP32 | BPF_JLE | BPF_X:
1057 	case BPF_JMP32 | BPF_JSGT | BPF_X:
1058 	case BPF_JMP32 | BPF_JSGE | BPF_X:
1059 	case BPF_JMP32 | BPF_JSLT | BPF_X:
1060 	case BPF_JMP32 | BPF_JSLE | BPF_X:
1061 		jmp_offset = bpf2la_offset(i, off, ctx);
1062 		move_reg(ctx, t1, dst);
1063 		move_reg(ctx, t2, src);
1064 		if (is_signed_bpf_cond(BPF_OP(code))) {
1065 			emit_sext_32(ctx, t1, is32);
1066 			emit_sext_32(ctx, t2, is32);
1067 		} else {
1068 			emit_zext_32(ctx, t1, is32);
1069 			emit_zext_32(ctx, t2, is32);
1070 		}
1071 		if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
1072 			goto toofar;
1073 		break;
1074 
1075 	/* PC += off if dst cond imm */
1076 	case BPF_JMP | BPF_JEQ | BPF_K:
1077 	case BPF_JMP | BPF_JNE | BPF_K:
1078 	case BPF_JMP | BPF_JGT | BPF_K:
1079 	case BPF_JMP | BPF_JGE | BPF_K:
1080 	case BPF_JMP | BPF_JLT | BPF_K:
1081 	case BPF_JMP | BPF_JLE | BPF_K:
1082 	case BPF_JMP | BPF_JSGT | BPF_K:
1083 	case BPF_JMP | BPF_JSGE | BPF_K:
1084 	case BPF_JMP | BPF_JSLT | BPF_K:
1085 	case BPF_JMP | BPF_JSLE | BPF_K:
1086 	case BPF_JMP32 | BPF_JEQ | BPF_K:
1087 	case BPF_JMP32 | BPF_JNE | BPF_K:
1088 	case BPF_JMP32 | BPF_JGT | BPF_K:
1089 	case BPF_JMP32 | BPF_JGE | BPF_K:
1090 	case BPF_JMP32 | BPF_JLT | BPF_K:
1091 	case BPF_JMP32 | BPF_JLE | BPF_K:
1092 	case BPF_JMP32 | BPF_JSGT | BPF_K:
1093 	case BPF_JMP32 | BPF_JSGE | BPF_K:
1094 	case BPF_JMP32 | BPF_JSLT | BPF_K:
1095 	case BPF_JMP32 | BPF_JSLE | BPF_K:
1096 		jmp_offset = bpf2la_offset(i, off, ctx);
1097 		if (imm) {
1098 			move_imm(ctx, t1, imm, false);
1099 			tm = t1;
1100 		} else {
1101 			/* If imm is 0, simply use zero register. */
1102 			tm = LOONGARCH_GPR_ZERO;
1103 		}
1104 		move_reg(ctx, t2, dst);
1105 		if (is_signed_bpf_cond(BPF_OP(code))) {
1106 			emit_sext_32(ctx, tm, is32);
1107 			emit_sext_32(ctx, t2, is32);
1108 		} else {
1109 			emit_zext_32(ctx, tm, is32);
1110 			emit_zext_32(ctx, t2, is32);
1111 		}
1112 		if (emit_cond_jmp(ctx, cond, t2, tm, jmp_offset) < 0)
1113 			goto toofar;
1114 		break;
1115 
1116 	/* PC += off if dst & src */
1117 	case BPF_JMP | BPF_JSET | BPF_X:
1118 	case BPF_JMP32 | BPF_JSET | BPF_X:
1119 		jmp_offset = bpf2la_offset(i, off, ctx);
1120 		emit_insn(ctx, and, t1, dst, src);
1121 		emit_zext_32(ctx, t1, is32);
1122 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
1123 			goto toofar;
1124 		break;
1125 
1126 	/* PC += off if dst & imm */
1127 	case BPF_JMP | BPF_JSET | BPF_K:
1128 	case BPF_JMP32 | BPF_JSET | BPF_K:
1129 		jmp_offset = bpf2la_offset(i, off, ctx);
1130 		move_imm(ctx, t1, imm, is32);
1131 		emit_insn(ctx, and, t1, dst, t1);
1132 		emit_zext_32(ctx, t1, is32);
1133 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
1134 			goto toofar;
1135 		break;
1136 
1137 	/* PC += off */
1138 	case BPF_JMP | BPF_JA:
1139 	case BPF_JMP32 | BPF_JA:
1140 		if (BPF_CLASS(code) == BPF_JMP)
1141 			jmp_offset = bpf2la_offset(i, off, ctx);
1142 		else
1143 			jmp_offset = bpf2la_offset(i, imm, ctx);
1144 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
1145 			goto toofar;
1146 		break;
1147 
1148 	/* function call */
1149 	case BPF_JMP | BPF_CALL:
1150 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
1151 					    &func_addr, &func_addr_fixed);
1152 		if (ret < 0)
1153 			return ret;
1154 
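		/*
		 * For a bpf2bpf call, reload the spilled tail-call-count pointer into
		 * REG_TCC so the callee receives a pointer rather than a raw count
		 * and simply re-spills it in its own prologue.
		 */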
1155 		if (insn->src_reg == BPF_PSEUDO_CALL) {
1156 			tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
1157 			emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
1158 		}
1159 
1160 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
1161 			const struct btf_func_model *m;
1162 			int i;
1163 
1164 			m = bpf_jit_find_kfunc_model(ctx->prog, insn);
1165 			if (!m)
1166 				return -EINVAL;
1167 
1168 			for (i = 0; i < m->nr_args; i++) {
1169 				u8 reg = regmap[BPF_REG_1 + i];
1170 				bool sign = m->arg_flags[i] & BTF_FMODEL_SIGNED_ARG;
1171 
1172 				emit_abi_ext(ctx, reg, m->arg_size[i], sign);
1173 			}
1174 		}
1175 
1176 		move_addr(ctx, t1, func_addr);
1177 		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0);
1178 
1179 		if (insn->src_reg != BPF_PSEUDO_CALL)
1180 			move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
1181 
1182 		break;
1183 
1184 	/* tail call */
1185 	case BPF_JMP | BPF_TAIL_CALL:
1186 		if (emit_bpf_tail_call(ctx, i) < 0)
1187 			return -EINVAL;
1188 		break;
1189 
1190 	/* function return */
1191 	case BPF_JMP | BPF_EXIT:
1192 		if (i == ctx->prog->len - 1)
1193 			break;
1194 
1195 		jmp_offset = epilogue_offset(ctx);
1196 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
1197 			goto toofar;
1198 		break;
1199 
1200 	/* dst = imm64 */
1201 	case BPF_LD | BPF_IMM | BPF_DW:
1202 	{
1203 		const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
1204 
1205 		if (bpf_pseudo_func(insn))
1206 			move_addr(ctx, dst, imm64);
1207 		else
1208 			move_imm(ctx, dst, imm64, is32);
1209 		return 1;
1210 	}
1211 
1212 	/* dst = *(size *)(src + off) */
1213 	case BPF_LDX | BPF_MEM | BPF_B:
1214 	case BPF_LDX | BPF_MEM | BPF_H:
1215 	case BPF_LDX | BPF_MEM | BPF_W:
1216 	case BPF_LDX | BPF_MEM | BPF_DW:
1217 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1218 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1219 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1220 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1221 	/* dst_reg = (s64)*(signed size *)(src_reg + off) */
1222 	case BPF_LDX | BPF_MEMSX | BPF_B:
1223 	case BPF_LDX | BPF_MEMSX | BPF_H:
1224 	case BPF_LDX | BPF_MEMSX | BPF_W:
1225 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
1226 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
1227 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
1228 	/* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + REG_ARENA + off) */
1229 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
1230 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
1231 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
1232 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
1233 		sign_extend = BPF_MODE(code) == BPF_MEMSX ||
1234 			      BPF_MODE(code) == BPF_PROBE_MEMSX;
1235 
1236 		if (BPF_MODE(code) == BPF_PROBE_MEM32) {
1237 			emit_insn(ctx, addd, t2, src, REG_ARENA);
1238 			src = t2;
1239 		}
1240 
1241 		switch (BPF_SIZE(code)) {
1242 		case BPF_B:
1243 			if (is_signed_imm12(off)) {
1244 				if (sign_extend)
1245 					emit_insn(ctx, ldb, dst, src, off);
1246 				else
1247 					emit_insn(ctx, ldbu, dst, src, off);
1248 			} else {
1249 				move_imm(ctx, t1, off, is32);
1250 				if (sign_extend)
1251 					emit_insn(ctx, ldxb, dst, src, t1);
1252 				else
1253 					emit_insn(ctx, ldxbu, dst, src, t1);
1254 			}
1255 			break;
1256 		case BPF_H:
1257 			if (is_signed_imm12(off)) {
1258 				if (sign_extend)
1259 					emit_insn(ctx, ldh, dst, src, off);
1260 				else
1261 					emit_insn(ctx, ldhu, dst, src, off);
1262 			} else {
1263 				move_imm(ctx, t1, off, is32);
1264 				if (sign_extend)
1265 					emit_insn(ctx, ldxh, dst, src, t1);
1266 				else
1267 					emit_insn(ctx, ldxhu, dst, src, t1);
1268 			}
1269 			break;
1270 		case BPF_W:
1271 			if (is_signed_imm12(off)) {
1272 				if (sign_extend)
1273 					emit_insn(ctx, ldw, dst, src, off);
1274 				else
1275 					emit_insn(ctx, ldwu, dst, src, off);
1276 			} else {
1277 				move_imm(ctx, t1, off, is32);
1278 				if (sign_extend)
1279 					emit_insn(ctx, ldxw, dst, src, t1);
1280 				else
1281 					emit_insn(ctx, ldxwu, dst, src, t1);
1282 			}
1283 			break;
1284 		case BPF_DW:
1285 			move_imm(ctx, t1, off, is32);
1286 			emit_insn(ctx, ldxd, dst, src, t1);
1287 			break;
1288 		}
1289 
1290 		ret = add_exception_handler(insn, ctx, dst);
1291 		if (ret)
1292 			return ret;
1293 		break;
1294 
1295 	/* *(size *)(dst + off) = imm */
1296 	case BPF_ST | BPF_MEM | BPF_B:
1297 	case BPF_ST | BPF_MEM | BPF_H:
1298 	case BPF_ST | BPF_MEM | BPF_W:
1299 	case BPF_ST | BPF_MEM | BPF_DW:
1300 	/* ST | PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = imm */
1301 	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
1302 	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
1303 	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
1304 	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
1305 		if (BPF_MODE(code) == BPF_PROBE_MEM32) {
1306 			emit_insn(ctx, addd, t3, dst, REG_ARENA);
1307 			dst = t3;
1308 		}
1309 
1310 		switch (BPF_SIZE(code)) {
1311 		case BPF_B:
1312 			move_imm(ctx, t1, imm, is32);
1313 			if (is_signed_imm12(off)) {
1314 				emit_insn(ctx, stb, t1, dst, off);
1315 			} else {
1316 				move_imm(ctx, t2, off, is32);
1317 				emit_insn(ctx, stxb, t1, dst, t2);
1318 			}
1319 			break;
1320 		case BPF_H:
1321 			move_imm(ctx, t1, imm, is32);
1322 			if (is_signed_imm12(off)) {
1323 				emit_insn(ctx, sth, t1, dst, off);
1324 			} else {
1325 				move_imm(ctx, t2, off, is32);
1326 				emit_insn(ctx, stxh, t1, dst, t2);
1327 			}
1328 			break;
1329 		case BPF_W:
1330 			move_imm(ctx, t1, imm, is32);
1331 			if (is_signed_imm12(off)) {
1332 				emit_insn(ctx, stw, t1, dst, off);
1333 			} else if (is_signed_imm14(off)) {
1334 				emit_insn(ctx, stptrw, t1, dst, off);
1335 			} else {
1336 				move_imm(ctx, t2, off, is32);
1337 				emit_insn(ctx, stxw, t1, dst, t2);
1338 			}
1339 			break;
1340 		case BPF_DW:
1341 			move_imm(ctx, t1, imm, is32);
1342 			if (is_signed_imm12(off)) {
1343 				emit_insn(ctx, std, t1, dst, off);
1344 			} else if (is_signed_imm14(off)) {
1345 				emit_insn(ctx, stptrd, t1, dst, off);
1346 			} else {
1347 				move_imm(ctx, t2, off, is32);
1348 				emit_insn(ctx, stxd, t1, dst, t2);
1349 			}
1350 			break;
1351 		}
1352 
1353 		ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER);
1354 		if (ret)
1355 			return ret;
1356 		break;
1357 
1358 	/* *(size *)(dst + off) = src */
1359 	case BPF_STX | BPF_MEM | BPF_B:
1360 	case BPF_STX | BPF_MEM | BPF_H:
1361 	case BPF_STX | BPF_MEM | BPF_W:
1362 	case BPF_STX | BPF_MEM | BPF_DW:
1363 	/* STX | PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = src */
1364 	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
1365 	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
1366 	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
1367 	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
1368 		if (BPF_MODE(code) == BPF_PROBE_MEM32) {
1369 			emit_insn(ctx, addd, t2, dst, REG_ARENA);
1370 			dst = t2;
1371 		}
1372 
1373 		switch (BPF_SIZE(code)) {
1374 		case BPF_B:
1375 			if (is_signed_imm12(off)) {
1376 				emit_insn(ctx, stb, src, dst, off);
1377 			} else {
1378 				move_imm(ctx, t1, off, is32);
1379 				emit_insn(ctx, stxb, src, dst, t1);
1380 			}
1381 			break;
1382 		case BPF_H:
1383 			if (is_signed_imm12(off)) {
1384 				emit_insn(ctx, sth, src, dst, off);
1385 			} else {
1386 				move_imm(ctx, t1, off, is32);
1387 				emit_insn(ctx, stxh, src, dst, t1);
1388 			}
1389 			break;
1390 		case BPF_W:
1391 			if (is_signed_imm12(off)) {
1392 				emit_insn(ctx, stw, src, dst, off);
1393 			} else if (is_signed_imm14(off)) {
1394 				emit_insn(ctx, stptrw, src, dst, off);
1395 			} else {
1396 				move_imm(ctx, t1, off, is32);
1397 				emit_insn(ctx, stxw, src, dst, t1);
1398 			}
1399 			break;
1400 		case BPF_DW:
1401 			if (is_signed_imm12(off)) {
1402 				emit_insn(ctx, std, src, dst, off);
1403 			} else if (is_signed_imm14(off)) {
1404 				emit_insn(ctx, stptrd, src, dst, off);
1405 			} else {
1406 				move_imm(ctx, t1, off, is32);
1407 				emit_insn(ctx, stxd, src, dst, t1);
1408 			}
1409 			break;
1410 		}
1411 
1412 		ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER);
1413 		if (ret)
1414 			return ret;
1415 		break;
1416 
1417 	/* Atomics */
1418 	case BPF_STX | BPF_ATOMIC | BPF_B:
1419 	case BPF_STX | BPF_ATOMIC | BPF_H:
1420 	case BPF_STX | BPF_ATOMIC | BPF_W:
1421 	case BPF_STX | BPF_ATOMIC | BPF_DW:
1422 		if (!bpf_atomic_is_load_store(insn))
1423 			ret = emit_atomic_rmw(insn, ctx);
1424 		else
1425 			ret = emit_atomic_ld_st(insn, ctx);
1426 		if (ret)
1427 			return ret;
1428 		break;
1429 
1430 	/* Speculation barrier */
1431 	case BPF_ST | BPF_NOSPEC:
1432 		break;
1433 
1434 	default:
1435 		pr_err("bpf_jit: unknown opcode %02x\n", code);
1436 		return -EINVAL;
1437 	}
1438 
1439 	return 0;
1440 
1441 toofar:
1442 	pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
1443 	return -E2BIG;
1444 }
1445 
1446 static int build_body(struct jit_ctx *ctx, bool extra_pass)
1447 {
1448 	int i;
1449 	const struct bpf_prog *prog = ctx->prog;
1450 
1451 	for (i = 0; i < prog->len; i++) {
1452 		const struct bpf_insn *insn = &prog->insnsi[i];
1453 		int ret;
1454 
1455 		if (ctx->image == NULL)
1456 			ctx->offset[i] = ctx->idx;
1457 
1458 		ret = build_insn(insn, ctx, extra_pass);
1459 		if (ret > 0) {
1460 			i++;
1461 			if (ctx->image == NULL)
1462 				ctx->offset[i] = ctx->idx;
1463 			continue;
1464 		}
1465 		if (ret)
1466 			return ret;
1467 	}
1468 
1469 	if (ctx->image == NULL)
1470 		ctx->offset[i] = ctx->idx;
1471 
1472 	return 0;
1473 }
1474 
1475 /* Fill space with break instructions */
1476 static void jit_fill_hole(void *area, unsigned int size)
1477 {
1478 	u32 *ptr;
1479 
1480 	/* We are guaranteed to have aligned memory */
1481 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
1482 		*ptr++ = INSN_BREAK;
1483 }
1484 
1485 static int validate_code(struct jit_ctx *ctx)
1486 {
1487 	int i;
1488 	union loongarch_instruction insn;
1489 
1490 	for (i = 0; i < ctx->idx; i++) {
1491 		insn = ctx->image[i];
1492 		/* Check INSN_BREAK */
1493 		if (insn.word == INSN_BREAK)
1494 			return -1;
1495 	}
1496 
1497 	return 0;
1498 }
1499 
1500 static int validate_ctx(struct jit_ctx *ctx)
1501 {
1502 	if (validate_code(ctx))
1503 		return -1;
1504 
1505 	if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
1506 		return -1;
1507 
1508 	return 0;
1509 }
1510 
1511 static int emit_jump_and_link(struct jit_ctx *ctx, u8 rd, u64 target)
1512 {
1513 	if (!target) {
1514 		pr_err("bpf_jit: invalid jump target address\n");
1515 		return -EFAULT;
1516 	}
1517 
1518 	move_imm(ctx, LOONGARCH_GPR_T1, target, false);
1519 	emit_insn(ctx, jirl, rd, LOONGARCH_GPR_T1, 0);
1520 
1521 	return 0;
1522 }
1523 
1524 static int emit_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
1525 {
1526 	int i;
1527 	struct jit_ctx ctx;
1528 
1529 	ctx.idx = 0;
1530 	ctx.image = (union loongarch_instruction *)insns;
1531 
1532 	if (!target) {
1533 		for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
1534 			emit_insn((&ctx), nop);
1535 		return 0;
1536 	}
1537 
1538 	return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_RA : LOONGARCH_GPR_ZERO, (u64)target);
1539 }
1540 
1541 static int emit_call(struct jit_ctx *ctx, u64 addr)
1542 {
1543 	return emit_jump_and_link(ctx, LOONGARCH_GPR_RA, addr);
1544 }
1545 
1546 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
1547 {
1548 	int ret;
1549 
1550 	cpus_read_lock();
1551 	mutex_lock(&text_mutex);
1552 	ret = larch_insn_text_copy(dst, src, len);
1553 	mutex_unlock(&text_mutex);
1554 	cpus_read_unlock();
1555 
1556 	return ret ? ERR_PTR(-EINVAL) : dst;
1557 }
1558 
1559 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
1560 		       enum bpf_text_poke_type new_t, void *old_addr,
1561 		       void *new_addr)
1562 {
1563 	int ret;
1564 	bool is_call;
1565 	unsigned long size = 0;
1566 	unsigned long offset = 0;
1567 	void *image = NULL;
1568 	char namebuf[KSYM_NAME_LEN];
1569 	u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
1570 	u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
1571 
1572 	/* Only poking bpf text is supported. Since kernel function entry
1573 	 * is set up by ftrace, we rely on ftrace to poke kernel functions.
1574 	 */
1575 	if (!bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
1576 		return -ENOTSUPP;
1577 
1578 	image = ip - offset;
1579 
1580 	/* zero offset means we're poking bpf prog entry */
1581 	if (offset == 0) {
1582 		/* skip to the nop instruction in bpf prog entry:
1583 		 * move t0, ra
1584 		 * nop
1585 		 */
1586 		ip = image + LOONGARCH_INSN_SIZE;
1587 	}
1588 
1589 	is_call = old_t == BPF_MOD_CALL;
1590 	ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call);
1591 	if (ret)
1592 		return ret;
1593 
1594 	if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES))
1595 		return -EFAULT;
1596 
1597 	is_call = new_t == BPF_MOD_CALL;
1598 	ret = emit_jump_or_nops(new_addr, ip, new_insns, is_call);
1599 	if (ret)
1600 		return ret;
1601 
1602 	cpus_read_lock();
1603 	mutex_lock(&text_mutex);
1604 	if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES))
1605 		ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES);
1606 	mutex_unlock(&text_mutex);
1607 	cpus_read_unlock();
1608 
1609 	return ret;
1610 }
1611 
1612 int bpf_arch_text_invalidate(void *dst, size_t len)
1613 {
1614 	int i;
1615 	int ret = 0;
1616 	u32 *inst;
1617 
1618 	inst = kvmalloc(len, GFP_KERNEL);
1619 	if (!inst)
1620 		return -ENOMEM;
1621 
1622 	for (i = 0; i < (len / sizeof(u32)); i++)
1623 		inst[i] = INSN_BREAK;
1624 
1625 	cpus_read_lock();
1626 	mutex_lock(&text_mutex);
1627 	if (larch_insn_text_copy(dst, inst, len))
1628 		ret = -EINVAL;
1629 	mutex_unlock(&text_mutex);
1630 	cpus_read_unlock();
1631 
1632 	kvfree(inst);
1633 
1634 	return ret;
1635 }
1636 
1637 static void store_args(struct jit_ctx *ctx, int nr_arg_slots, int args_off)
1638 {
1639 	int i;
1640 
1641 	for (i = 0; i < nr_arg_slots; i++) {
1642 		if (i < LOONGARCH_MAX_REG_ARGS)
1643 			emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
1644 		else {
1645 			/* Skip slots for T0 and FP of traced function */
1646 			emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP,
1647 				  16 + (i - LOONGARCH_MAX_REG_ARGS) * 8);
1648 			emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -args_off);
1649 		}
1650 		args_off -= 8;
1651 	}
1652 }
1653 
1654 static void restore_args(struct jit_ctx *ctx, int nr_reg_args, int args_off)
1655 {
1656 	int i;
1657 
1658 	for (i = 0; i < nr_reg_args; i++) {
1659 		emit_insn(ctx, ldd, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
1660 		args_off -= 8;
1661 	}
1662 }
1663 
1664 static void restore_stk_args(struct jit_ctx *ctx, int nr_stk_args, int args_off, int stk_args_off)
1665 {
1666 	int i;
1667 
1668 	for (i = 0; i < nr_stk_args; i++) {
1669 		emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP,
1670 			  -(args_off - LOONGARCH_MAX_REG_ARGS * 8));
1671 		emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -stk_args_off);
1672 		args_off -= 8;
1673 		stk_args_off -= 8;
1674 	}
1675 }
1676 
1677 static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
1678 			   int args_off, int retval_off, int run_ctx_off, bool save_ret)
1679 {
1680 	int ret;
1681 	u32 *branch;
1682 	struct bpf_prog *p = l->link.prog;
1683 	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
1684 
1685 	if (l->cookie)
1686 		emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1,
1687 				      -run_ctx_off + cookie_off, l->cookie);
1688 	else
1689 		emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
1690 
1691 	/* arg1: prog */
1692 	move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
1693 	/* arg2: &run_ctx */
1694 	emit_insn(ctx, addid, LOONGARCH_GPR_A1, LOONGARCH_GPR_FP, -run_ctx_off);
1695 	ret = emit_call(ctx, (const u64)bpf_trampoline_enter(p));
1696 	if (ret)
1697 		return ret;
1698 
1699 	/* store prog start time */
1700 	move_reg(ctx, LOONGARCH_GPR_S1, LOONGARCH_GPR_A0);
1701 
1702 	/*
1703 	 * if (__bpf_prog_enter(prog) == 0)
1704 	 *      goto skip_exec_of_prog;
1705 	 */
1706 	branch = (u32 *)ctx->image + ctx->idx;
1707 	/* nop reserved for conditional jump */
1708 	emit_insn(ctx, nop);
1709 
1710 	/* arg1: &args_off */
1711 	emit_insn(ctx, addid, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -args_off);
1712 	if (!p->jited)
1713 		move_imm(ctx, LOONGARCH_GPR_A1, (const s64)p->insnsi, false);
1714 	ret = emit_call(ctx, (const u64)p->bpf_func);
1715 	if (ret)
1716 		return ret;
1717 
1718 	if (save_ret) {
1719 		emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
1720 		emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
1721 	}
1722 
1723 	/* update branch with beqz */
1724 	if (ctx->image) {
1725 		int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branch;
1726 		*branch = larch_insn_gen_beq(LOONGARCH_GPR_A0, LOONGARCH_GPR_ZERO, offset);
1727 	}
1728 
1729 	/* arg1: prog */
1730 	move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
1731 	/* arg2: prog start time */
1732 	move_reg(ctx, LOONGARCH_GPR_A1, LOONGARCH_GPR_S1);
1733 	/* arg3: &run_ctx */
1734 	emit_insn(ctx, addid, LOONGARCH_GPR_A2, LOONGARCH_GPR_FP, -run_ctx_off);
1735 	ret = emit_call(ctx, (const u64)bpf_trampoline_exit(p));
1736 
1737 	return ret;
1738 }
1739 
1740 static int invoke_bpf(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
1741 		      int args_off, int retval_off, int run_ctx_off,
1742 		      int func_meta_off, bool save_ret, u64 func_meta, int cookie_off)
1743 {
1744 	int i, cur_cookie = (cookie_off - args_off) / 8;
1745 
1746 	for (i = 0; i < tl->nr_links; i++) {
1747 		int err;
1748 
1749 		if (bpf_prog_calls_session_cookie(tl->links[i])) {
1750 			u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT);
1751 
1752 			emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -func_meta_off, meta);
1753 			cur_cookie--;
1754 		}
1755 		err = invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, save_ret);
1756 		if (err)
1757 			return err;
1758 	}
1759 
1760 	return 0;
1761 }
1762 
1763 void *arch_alloc_bpf_trampoline(unsigned int size)
1764 {
1765 	return bpf_prog_pack_alloc(size, jit_fill_hole);
1766 }
1767 
1768 void arch_free_bpf_trampoline(void *image, unsigned int size)
1769 {
1770 	bpf_prog_pack_free(image, size);
1771 }
1772 
1773 int arch_protect_bpf_trampoline(void *image, unsigned int size)
1774 {
1775 	return 0;
1776 }
1777 
1778 /*
1779  * Sign-extend the register if necessary
1780  */
1781 static void sign_extend(struct jit_ctx *ctx, int rd, int rj, u8 size, bool sign)
1782 {
1783 	/* ABI requires unsigned char/short to be zero-extended */
1784 	if (!sign && (size == 1 || size == 2)) {
1785 		if (rd != rj)
1786 			move_reg(ctx, rd, rj);
1787 		return;
1788 	}
1789 
1790 	switch (size) {
1791 	case 1:
1792 		emit_insn(ctx, extwb, rd, rj);
1793 		break;
1794 	case 2:
1795 		emit_insn(ctx, extwh, rd, rj);
1796 		break;
1797 	case 4:
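		/* addi.w writes the sign-extended 32-bit result into rd */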
1798 		emit_insn(ctx, addiw, rd, rj, 0);
1799 		break;
1800 	case 8:
1801 		if (rd != rj)
1802 			move_reg(ctx, rd, rj);
1803 		break;
1804 	default:
1805 		pr_warn("bpf_jit: invalid size %d for sign_extend\n", size);
1806 	}
1807 }
1808 
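/*
 * A rough sketch of the code emitted below (not the exact instruction
 * sequence):
 *
 *	set up the frame(s): save RA/FP (and T0 when called from function entry)
 *	spill args, func metadata, optional ip and tail_call_cnt_ptr
 *	call __bpf_tramp_enter(im)		// BPF_TRAMP_F_CALL_ORIG
 *	run fentry programs
 *	run fmod_ret programs			// non-zero return skips orig call
 *	restore args and call orig_call		// BPF_TRAMP_F_CALL_ORIG
 *	run fexit programs
 *	call __bpf_tramp_exit(im)		// BPF_TRAMP_F_CALL_ORIG
 *	restore registers, tear down the frame and return
 */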
1809 static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
1810 					 const struct btf_func_model *m, struct bpf_tramp_links *tlinks,
1811 					 void *func_addr, u32 flags)
1812 {
1813 	int i, ret, save_ret;
1814 	int cookie_cnt, cookie_off;
1815 	int stack_size, args_off, stk_args_off, nr_arg_slots = 0;
1816 	int retval_off, func_meta_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off;
1817 	unsigned long long func_meta;
1818 	bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
1819 	void *orig_call = func_addr;
1820 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
1821 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
1822 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
1823 	u32 **branches = NULL;
1824 
1825 	/*
1826 	 * FP + 8       [ RA to parent func ] return address to parent
1827 	 *                    function
1828 	 * FP + 0       [ FP of parent func ] frame pointer of parent
1829 	 *                    function
1830 	 * FP - 8       [ T0 to traced func ] return address of traced
1831 	 *                    function
1832 	 * FP - 16      [ FP of traced func ] frame pointer of traced
1833 	 *                    function
1834 	 *
1835 	 * FP - retval_off   [ return value      ] BPF_TRAMP_F_CALL_ORIG or
1836 	 *                                         BPF_TRAMP_F_RET_FENTRY_RET
1837 	 *                   [ arg regN          ]
1838 	 *                   [ ...               ]
1839 	 * FP - args_off     [ arg reg1          ]
1840 	 *
1841 	 * FP - func_meta_off [ regs count, etc ]
1842 	 *
1843 	 * FP - ip_off       [ traced func       ] BPF_TRAMP_F_IP_ARG
1844 	 *
1845 	 *                   [ stack cookie N    ]
1846 	 *                   [ ...               ]
1847 	 * FP - cookie_off   [ stack cookie 1    ]
1848 	 *
1849 	 * FP - run_ctx_off  [ bpf_tramp_run_ctx ]
1850 	 *
1851 	 * FP - sreg_off     [ callee saved reg  ]
1852 	 *
1853 	 * FP - tcc_ptr_off  [ tail_call_cnt_ptr ]
1854 	 *
1855 	 *                   [ stack_argN        ]
1856 	 *                   [ ...               ]
1857 	 * FP - stk_args_off [ stack_arg1        ] BPF_TRAMP_F_CALL_ORIG
1858 	 */
1859 
1860 	if (m->nr_args > MAX_BPF_FUNC_ARGS)
1861 		return -ENOTSUPP;
1862 
1863 	/* Extra registers for struct arguments */
1864 	for (i = 0; i < m->nr_args; i++) {
1865 		/*
1866 		 * The struct argument size is at most 16 bytes,
1867 		 * enforced by the verifier. The struct argument
1868 		 * may be passed in a pair of registers if its
1869 		 * size is more than 8 bytes and no more than 16
1870 		 * bytes.
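		 * For example, a 12-byte struct rounds up to 16 bytes and
		 * therefore takes two 8-byte slots.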
1871 		 */
1872 		nr_arg_slots += round_up(m->arg_size[i], 8) / 8;
1873 	}
1874 
1875 	if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
1876 		return -ENOTSUPP;
1877 
1878 	/* Room in the trampoline frame to store the return address and frame pointer */
1879 	stack_size = 16;
1880 
1881 	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
1882 	if (save_ret)
1883 		stack_size += 16; /* Save BPF R0 and A0 */
1884 
1885 	retval_off = stack_size;
1886 
1887 	/* Room in the trampoline frame to store args */
1888 	stack_size += nr_arg_slots * 8;
1889 	args_off = stack_size;
1890 
1891 	/* Room for function metadata, such as the arg regs count */
1892 	stack_size += 8;
1893 	func_meta_off = stack_size;
1894 
1895 	/* Room in the trampoline frame to store the ip address */
1896 	if (flags & BPF_TRAMP_F_IP_ARG) {
1897 		stack_size += 8;
1898 		ip_off = stack_size;
1899 	}
1900 
1901 	cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
1902 
1903 	/* Room for session cookies */
1904 	stack_size += cookie_cnt * 8;
1905 	cookie_off = stack_size;
1906 
1907 	/* Room in the trampoline frame to store struct bpf_tramp_run_ctx */
1908 	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
1909 	run_ctx_off = stack_size;
1910 
1911 	stack_size += 8;
1912 	sreg_off = stack_size;
1913 
1914 	/* Room in the trampoline frame to store the tail_call_cnt_ptr */
1915 	if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
1916 		stack_size += 8;
1917 		tcc_ptr_off = stack_size;
1918 	}
1919 
1920 	if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nr_arg_slots - LOONGARCH_MAX_REG_ARGS > 0))
1921 		stack_size += (nr_arg_slots - LOONGARCH_MAX_REG_ARGS) * 8;
1922 
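	/* Keep the frame size 16-byte aligned, as required by the LoongArch psABI */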
1923 	stack_size = round_up(stack_size, 16);
1924 
1925 	/* Room for stack-passed args must be at the top of the stack */
1926 	stk_args_off = stack_size;
1927 
1928 	if (is_struct_ops) {
1929 		/*
1930 		 * For a trampoline that is called directly, only the
1931 		 * trampoline's own frame needs to be set up.
1932 		 */
1933 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
1934 		emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8);
1935 		emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
1936 		emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
1937 	} else {
1938 		/*
1939 		 * For a trampoline called from a function's entry, both the
1940 		 * traced function's frame and the trampoline's frame need
1941 		 * to be set up.
1942 		 */
1943 		/* RA and FP for parent function */
1944 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16);
1945 		emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
1946 		emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
1947 		emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16);
1948 
1949 		/* RA and FP for traced function */
1950 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
1951 		emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
1952 		emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
1953 		emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
1954 	}
1955 
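	/* REG_TCC carries the tail_call_cnt_ptr; preserve it across the trampoline */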
1956 	if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
1957 		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off);
1958 
1959 	/* Save callee-saved register S1, which is used to hold the prog start time */
1960 	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
1961 
1962 	/* store ip address of the traced function */
1963 	if (flags & BPF_TRAMP_F_IP_ARG)
1964 		emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -ip_off, (u64)func_addr);
1965 
1966 	/* store arg regs count */
1967 	func_meta = nr_arg_slots;
1968 	emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -func_meta_off, func_meta);
1969 
1970 	store_args(ctx, nr_arg_slots, args_off);
1971 
1972 	if (bpf_fsession_cnt(tlinks)) {
1973 		/* clear all session cookie values */
1974 		for (i = 0; i < cookie_cnt; i++)
1975 			emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -cookie_off + 8 * i);
1976 
1977 		/* clear the return value so that fentry programs always see 0 */
1978 		emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
1979 	}
1980 
1981 	/* Adjust orig_call to skip the traced function's fentry NOPs */
1982 	/* Ftrace jump skips 2 NOP instructions */
1983 	if (is_kernel_text((unsigned long)orig_call) ||
1984 	    is_module_text_address((unsigned long)orig_call))
1985 		orig_call += LOONGARCH_FENTRY_NBYTES;
1986 	/* Direct jump skips 5 NOP instructions */
1987 	else if (is_bpf_text_address((unsigned long)orig_call))
1988 		orig_call += LOONGARCH_BPF_FENTRY_NBYTES;
1989 
1990 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
1991 		move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
1992 		ret = emit_call(ctx, (const u64)__bpf_tramp_enter);
1993 		if (ret)
1994 			return ret;
1995 	}
1996 
1997 	if (fentry->nr_links) {
1998 		ret = invoke_bpf(ctx, fentry, args_off, retval_off, run_ctx_off, func_meta_off,
1999 				 flags & BPF_TRAMP_F_RET_FENTRY_RET, func_meta, cookie_off);
2000 		if (ret)
2001 			return ret;
2002 	}
2003 	if (fmod_ret->nr_links) {
2004 		branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL);
2005 		if (!branches)
2006 			return -ENOMEM;
2007 
2008 		emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
2009 		for (i = 0; i < fmod_ret->nr_links; i++) {
2010 			ret = invoke_bpf_prog(ctx, fmod_ret->links[i],
2011 					      args_off, retval_off, run_ctx_off, true);
2012 			if (ret)
2013 				goto out;
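			/*
			 * A non-zero return from a fmod_ret program skips the
			 * call to the original function; the nop below is
			 * patched into a bne once the final offsets are known.
			 */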
2014 			emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off);
2015 			branches[i] = (u32 *)ctx->image + ctx->idx;
2016 			emit_insn(ctx, nop);
2017 		}
2018 	}
2019 
2020 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2021 		restore_args(ctx, min_t(int, nr_arg_slots, LOONGARCH_MAX_REG_ARGS), args_off);
2022 		restore_stk_args(ctx, nr_arg_slots - LOONGARCH_MAX_REG_ARGS, args_off, stk_args_off);
2023 
2024 		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
2025 			emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off);
2026 
2027 		ret = emit_call(ctx, (const u64)orig_call);
2028 		if (ret)
2029 			goto out;
2030 		emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
2031 		emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
2032 		im->ip_after_call = ctx->ro_image + ctx->idx;
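		/*
		 * ip_after_call marks the slot reserved below; it is expected
		 * to be patched with a jump to im->ip_epilogue when the
		 * trampoline image is torn down (see bpf_tramp_image_put()).
		 */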
2033 		/* Reserve space for the move_imm + jirl instruction */
2034 		for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
2035 			emit_insn(ctx, nop);
2036 	}
2037 
2038 	for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) {
2039 		int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i];
2040 		*branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset);
2041 	}
2042 
2043 	/* Set "is_return" flag for fsession */
2044 	func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
2045 	if (bpf_fsession_cnt(tlinks))
2046 		emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -func_meta_off, func_meta);
2047 
2048 	if (fexit->nr_links) {
2049 		ret = invoke_bpf(ctx, fexit, args_off, retval_off, run_ctx_off,
2050 				 func_meta_off, false, func_meta, cookie_off);
2051 		if (ret)
2052 			goto out;
2053 	}
2054 
2055 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2056 		im->ip_epilogue = ctx->ro_image + ctx->idx;
2057 		move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
2058 		ret = emit_call(ctx, (const u64)__bpf_tramp_exit);
2059 		if (ret)
2060 			goto out;
2061 	}
2062 
2063 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
2064 		restore_args(ctx, min_t(int, nr_arg_slots, LOONGARCH_MAX_REG_ARGS), args_off);
2065 
2066 	if (save_ret) {
2067 		emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
2068 		if (is_struct_ops)
2069 			sign_extend(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0],
2070 				    m->ret_size, m->ret_flags & BTF_FMODEL_SIGNED_ARG);
2071 		else
2072 			emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
2073 	}
2074 
2075 	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
2076 
2077 	if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
2078 		emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off);
2079 
2080 	if (is_struct_ops) {
2081 		/* trampoline called directly */
2082 		emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8);
2083 		emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
2084 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
2085 
2086 		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
2087 	} else {
2088 		/* trampoline called from function entry */
2089 		emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
2090 		emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
2091 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
2092 
2093 		emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
2094 		emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
2095 		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16);
2096 
2097 		if (flags & BPF_TRAMP_F_SKIP_FRAME) {
2098 			/* return to parent function */
2099 			move_reg(ctx, LOONGARCH_GPR_RA, LOONGARCH_GPR_T0);
2100 			emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0);
2101 		} else {
2102 			/* return to traced function */
2103 			move_reg(ctx, LOONGARCH_GPR_T1, LOONGARCH_GPR_RA);
2104 			move_reg(ctx, LOONGARCH_GPR_RA, LOONGARCH_GPR_T0);
2105 			emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T1, 0);
2106 		}
2107 	}
2108 
2109 	ret = ctx->idx;
2110 out:
2111 	kfree(branches);
2112 
2113 	return ret;
2114 }
2115 
2116 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
2117 				void *ro_image_end, const struct btf_func_model *m,
2118 				u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
2119 {
2120 	int ret, size;
2121 	void *image, *tmp;
2122 	struct jit_ctx ctx;
2123 
2124 	size = ro_image_end - ro_image;
2125 	image = kvmalloc(size, GFP_KERNEL);
2126 	if (!image)
2127 		return -ENOMEM;
2128 
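	/* JIT into a temporary RW buffer first; it is copied into the ROX image below */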
2129 	ctx.image = (union loongarch_instruction *)image;
2130 	ctx.ro_image = (union loongarch_instruction *)ro_image;
2131 	ctx.idx = 0;
2132 
2133 	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
2134 	ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags);
2135 	if (ret < 0)
2136 		goto out;
2137 
2138 	if (validate_code(&ctx) < 0) {
2139 		ret = -EINVAL;
2140 		goto out;
2141 	}
2142 
2143 	tmp = bpf_arch_text_copy(ro_image, image, size);
2144 	if (IS_ERR(tmp)) {
2145 		ret = PTR_ERR(tmp);
2146 		goto out;
2147 	}
2148 
2149 out:
2150 	kvfree(image);
2151 	return ret < 0 ? ret : size;
2152 }
2153 
2154 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
2155 			     struct bpf_tramp_links *tlinks, void *func_addr)
2156 {
2157 	int ret;
2158 	struct jit_ctx ctx;
2159 	struct bpf_tramp_image im;
2160 
2161 	ctx.image = NULL;
2162 	ctx.idx = 0;
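	/* Dry run: with ctx.image == NULL instructions are only counted, not emitted */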
2163 
2164 	ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags);
2165 
2166 	return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE;
2167 }
2168 
2169 struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
2170 {
2171 	bool extra_pass = false;
2172 	u8 *image_ptr, *ro_image_ptr;
2173 	int image_size, prog_size, extable_size;
2174 	struct jit_ctx ctx;
2175 	struct jit_data *jit_data;
2176 	struct bpf_binary_header *header;
2177 	struct bpf_binary_header *ro_header;
2178 
2179 	/*
2180 	 * If BPF JIT was not enabled then we must fall back to
2181 	 * the interpreter.
2182 	 */
2183 	if (!prog->jit_requested)
2184 		return prog;
2185 
2186 	jit_data = prog->aux->jit_data;
2187 	if (!jit_data) {
2188 		jit_data = kzalloc_obj(*jit_data);
2189 		if (!jit_data)
2190 			return prog;
2191 		prog->aux->jit_data = jit_data;
2192 	}
2193 	if (jit_data->ctx.offset) {
2194 		ctx = jit_data->ctx;
2195 		ro_header = jit_data->ro_header;
2196 		ro_image_ptr = (void *)ctx.ro_image;
2197 		header = jit_data->header;
2198 		image_ptr = (void *)header + ((void *)ro_image_ptr - (void *)ro_header);
2199 		extra_pass = true;
2200 		prog_size = sizeof(u32) * ctx.idx;
2201 		goto skip_init_ctx;
2202 	}
2203 
2204 	memset(&ctx, 0, sizeof(ctx));
2205 	ctx.prog = prog;
2206 	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
2207 	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
2208 
2209 	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
2210 	if (ctx.offset == NULL)
2211 		goto out_offset;
2212 
2213 	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
2214 	build_prologue(&ctx);
2215 	if (build_body(&ctx, extra_pass))
2216 		goto out_offset;
2217 	ctx.epilogue_offset = ctx.idx;
2218 	build_epilogue(&ctx);
2219 
2220 	extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry);
2221 
2222 	/* Now we know the actual image size.
2223 	 * Since each LoongArch instruction is 32 bits wide, we translate the
2224 	 * number of JITed instructions into the size required to store the
2225 	 * JITed code.
2226 	 */
2227 	prog_size = sizeof(u32) * ctx.idx;
2228 	image_size = prog_size + extable_size;
2229 	/* Now we know the size of the image to allocate */
2230 	ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u32),
2231 					      &header, &image_ptr, jit_fill_hole);
2232 	if (!ro_header)
2233 		goto out_offset;
2234 
2235 	/* 2. Now, the actual pass to generate final JIT code */
2236 	/*
2237 	 * Use the image (RW) for writing the JITed instructions. But also save
2238 	 * the ro_image (RX) for calculating the offsets in the image. The RW
2239 	 * image will later be copied to the RX image, from where the program
2240 	 * will run; bpf_jit_binary_pack_finalize() does this copy in the final
2241 	 * step.
2242 	 */
2243 	ctx.image = (union loongarch_instruction *)image_ptr;
2244 	ctx.ro_image = (union loongarch_instruction *)ro_image_ptr;
2245 	if (extable_size)
2246 		prog->aux->extable = (void *)ro_image_ptr + prog_size;
2247 
2248 skip_init_ctx:
2249 	ctx.idx = 0;
2250 	ctx.num_exentries = 0;
2251 
2252 	build_prologue(&ctx);
2253 	if (build_body(&ctx, extra_pass))
2254 		goto out_free;
2255 	build_epilogue(&ctx);
2256 
2257 	/* 3. Extra pass to validate JITed code */
2258 	if (validate_ctx(&ctx))
2259 		goto out_free;
2260 
2261 	/* And we're done */
2262 	if (bpf_jit_enable > 1)
2263 		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
2264 
2265 	if (!prog->is_func || extra_pass) {
2266 		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
2267 			pr_err_once("multi-func JIT bug %d != %d\n",
2268 				    ctx.idx, jit_data->ctx.idx);
2269 			goto out_free;
2270 		}
2271 		if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
2272 			/* ro_header and header have been freed */
2273 			ro_header = NULL;
2274 			header = NULL;
2275 			goto out_free;
2276 		}
2277 		/*
2278 		 * The instructions have now been copied to the ROX region from
2279 		 * where they will execute. Now the data cache has to be cleaned
2280 		 * to the PoU and the I-cache has to be invalidated for the VAs.
2281 		 */
2282 		bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx);
2283 	} else {
2284 		jit_data->ctx = ctx;
2285 		jit_data->header = header;
2286 		jit_data->ro_header = ro_header;
2287 	}
2288 	prog->jited = 1;
2289 	prog->jited_len = prog_size;
2290 	prog->bpf_func = (void *)ctx.ro_image;
2291 
2292 	if (!prog->is_func || extra_pass) {
2293 		int i;
2294 
2295 		/* offset[prog->len] is the size of the program */
2296 		for (i = 0; i <= prog->len; i++)
2297 			ctx.offset[i] *= LOONGARCH_INSN_SIZE;
2298 		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
2299 
2300 out_offset:
2301 		kvfree(ctx.offset);
2302 		kfree(jit_data);
2303 		prog->aux->jit_data = NULL;
2304 	}
2305 
2306 	return prog;
2307 
2308 out_free:
2309 	if (extra_pass) {
2310 		prog->bpf_func = NULL;
2311 		prog->jited = 0;
2312 		prog->jited_len = 0;
2313 	}
2314 
2315 	if (header) {
2316 		bpf_arch_text_copy(&ro_header->size, &header->size, sizeof(header->size));
2317 		bpf_jit_binary_pack_free(ro_header, header);
2318 	}
2319 	goto out_offset;
2320 }
2321 
2322 void bpf_jit_free(struct bpf_prog *prog)
2323 {
2324 	if (prog->jited) {
2325 		struct jit_data *jit_data = prog->aux->jit_data;
2326 		struct bpf_binary_header *hdr;
2327 
2328 		/*
2329 		 * If we fail the final pass of JIT (from jit_subprogs), the
2330 		 * program may not be finalized yet. Call finalize here before
2331 		 * freeing it.
2332 		 */
2333 		if (jit_data) {
2334 			bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header);
2335 			kfree(jit_data);
2336 		}
2337 		hdr = bpf_jit_binary_pack_hdr(prog);
2338 		bpf_jit_binary_pack_free(hdr, NULL);
2339 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
2340 	}
2341 
2342 	bpf_prog_unlock_free(prog);
2343 }
2344 
2345 bool bpf_jit_bypass_spec_v1(void)
2346 {
2347 	return true;
2348 }
2349 
2350 bool bpf_jit_bypass_spec_v4(void)
2351 {
2352 	return true;
2353 }
2354 
2355 bool bpf_jit_supports_arena(void)
2356 {
2357 	return true;
2358 }
2359 
2360 bool bpf_jit_supports_fsession(void)
2361 {
2362 	return true;
2363 }
2364 
2365 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
2366 bool bpf_jit_supports_subprog_tailcalls(void)
2367 {
2368 	return true;
2369 }
2370