xref: /linux/arch/loongarch/net/bpf_jit.c (revision 0f657938e4345a77be871d906f3e0de3c58a7a49)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * BPF JIT compiler for LoongArch
4  *
5  * Copyright (C) 2022 Loongson Technology Corporation Limited
6  */
7 #include "bpf_jit.h"
8 
/*
 * REG_TCC is the register in which the tail call count (TCC) is passed
 * into a program; TCC_SAVED is the register (saved/restored by the
 * prologue/epilogue) that the count is copied into when the program
 * contains both calls and tail calls.
 */
#define REG_TCC		LOONGARCH_GPR_A6
#define TCC_SAVED	LOONGARCH_GPR_S5

/* Bits kept in jit_ctx::flags describing what the program body contains. */
#define SAVE_RA		BIT(0)	/* set by mark_call() */
#define SAVE_TCC	BIT(1)	/* set by mark_tail_call() */
14 
/*
 * Mapping from eBPF register number (array index) to the LoongArch
 * general purpose register that holds it in JITed code.
 */
static const int regmap[] = {
	/* return value from in-kernel function, and exit value for eBPF program */
	[BPF_REG_0] = LOONGARCH_GPR_A5,
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = LOONGARCH_GPR_A0,
	[BPF_REG_2] = LOONGARCH_GPR_A1,
	[BPF_REG_3] = LOONGARCH_GPR_A2,
	[BPF_REG_4] = LOONGARCH_GPR_A3,
	[BPF_REG_5] = LOONGARCH_GPR_A4,
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = LOONGARCH_GPR_S0,
	[BPF_REG_7] = LOONGARCH_GPR_S1,
	[BPF_REG_8] = LOONGARCH_GPR_S2,
	[BPF_REG_9] = LOONGARCH_GPR_S3,
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = LOONGARCH_GPR_S4,
	/* temporary register for blinding constants */
	[BPF_REG_AX] = LOONGARCH_GPR_T0,
};
34 
35 static void mark_call(struct jit_ctx *ctx)
36 {
37 	ctx->flags |= SAVE_RA;
38 }
39 
40 static void mark_tail_call(struct jit_ctx *ctx)
41 {
42 	ctx->flags |= SAVE_TCC;
43 }
44 
45 static bool seen_call(struct jit_ctx *ctx)
46 {
47 	return (ctx->flags & SAVE_RA);
48 }
49 
50 static bool seen_tail_call(struct jit_ctx *ctx)
51 {
52 	return (ctx->flags & SAVE_TCC);
53 }
54 
55 static u8 tail_call_reg(struct jit_ctx *ctx)
56 {
57 	if (seen_call(ctx))
58 		return TCC_SAVED;
59 
60 	return REG_TCC;
61 }
62 
63 /*
64  * eBPF prog stack layout:
65  *
66  *                                        high
67  * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
68  *                            |           $ra           |
69  *                            +-------------------------+
70  *                            |           $fp           |
71  *                            +-------------------------+
72  *                            |           $s0           |
73  *                            +-------------------------+
74  *                            |           $s1           |
75  *                            +-------------------------+
76  *                            |           $s2           |
77  *                            +-------------------------+
78  *                            |           $s3           |
79  *                            +-------------------------+
80  *                            |           $s4           |
81  *                            +-------------------------+
82  *                            |           $s5           |
83  *                            +-------------------------+ <--BPF_REG_FP
84  *                            |  prog->aux->stack_depth |
85  *                            |        (optional)       |
86  * current $sp -------------> +-------------------------+
87  *                                        low
88  */
89 static void build_prologue(struct jit_ctx *ctx)
90 {
91 	int stack_adjust = 0, store_offset, bpf_stack_adjust;
92 
93 	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
94 
95 	/* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
96 	stack_adjust += sizeof(long) * 8;
97 
98 	stack_adjust = round_up(stack_adjust, 16);
99 	stack_adjust += bpf_stack_adjust;
100 
101 	/*
102 	 * First instruction initializes the tail call count (TCC).
103 	 * On tail call we skip this instruction, and the TCC is
104 	 * passed in REG_TCC from the caller.
105 	 */
106 	emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
107 
108 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
109 
110 	store_offset = stack_adjust - sizeof(long);
111 	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);
112 
113 	store_offset -= sizeof(long);
114 	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);
115 
116 	store_offset -= sizeof(long);
117 	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);
118 
119 	store_offset -= sizeof(long);
120 	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);
121 
122 	store_offset -= sizeof(long);
123 	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);
124 
125 	store_offset -= sizeof(long);
126 	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);
127 
128 	store_offset -= sizeof(long);
129 	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);
130 
131 	store_offset -= sizeof(long);
132 	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
133 
134 	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
135 
136 	if (bpf_stack_adjust)
137 		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
138 
139 	/*
140 	 * Program contains calls and tail calls, so REG_TCC need
141 	 * to be saved across calls.
142 	 */
143 	if (seen_tail_call(ctx) && seen_call(ctx))
144 		move_reg(ctx, TCC_SAVED, REG_TCC);
145 
146 	ctx->stack_size = stack_adjust;
147 }
148 
149 static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
150 {
151 	int stack_adjust = ctx->stack_size;
152 	int load_offset;
153 
154 	load_offset = stack_adjust - sizeof(long);
155 	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);
156 
157 	load_offset -= sizeof(long);
158 	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);
159 
160 	load_offset -= sizeof(long);
161 	emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);
162 
163 	load_offset -= sizeof(long);
164 	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);
165 
166 	load_offset -= sizeof(long);
167 	emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);
168 
169 	load_offset -= sizeof(long);
170 	emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);
171 
172 	load_offset -= sizeof(long);
173 	emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);
174 
175 	load_offset -= sizeof(long);
176 	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
177 
178 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
179 
180 	if (!is_tail_call) {
181 		/* Set return value */
182 		move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]);
183 		/* Return to the caller */
184 		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0);
185 	} else {
186 		/*
187 		 * Call the next bpf prog and skip the first instruction
188 		 * of TCC initialization.
189 		 */
190 		emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1);
191 	}
192 }
193 
194 static void build_epilogue(struct jit_ctx *ctx)
195 {
196 	__build_epilogue(ctx, false);
197 }
198 
/* Reports kfunc call support; unconditionally true for this JIT. */
bool bpf_jit_supports_kfunc_call(void)
{
	const bool supported = true;

	return supported;
}
203 
204 /* initialized on the first pass of build_body() */
205 static int out_offset = -1;
206 static int emit_bpf_tail_call(struct jit_ctx *ctx)
207 {
208 	int off;
209 	u8 tcc = tail_call_reg(ctx);
210 	u8 a1 = LOONGARCH_GPR_A1;
211 	u8 a2 = LOONGARCH_GPR_A2;
212 	u8 t1 = LOONGARCH_GPR_T1;
213 	u8 t2 = LOONGARCH_GPR_T2;
214 	u8 t3 = LOONGARCH_GPR_T3;
215 	const int idx0 = ctx->idx;
216 
217 #define cur_offset (ctx->idx - idx0)
218 #define jmp_offset (out_offset - (cur_offset))
219 
220 	/*
221 	 * a0: &ctx
222 	 * a1: &array
223 	 * a2: index
224 	 *
225 	 * if (index >= array->map.max_entries)
226 	 *	 goto out;
227 	 */
228 	off = offsetof(struct bpf_array, map.max_entries);
229 	emit_insn(ctx, ldwu, t1, a1, off);
230 	/* bgeu $a2, $t1, jmp_offset */
231 	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
232 		goto toofar;
233 
234 	/*
235 	 * if (--TCC < 0)
236 	 *	 goto out;
237 	 */
238 	emit_insn(ctx, addid, REG_TCC, tcc, -1);
239 	if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
240 		goto toofar;
241 
242 	/*
243 	 * prog = array->ptrs[index];
244 	 * if (!prog)
245 	 *	 goto out;
246 	 */
247 	emit_insn(ctx, alsld, t2, a2, a1, 2);
248 	off = offsetof(struct bpf_array, ptrs);
249 	emit_insn(ctx, ldd, t2, t2, off);
250 	/* beq $t2, $zero, jmp_offset */
251 	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
252 		goto toofar;
253 
254 	/* goto *(prog->bpf_func + 4); */
255 	off = offsetof(struct bpf_prog, bpf_func);
256 	emit_insn(ctx, ldd, t3, t2, off);
257 	__build_epilogue(ctx, true);
258 
259 	/* out: */
260 	if (out_offset == -1)
261 		out_offset = cur_offset;
262 	if (cur_offset != out_offset) {
263 		pr_err_once("tail_call out_offset = %d, expected %d!\n",
264 			    cur_offset, out_offset);
265 		return -1;
266 	}
267 
268 	return 0;
269 
270 toofar:
271 	pr_info_once("tail_call: jump too far\n");
272 	return -1;
273 #undef cur_offset
274 #undef jmp_offset
275 }
276 
277 static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
278 {
279 	const u8 t1 = LOONGARCH_GPR_T1;
280 	const u8 t2 = LOONGARCH_GPR_T2;
281 	const u8 t3 = LOONGARCH_GPR_T3;
282 	const u8 r0 = regmap[BPF_REG_0];
283 	const u8 src = regmap[insn->src_reg];
284 	const u8 dst = regmap[insn->dst_reg];
285 	const s16 off = insn->off;
286 	const s32 imm = insn->imm;
287 	const bool isdw = BPF_SIZE(insn->code) == BPF_DW;
288 
289 	move_imm(ctx, t1, off, false);
290 	emit_insn(ctx, addd, t1, dst, t1);
291 	move_reg(ctx, t3, src);
292 
293 	switch (imm) {
294 	/* lock *(size *)(dst + off) <op>= src */
295 	case BPF_ADD:
296 		if (isdw)
297 			emit_insn(ctx, amaddd, t2, t1, src);
298 		else
299 			emit_insn(ctx, amaddw, t2, t1, src);
300 		break;
301 	case BPF_AND:
302 		if (isdw)
303 			emit_insn(ctx, amandd, t2, t1, src);
304 		else
305 			emit_insn(ctx, amandw, t2, t1, src);
306 		break;
307 	case BPF_OR:
308 		if (isdw)
309 			emit_insn(ctx, amord, t2, t1, src);
310 		else
311 			emit_insn(ctx, amorw, t2, t1, src);
312 		break;
313 	case BPF_XOR:
314 		if (isdw)
315 			emit_insn(ctx, amxord, t2, t1, src);
316 		else
317 			emit_insn(ctx, amxorw, t2, t1, src);
318 		break;
319 	/* src = atomic_fetch_<op>(dst + off, src) */
320 	case BPF_ADD | BPF_FETCH:
321 		if (isdw) {
322 			emit_insn(ctx, amaddd, src, t1, t3);
323 		} else {
324 			emit_insn(ctx, amaddw, src, t1, t3);
325 			emit_zext_32(ctx, src, true);
326 		}
327 		break;
328 	case BPF_AND | BPF_FETCH:
329 		if (isdw) {
330 			emit_insn(ctx, amandd, src, t1, t3);
331 		} else {
332 			emit_insn(ctx, amandw, src, t1, t3);
333 			emit_zext_32(ctx, src, true);
334 		}
335 		break;
336 	case BPF_OR | BPF_FETCH:
337 		if (isdw) {
338 			emit_insn(ctx, amord, src, t1, t3);
339 		} else {
340 			emit_insn(ctx, amorw, src, t1, t3);
341 			emit_zext_32(ctx, src, true);
342 		}
343 		break;
344 	case BPF_XOR | BPF_FETCH:
345 		if (isdw) {
346 			emit_insn(ctx, amxord, src, t1, t3);
347 		} else {
348 			emit_insn(ctx, amxorw, src, t1, t3);
349 			emit_zext_32(ctx, src, true);
350 		}
351 		break;
352 	/* src = atomic_xchg(dst + off, src); */
353 	case BPF_XCHG:
354 		if (isdw) {
355 			emit_insn(ctx, amswapd, src, t1, t3);
356 		} else {
357 			emit_insn(ctx, amswapw, src, t1, t3);
358 			emit_zext_32(ctx, src, true);
359 		}
360 		break;
361 	/* r0 = atomic_cmpxchg(dst + off, r0, src); */
362 	case BPF_CMPXCHG:
363 		move_reg(ctx, t2, r0);
364 		if (isdw) {
365 			emit_insn(ctx, lld, r0, t1, 0);
366 			emit_insn(ctx, bne, t2, r0, 4);
367 			move_reg(ctx, t3, src);
368 			emit_insn(ctx, scd, t3, t1, 0);
369 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
370 		} else {
371 			emit_insn(ctx, llw, r0, t1, 0);
372 			emit_zext_32(ctx, t2, true);
373 			emit_zext_32(ctx, r0, true);
374 			emit_insn(ctx, bne, t2, r0, 4);
375 			move_reg(ctx, t3, src);
376 			emit_insn(ctx, scw, t3, t1, 0);
377 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
378 			emit_zext_32(ctx, r0, true);
379 		}
380 		break;
381 	}
382 }
383 
384 static bool is_signed_bpf_cond(u8 cond)
385 {
386 	return cond == BPF_JSGT || cond == BPF_JSLT ||
387 	       cond == BPF_JSGE || cond == BPF_JSLE;
388 }
389 
/*
 * Layout of exception_table_entry::fixup for BPF entries: the destination
 * register number in bits 31..27 and a (positive) offset in bits 26..0.
 */
#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
392 
393 bool ex_handler_bpf(const struct exception_table_entry *ex,
394 		    struct pt_regs *regs)
395 {
396 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
397 	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
398 
399 	regs->regs[dst_reg] = 0;
400 	regs->csr_era = (unsigned long)&ex->fixup - offset;
401 
402 	return true;
403 }
404 
405 /* For accesses to BTF pointers, add an entry to the exception table */
406 static int add_exception_handler(const struct bpf_insn *insn,
407 				 struct jit_ctx *ctx,
408 				 int dst_reg)
409 {
410 	unsigned long pc;
411 	off_t offset;
412 	struct exception_table_entry *ex;
413 
414 	if (!ctx->image || !ctx->prog->aux->extable)
415 		return 0;
416 
417 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
418 	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
419 		return 0;
420 
421 	if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
422 		return -EINVAL;
423 
424 	ex = &ctx->prog->aux->extable[ctx->num_exentries];
425 	pc = (unsigned long)&ctx->image[ctx->idx - 1];
426 
427 	offset = pc - (long)&ex->insn;
428 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
429 		return -ERANGE;
430 
431 	ex->insn = offset;
432 
433 	/*
434 	 * Since the extable follows the program, the fixup offset is always
435 	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
436 	 * to keep things simple, and put the destination register in the upper
437 	 * bits. We don't need to worry about buildtime or runtime sort
438 	 * modifying the upper bits because the table is already sorted, and
439 	 * isn't part of the main exception table.
440 	 */
441 	offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
442 	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
443 		return -ERANGE;
444 
445 	ex->type = EX_TYPE_BPF;
446 	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
447 
448 	ctx->num_exentries++;
449 
450 	return 0;
451 }
452 
453 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
454 {
455 	u8 tm = -1;
456 	u64 func_addr;
457 	bool func_addr_fixed, sign_extend;
458 	int i = insn - ctx->prog->insnsi;
459 	int ret, jmp_offset;
460 	const u8 code = insn->code;
461 	const u8 cond = BPF_OP(code);
462 	const u8 t1 = LOONGARCH_GPR_T1;
463 	const u8 t2 = LOONGARCH_GPR_T2;
464 	const u8 src = regmap[insn->src_reg];
465 	const u8 dst = regmap[insn->dst_reg];
466 	const s16 off = insn->off;
467 	const s32 imm = insn->imm;
468 	const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
469 	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
470 
471 	switch (code) {
472 	/* dst = src */
473 	case BPF_ALU | BPF_MOV | BPF_X:
474 	case BPF_ALU64 | BPF_MOV | BPF_X:
475 		switch (off) {
476 		case 0:
477 			move_reg(ctx, dst, src);
478 			emit_zext_32(ctx, dst, is32);
479 			break;
480 		case 8:
481 			move_reg(ctx, t1, src);
482 			emit_insn(ctx, extwb, dst, t1);
483 			emit_zext_32(ctx, dst, is32);
484 			break;
485 		case 16:
486 			move_reg(ctx, t1, src);
487 			emit_insn(ctx, extwh, dst, t1);
488 			emit_zext_32(ctx, dst, is32);
489 			break;
490 		case 32:
491 			emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
492 			break;
493 		}
494 		break;
495 
496 	/* dst = imm */
497 	case BPF_ALU | BPF_MOV | BPF_K:
498 	case BPF_ALU64 | BPF_MOV | BPF_K:
499 		move_imm(ctx, dst, imm, is32);
500 		break;
501 
502 	/* dst = dst + src */
503 	case BPF_ALU | BPF_ADD | BPF_X:
504 	case BPF_ALU64 | BPF_ADD | BPF_X:
505 		emit_insn(ctx, addd, dst, dst, src);
506 		emit_zext_32(ctx, dst, is32);
507 		break;
508 
509 	/* dst = dst + imm */
510 	case BPF_ALU | BPF_ADD | BPF_K:
511 	case BPF_ALU64 | BPF_ADD | BPF_K:
512 		if (is_signed_imm12(imm)) {
513 			emit_insn(ctx, addid, dst, dst, imm);
514 		} else {
515 			move_imm(ctx, t1, imm, is32);
516 			emit_insn(ctx, addd, dst, dst, t1);
517 		}
518 		emit_zext_32(ctx, dst, is32);
519 		break;
520 
521 	/* dst = dst - src */
522 	case BPF_ALU | BPF_SUB | BPF_X:
523 	case BPF_ALU64 | BPF_SUB | BPF_X:
524 		emit_insn(ctx, subd, dst, dst, src);
525 		emit_zext_32(ctx, dst, is32);
526 		break;
527 
528 	/* dst = dst - imm */
529 	case BPF_ALU | BPF_SUB | BPF_K:
530 	case BPF_ALU64 | BPF_SUB | BPF_K:
531 		if (is_signed_imm12(-imm)) {
532 			emit_insn(ctx, addid, dst, dst, -imm);
533 		} else {
534 			move_imm(ctx, t1, imm, is32);
535 			emit_insn(ctx, subd, dst, dst, t1);
536 		}
537 		emit_zext_32(ctx, dst, is32);
538 		break;
539 
540 	/* dst = dst * src */
541 	case BPF_ALU | BPF_MUL | BPF_X:
542 	case BPF_ALU64 | BPF_MUL | BPF_X:
543 		emit_insn(ctx, muld, dst, dst, src);
544 		emit_zext_32(ctx, dst, is32);
545 		break;
546 
547 	/* dst = dst * imm */
548 	case BPF_ALU | BPF_MUL | BPF_K:
549 	case BPF_ALU64 | BPF_MUL | BPF_K:
550 		move_imm(ctx, t1, imm, is32);
551 		emit_insn(ctx, muld, dst, dst, t1);
552 		emit_zext_32(ctx, dst, is32);
553 		break;
554 
555 	/* dst = dst / src */
556 	case BPF_ALU | BPF_DIV | BPF_X:
557 	case BPF_ALU64 | BPF_DIV | BPF_X:
558 		if (!off) {
559 			emit_zext_32(ctx, dst, is32);
560 			move_reg(ctx, t1, src);
561 			emit_zext_32(ctx, t1, is32);
562 			emit_insn(ctx, divdu, dst, dst, t1);
563 			emit_zext_32(ctx, dst, is32);
564 		} else {
565 			emit_sext_32(ctx, dst, is32);
566 			move_reg(ctx, t1, src);
567 			emit_sext_32(ctx, t1, is32);
568 			emit_insn(ctx, divd, dst, dst, t1);
569 			emit_sext_32(ctx, dst, is32);
570 		}
571 		break;
572 
573 	/* dst = dst / imm */
574 	case BPF_ALU | BPF_DIV | BPF_K:
575 	case BPF_ALU64 | BPF_DIV | BPF_K:
576 		if (!off) {
577 			move_imm(ctx, t1, imm, is32);
578 			emit_zext_32(ctx, dst, is32);
579 			emit_insn(ctx, divdu, dst, dst, t1);
580 			emit_zext_32(ctx, dst, is32);
581 		} else {
582 			move_imm(ctx, t1, imm, false);
583 			emit_sext_32(ctx, t1, is32);
584 			emit_sext_32(ctx, dst, is32);
585 			emit_insn(ctx, divd, dst, dst, t1);
586 			emit_sext_32(ctx, dst, is32);
587 		}
588 		break;
589 
590 	/* dst = dst % src */
591 	case BPF_ALU | BPF_MOD | BPF_X:
592 	case BPF_ALU64 | BPF_MOD | BPF_X:
593 		if (!off) {
594 			emit_zext_32(ctx, dst, is32);
595 			move_reg(ctx, t1, src);
596 			emit_zext_32(ctx, t1, is32);
597 			emit_insn(ctx, moddu, dst, dst, t1);
598 			emit_zext_32(ctx, dst, is32);
599 		} else {
600 			emit_sext_32(ctx, dst, is32);
601 			move_reg(ctx, t1, src);
602 			emit_sext_32(ctx, t1, is32);
603 			emit_insn(ctx, modd, dst, dst, t1);
604 			emit_sext_32(ctx, dst, is32);
605 		}
606 		break;
607 
608 	/* dst = dst % imm */
609 	case BPF_ALU | BPF_MOD | BPF_K:
610 	case BPF_ALU64 | BPF_MOD | BPF_K:
611 		if (!off) {
612 			move_imm(ctx, t1, imm, is32);
613 			emit_zext_32(ctx, dst, is32);
614 			emit_insn(ctx, moddu, dst, dst, t1);
615 			emit_zext_32(ctx, dst, is32);
616 		} else {
617 			move_imm(ctx, t1, imm, false);
618 			emit_sext_32(ctx, t1, is32);
619 			emit_sext_32(ctx, dst, is32);
620 			emit_insn(ctx, modd, dst, dst, t1);
621 			emit_sext_32(ctx, dst, is32);
622 		}
623 		break;
624 
625 	/* dst = -dst */
626 	case BPF_ALU | BPF_NEG:
627 	case BPF_ALU64 | BPF_NEG:
628 		move_imm(ctx, t1, imm, is32);
629 		emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
630 		emit_zext_32(ctx, dst, is32);
631 		break;
632 
633 	/* dst = dst & src */
634 	case BPF_ALU | BPF_AND | BPF_X:
635 	case BPF_ALU64 | BPF_AND | BPF_X:
636 		emit_insn(ctx, and, dst, dst, src);
637 		emit_zext_32(ctx, dst, is32);
638 		break;
639 
640 	/* dst = dst & imm */
641 	case BPF_ALU | BPF_AND | BPF_K:
642 	case BPF_ALU64 | BPF_AND | BPF_K:
643 		if (is_unsigned_imm12(imm)) {
644 			emit_insn(ctx, andi, dst, dst, imm);
645 		} else {
646 			move_imm(ctx, t1, imm, is32);
647 			emit_insn(ctx, and, dst, dst, t1);
648 		}
649 		emit_zext_32(ctx, dst, is32);
650 		break;
651 
652 	/* dst = dst | src */
653 	case BPF_ALU | BPF_OR | BPF_X:
654 	case BPF_ALU64 | BPF_OR | BPF_X:
655 		emit_insn(ctx, or, dst, dst, src);
656 		emit_zext_32(ctx, dst, is32);
657 		break;
658 
659 	/* dst = dst | imm */
660 	case BPF_ALU | BPF_OR | BPF_K:
661 	case BPF_ALU64 | BPF_OR | BPF_K:
662 		if (is_unsigned_imm12(imm)) {
663 			emit_insn(ctx, ori, dst, dst, imm);
664 		} else {
665 			move_imm(ctx, t1, imm, is32);
666 			emit_insn(ctx, or, dst, dst, t1);
667 		}
668 		emit_zext_32(ctx, dst, is32);
669 		break;
670 
671 	/* dst = dst ^ src */
672 	case BPF_ALU | BPF_XOR | BPF_X:
673 	case BPF_ALU64 | BPF_XOR | BPF_X:
674 		emit_insn(ctx, xor, dst, dst, src);
675 		emit_zext_32(ctx, dst, is32);
676 		break;
677 
678 	/* dst = dst ^ imm */
679 	case BPF_ALU | BPF_XOR | BPF_K:
680 	case BPF_ALU64 | BPF_XOR | BPF_K:
681 		if (is_unsigned_imm12(imm)) {
682 			emit_insn(ctx, xori, dst, dst, imm);
683 		} else {
684 			move_imm(ctx, t1, imm, is32);
685 			emit_insn(ctx, xor, dst, dst, t1);
686 		}
687 		emit_zext_32(ctx, dst, is32);
688 		break;
689 
690 	/* dst = dst << src (logical) */
691 	case BPF_ALU | BPF_LSH | BPF_X:
692 		emit_insn(ctx, sllw, dst, dst, src);
693 		emit_zext_32(ctx, dst, is32);
694 		break;
695 
696 	case BPF_ALU64 | BPF_LSH | BPF_X:
697 		emit_insn(ctx, slld, dst, dst, src);
698 		break;
699 
700 	/* dst = dst << imm (logical) */
701 	case BPF_ALU | BPF_LSH | BPF_K:
702 		emit_insn(ctx, slliw, dst, dst, imm);
703 		emit_zext_32(ctx, dst, is32);
704 		break;
705 
706 	case BPF_ALU64 | BPF_LSH | BPF_K:
707 		emit_insn(ctx, sllid, dst, dst, imm);
708 		break;
709 
710 	/* dst = dst >> src (logical) */
711 	case BPF_ALU | BPF_RSH | BPF_X:
712 		emit_insn(ctx, srlw, dst, dst, src);
713 		emit_zext_32(ctx, dst, is32);
714 		break;
715 
716 	case BPF_ALU64 | BPF_RSH | BPF_X:
717 		emit_insn(ctx, srld, dst, dst, src);
718 		break;
719 
720 	/* dst = dst >> imm (logical) */
721 	case BPF_ALU | BPF_RSH | BPF_K:
722 		emit_insn(ctx, srliw, dst, dst, imm);
723 		emit_zext_32(ctx, dst, is32);
724 		break;
725 
726 	case BPF_ALU64 | BPF_RSH | BPF_K:
727 		emit_insn(ctx, srlid, dst, dst, imm);
728 		break;
729 
730 	/* dst = dst >> src (arithmetic) */
731 	case BPF_ALU | BPF_ARSH | BPF_X:
732 		emit_insn(ctx, sraw, dst, dst, src);
733 		emit_zext_32(ctx, dst, is32);
734 		break;
735 
736 	case BPF_ALU64 | BPF_ARSH | BPF_X:
737 		emit_insn(ctx, srad, dst, dst, src);
738 		break;
739 
740 	/* dst = dst >> imm (arithmetic) */
741 	case BPF_ALU | BPF_ARSH | BPF_K:
742 		emit_insn(ctx, sraiw, dst, dst, imm);
743 		emit_zext_32(ctx, dst, is32);
744 		break;
745 
746 	case BPF_ALU64 | BPF_ARSH | BPF_K:
747 		emit_insn(ctx, sraid, dst, dst, imm);
748 		break;
749 
750 	/* dst = BSWAP##imm(dst) */
751 	case BPF_ALU | BPF_END | BPF_FROM_LE:
752 		switch (imm) {
753 		case 16:
754 			/* zero-extend 16 bits into 64 bits */
755 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
756 			break;
757 		case 32:
758 			/* zero-extend 32 bits into 64 bits */
759 			emit_zext_32(ctx, dst, is32);
760 			break;
761 		case 64:
762 			/* do nothing */
763 			break;
764 		}
765 		break;
766 
767 	case BPF_ALU | BPF_END | BPF_FROM_BE:
768 	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
769 		switch (imm) {
770 		case 16:
771 			emit_insn(ctx, revb2h, dst, dst);
772 			/* zero-extend 16 bits into 64 bits */
773 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
774 			break;
775 		case 32:
776 			emit_insn(ctx, revb2w, dst, dst);
777 			/* clear the upper 32 bits */
778 			emit_zext_32(ctx, dst, true);
779 			break;
780 		case 64:
781 			emit_insn(ctx, revbd, dst, dst);
782 			break;
783 		}
784 		break;
785 
786 	/* PC += off if dst cond src */
787 	case BPF_JMP | BPF_JEQ | BPF_X:
788 	case BPF_JMP | BPF_JNE | BPF_X:
789 	case BPF_JMP | BPF_JGT | BPF_X:
790 	case BPF_JMP | BPF_JGE | BPF_X:
791 	case BPF_JMP | BPF_JLT | BPF_X:
792 	case BPF_JMP | BPF_JLE | BPF_X:
793 	case BPF_JMP | BPF_JSGT | BPF_X:
794 	case BPF_JMP | BPF_JSGE | BPF_X:
795 	case BPF_JMP | BPF_JSLT | BPF_X:
796 	case BPF_JMP | BPF_JSLE | BPF_X:
797 	case BPF_JMP32 | BPF_JEQ | BPF_X:
798 	case BPF_JMP32 | BPF_JNE | BPF_X:
799 	case BPF_JMP32 | BPF_JGT | BPF_X:
800 	case BPF_JMP32 | BPF_JGE | BPF_X:
801 	case BPF_JMP32 | BPF_JLT | BPF_X:
802 	case BPF_JMP32 | BPF_JLE | BPF_X:
803 	case BPF_JMP32 | BPF_JSGT | BPF_X:
804 	case BPF_JMP32 | BPF_JSGE | BPF_X:
805 	case BPF_JMP32 | BPF_JSLT | BPF_X:
806 	case BPF_JMP32 | BPF_JSLE | BPF_X:
807 		jmp_offset = bpf2la_offset(i, off, ctx);
808 		move_reg(ctx, t1, dst);
809 		move_reg(ctx, t2, src);
810 		if (is_signed_bpf_cond(BPF_OP(code))) {
811 			emit_sext_32(ctx, t1, is32);
812 			emit_sext_32(ctx, t2, is32);
813 		} else {
814 			emit_zext_32(ctx, t1, is32);
815 			emit_zext_32(ctx, t2, is32);
816 		}
817 		if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
818 			goto toofar;
819 		break;
820 
821 	/* PC += off if dst cond imm */
822 	case BPF_JMP | BPF_JEQ | BPF_K:
823 	case BPF_JMP | BPF_JNE | BPF_K:
824 	case BPF_JMP | BPF_JGT | BPF_K:
825 	case BPF_JMP | BPF_JGE | BPF_K:
826 	case BPF_JMP | BPF_JLT | BPF_K:
827 	case BPF_JMP | BPF_JLE | BPF_K:
828 	case BPF_JMP | BPF_JSGT | BPF_K:
829 	case BPF_JMP | BPF_JSGE | BPF_K:
830 	case BPF_JMP | BPF_JSLT | BPF_K:
831 	case BPF_JMP | BPF_JSLE | BPF_K:
832 	case BPF_JMP32 | BPF_JEQ | BPF_K:
833 	case BPF_JMP32 | BPF_JNE | BPF_K:
834 	case BPF_JMP32 | BPF_JGT | BPF_K:
835 	case BPF_JMP32 | BPF_JGE | BPF_K:
836 	case BPF_JMP32 | BPF_JLT | BPF_K:
837 	case BPF_JMP32 | BPF_JLE | BPF_K:
838 	case BPF_JMP32 | BPF_JSGT | BPF_K:
839 	case BPF_JMP32 | BPF_JSGE | BPF_K:
840 	case BPF_JMP32 | BPF_JSLT | BPF_K:
841 	case BPF_JMP32 | BPF_JSLE | BPF_K:
842 		jmp_offset = bpf2la_offset(i, off, ctx);
843 		if (imm) {
844 			move_imm(ctx, t1, imm, false);
845 			tm = t1;
846 		} else {
847 			/* If imm is 0, simply use zero register. */
848 			tm = LOONGARCH_GPR_ZERO;
849 		}
850 		move_reg(ctx, t2, dst);
851 		if (is_signed_bpf_cond(BPF_OP(code))) {
852 			emit_sext_32(ctx, tm, is32);
853 			emit_sext_32(ctx, t2, is32);
854 		} else {
855 			emit_zext_32(ctx, tm, is32);
856 			emit_zext_32(ctx, t2, is32);
857 		}
858 		if (emit_cond_jmp(ctx, cond, t2, tm, jmp_offset) < 0)
859 			goto toofar;
860 		break;
861 
862 	/* PC += off if dst & src */
863 	case BPF_JMP | BPF_JSET | BPF_X:
864 	case BPF_JMP32 | BPF_JSET | BPF_X:
865 		jmp_offset = bpf2la_offset(i, off, ctx);
866 		emit_insn(ctx, and, t1, dst, src);
867 		emit_zext_32(ctx, t1, is32);
868 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
869 			goto toofar;
870 		break;
871 
872 	/* PC += off if dst & imm */
873 	case BPF_JMP | BPF_JSET | BPF_K:
874 	case BPF_JMP32 | BPF_JSET | BPF_K:
875 		jmp_offset = bpf2la_offset(i, off, ctx);
876 		move_imm(ctx, t1, imm, is32);
877 		emit_insn(ctx, and, t1, dst, t1);
878 		emit_zext_32(ctx, t1, is32);
879 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
880 			goto toofar;
881 		break;
882 
883 	/* PC += off */
884 	case BPF_JMP | BPF_JA:
885 	case BPF_JMP32 | BPF_JA:
886 		if (BPF_CLASS(code) == BPF_JMP)
887 			jmp_offset = bpf2la_offset(i, off, ctx);
888 		else
889 			jmp_offset = bpf2la_offset(i, imm, ctx);
890 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
891 			goto toofar;
892 		break;
893 
894 	/* function call */
895 	case BPF_JMP | BPF_CALL:
896 		mark_call(ctx);
897 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
898 					    &func_addr, &func_addr_fixed);
899 		if (ret < 0)
900 			return ret;
901 
902 		move_addr(ctx, t1, func_addr);
903 		emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0);
904 		move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
905 		break;
906 
907 	/* tail call */
908 	case BPF_JMP | BPF_TAIL_CALL:
909 		mark_tail_call(ctx);
910 		if (emit_bpf_tail_call(ctx) < 0)
911 			return -EINVAL;
912 		break;
913 
914 	/* function return */
915 	case BPF_JMP | BPF_EXIT:
916 		if (i == ctx->prog->len - 1)
917 			break;
918 
919 		jmp_offset = epilogue_offset(ctx);
920 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
921 			goto toofar;
922 		break;
923 
924 	/* dst = imm64 */
925 	case BPF_LD | BPF_IMM | BPF_DW:
926 		move_imm(ctx, dst, imm64, is32);
927 		return 1;
928 
929 	/* dst = *(size *)(src + off) */
930 	case BPF_LDX | BPF_MEM | BPF_B:
931 	case BPF_LDX | BPF_MEM | BPF_H:
932 	case BPF_LDX | BPF_MEM | BPF_W:
933 	case BPF_LDX | BPF_MEM | BPF_DW:
934 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
935 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
936 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
937 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
938 	/* dst_reg = (s64)*(signed size *)(src_reg + off) */
939 	case BPF_LDX | BPF_MEMSX | BPF_B:
940 	case BPF_LDX | BPF_MEMSX | BPF_H:
941 	case BPF_LDX | BPF_MEMSX | BPF_W:
942 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
943 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
944 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
945 		sign_extend = BPF_MODE(insn->code) == BPF_MEMSX ||
946 			      BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
947 		switch (BPF_SIZE(code)) {
948 		case BPF_B:
949 			if (is_signed_imm12(off)) {
950 				if (sign_extend)
951 					emit_insn(ctx, ldb, dst, src, off);
952 				else
953 					emit_insn(ctx, ldbu, dst, src, off);
954 			} else {
955 				move_imm(ctx, t1, off, is32);
956 				if (sign_extend)
957 					emit_insn(ctx, ldxb, dst, src, t1);
958 				else
959 					emit_insn(ctx, ldxbu, dst, src, t1);
960 			}
961 			break;
962 		case BPF_H:
963 			if (is_signed_imm12(off)) {
964 				if (sign_extend)
965 					emit_insn(ctx, ldh, dst, src, off);
966 				else
967 					emit_insn(ctx, ldhu, dst, src, off);
968 			} else {
969 				move_imm(ctx, t1, off, is32);
970 				if (sign_extend)
971 					emit_insn(ctx, ldxh, dst, src, t1);
972 				else
973 					emit_insn(ctx, ldxhu, dst, src, t1);
974 			}
975 			break;
976 		case BPF_W:
977 			if (is_signed_imm12(off)) {
978 				if (sign_extend)
979 					emit_insn(ctx, ldw, dst, src, off);
980 				else
981 					emit_insn(ctx, ldwu, dst, src, off);
982 			} else {
983 				move_imm(ctx, t1, off, is32);
984 				if (sign_extend)
985 					emit_insn(ctx, ldxw, dst, src, t1);
986 				else
987 					emit_insn(ctx, ldxwu, dst, src, t1);
988 			}
989 			break;
990 		case BPF_DW:
991 			move_imm(ctx, t1, off, is32);
992 			emit_insn(ctx, ldxd, dst, src, t1);
993 			break;
994 		}
995 
996 		ret = add_exception_handler(insn, ctx, dst);
997 		if (ret)
998 			return ret;
999 		break;
1000 
1001 	/* *(size *)(dst + off) = imm */
1002 	case BPF_ST | BPF_MEM | BPF_B:
1003 	case BPF_ST | BPF_MEM | BPF_H:
1004 	case BPF_ST | BPF_MEM | BPF_W:
1005 	case BPF_ST | BPF_MEM | BPF_DW:
1006 		switch (BPF_SIZE(code)) {
1007 		case BPF_B:
1008 			move_imm(ctx, t1, imm, is32);
1009 			if (is_signed_imm12(off)) {
1010 				emit_insn(ctx, stb, t1, dst, off);
1011 			} else {
1012 				move_imm(ctx, t2, off, is32);
1013 				emit_insn(ctx, stxb, t1, dst, t2);
1014 			}
1015 			break;
1016 		case BPF_H:
1017 			move_imm(ctx, t1, imm, is32);
1018 			if (is_signed_imm12(off)) {
1019 				emit_insn(ctx, sth, t1, dst, off);
1020 			} else {
1021 				move_imm(ctx, t2, off, is32);
1022 				emit_insn(ctx, stxh, t1, dst, t2);
1023 			}
1024 			break;
1025 		case BPF_W:
1026 			move_imm(ctx, t1, imm, is32);
1027 			if (is_signed_imm12(off)) {
1028 				emit_insn(ctx, stw, t1, dst, off);
1029 			} else if (is_signed_imm14(off)) {
1030 				emit_insn(ctx, stptrw, t1, dst, off);
1031 			} else {
1032 				move_imm(ctx, t2, off, is32);
1033 				emit_insn(ctx, stxw, t1, dst, t2);
1034 			}
1035 			break;
1036 		case BPF_DW:
1037 			move_imm(ctx, t1, imm, is32);
1038 			if (is_signed_imm12(off)) {
1039 				emit_insn(ctx, std, t1, dst, off);
1040 			} else if (is_signed_imm14(off)) {
1041 				emit_insn(ctx, stptrd, t1, dst, off);
1042 			} else {
1043 				move_imm(ctx, t2, off, is32);
1044 				emit_insn(ctx, stxd, t1, dst, t2);
1045 			}
1046 			break;
1047 		}
1048 		break;
1049 
1050 	/* *(size *)(dst + off) = src */
1051 	case BPF_STX | BPF_MEM | BPF_B:
1052 	case BPF_STX | BPF_MEM | BPF_H:
1053 	case BPF_STX | BPF_MEM | BPF_W:
1054 	case BPF_STX | BPF_MEM | BPF_DW:
1055 		switch (BPF_SIZE(code)) {
1056 		case BPF_B:
1057 			if (is_signed_imm12(off)) {
1058 				emit_insn(ctx, stb, src, dst, off);
1059 			} else {
1060 				move_imm(ctx, t1, off, is32);
1061 				emit_insn(ctx, stxb, src, dst, t1);
1062 			}
1063 			break;
1064 		case BPF_H:
1065 			if (is_signed_imm12(off)) {
1066 				emit_insn(ctx, sth, src, dst, off);
1067 			} else {
1068 				move_imm(ctx, t1, off, is32);
1069 				emit_insn(ctx, stxh, src, dst, t1);
1070 			}
1071 			break;
1072 		case BPF_W:
1073 			if (is_signed_imm12(off)) {
1074 				emit_insn(ctx, stw, src, dst, off);
1075 			} else if (is_signed_imm14(off)) {
1076 				emit_insn(ctx, stptrw, src, dst, off);
1077 			} else {
1078 				move_imm(ctx, t1, off, is32);
1079 				emit_insn(ctx, stxw, src, dst, t1);
1080 			}
1081 			break;
1082 		case BPF_DW:
1083 			if (is_signed_imm12(off)) {
1084 				emit_insn(ctx, std, src, dst, off);
1085 			} else if (is_signed_imm14(off)) {
1086 				emit_insn(ctx, stptrd, src, dst, off);
1087 			} else {
1088 				move_imm(ctx, t1, off, is32);
1089 				emit_insn(ctx, stxd, src, dst, t1);
1090 			}
1091 			break;
1092 		}
1093 		break;
1094 
1095 	case BPF_STX | BPF_ATOMIC | BPF_W:
1096 	case BPF_STX | BPF_ATOMIC | BPF_DW:
1097 		emit_atomic(insn, ctx);
1098 		break;
1099 
1100 	/* Speculation barrier */
1101 	case BPF_ST | BPF_NOSPEC:
1102 		break;
1103 
1104 	default:
1105 		pr_err("bpf_jit: unknown opcode %02x\n", code);
1106 		return -EINVAL;
1107 	}
1108 
1109 	return 0;
1110 
1111 toofar:
1112 	pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
1113 	return -E2BIG;
1114 }
1115 
1116 static int build_body(struct jit_ctx *ctx, bool extra_pass)
1117 {
1118 	int i;
1119 	const struct bpf_prog *prog = ctx->prog;
1120 
1121 	for (i = 0; i < prog->len; i++) {
1122 		const struct bpf_insn *insn = &prog->insnsi[i];
1123 		int ret;
1124 
1125 		if (ctx->image == NULL)
1126 			ctx->offset[i] = ctx->idx;
1127 
1128 		ret = build_insn(insn, ctx, extra_pass);
1129 		if (ret > 0) {
1130 			i++;
1131 			if (ctx->image == NULL)
1132 				ctx->offset[i] = ctx->idx;
1133 			continue;
1134 		}
1135 		if (ret)
1136 			return ret;
1137 	}
1138 
1139 	if (ctx->image == NULL)
1140 		ctx->offset[i] = ctx->idx;
1141 
1142 	return 0;
1143 }
1144 
1145 /* Fill space with break instructions */
1146 static void jit_fill_hole(void *area, unsigned int size)
1147 {
1148 	u32 *ptr;
1149 
1150 	/* We are guaranteed to have aligned memory */
1151 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
1152 		*ptr++ = INSN_BREAK;
1153 }
1154 
1155 static int validate_code(struct jit_ctx *ctx)
1156 {
1157 	int i;
1158 	union loongarch_instruction insn;
1159 
1160 	for (i = 0; i < ctx->idx; i++) {
1161 		insn = ctx->image[i];
1162 		/* Check INSN_BREAK */
1163 		if (insn.word == INSN_BREAK)
1164 			return -1;
1165 	}
1166 
1167 	if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
1168 		return -1;
1169 
1170 	return 0;
1171 }
1172 
1173 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1174 {
1175 	bool tmp_blinded = false, extra_pass = false;
1176 	u8 *image_ptr;
1177 	int image_size, prog_size, extable_size;
1178 	struct jit_ctx ctx;
1179 	struct jit_data *jit_data;
1180 	struct bpf_binary_header *header;
1181 	struct bpf_prog *tmp, *orig_prog = prog;
1182 
1183 	/*
1184 	 * If BPF JIT was not enabled then we must fall back to
1185 	 * the interpreter.
1186 	 */
1187 	if (!prog->jit_requested)
1188 		return orig_prog;
1189 
1190 	tmp = bpf_jit_blind_constants(prog);
1191 	/*
1192 	 * If blinding was requested and we failed during blinding,
1193 	 * we must fall back to the interpreter. Otherwise, we save
1194 	 * the new JITed code.
1195 	 */
1196 	if (IS_ERR(tmp))
1197 		return orig_prog;
1198 
1199 	if (tmp != prog) {
1200 		tmp_blinded = true;
1201 		prog = tmp;
1202 	}
1203 
1204 	jit_data = prog->aux->jit_data;
1205 	if (!jit_data) {
1206 		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1207 		if (!jit_data) {
1208 			prog = orig_prog;
1209 			goto out;
1210 		}
1211 		prog->aux->jit_data = jit_data;
1212 	}
1213 	if (jit_data->ctx.offset) {
1214 		ctx = jit_data->ctx;
1215 		image_ptr = jit_data->image;
1216 		header = jit_data->header;
1217 		extra_pass = true;
1218 		prog_size = sizeof(u32) * ctx.idx;
1219 		goto skip_init_ctx;
1220 	}
1221 
1222 	memset(&ctx, 0, sizeof(ctx));
1223 	ctx.prog = prog;
1224 
1225 	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
1226 	if (ctx.offset == NULL) {
1227 		prog = orig_prog;
1228 		goto out_offset;
1229 	}
1230 
1231 	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
1232 	build_prologue(&ctx);
1233 	if (build_body(&ctx, extra_pass)) {
1234 		prog = orig_prog;
1235 		goto out_offset;
1236 	}
1237 	ctx.epilogue_offset = ctx.idx;
1238 	build_epilogue(&ctx);
1239 
1240 	extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry);
1241 
1242 	/* Now we know the actual image size.
1243 	 * As each LoongArch instruction is of length 32bit,
1244 	 * we are translating number of JITed intructions into
1245 	 * the size required to store these JITed code.
1246 	 */
1247 	prog_size = sizeof(u32) * ctx.idx;
1248 	image_size = prog_size + extable_size;
1249 	/* Now we know the size of the structure to make */
1250 	header = bpf_jit_binary_alloc(image_size, &image_ptr,
1251 				      sizeof(u32), jit_fill_hole);
1252 	if (header == NULL) {
1253 		prog = orig_prog;
1254 		goto out_offset;
1255 	}
1256 
1257 	/* 2. Now, the actual pass to generate final JIT code */
1258 	ctx.image = (union loongarch_instruction *)image_ptr;
1259 	if (extable_size)
1260 		prog->aux->extable = (void *)image_ptr + prog_size;
1261 
1262 skip_init_ctx:
1263 	ctx.idx = 0;
1264 	ctx.num_exentries = 0;
1265 
1266 	build_prologue(&ctx);
1267 	if (build_body(&ctx, extra_pass)) {
1268 		bpf_jit_binary_free(header);
1269 		prog = orig_prog;
1270 		goto out_offset;
1271 	}
1272 	build_epilogue(&ctx);
1273 
1274 	/* 3. Extra pass to validate JITed code */
1275 	if (validate_code(&ctx)) {
1276 		bpf_jit_binary_free(header);
1277 		prog = orig_prog;
1278 		goto out_offset;
1279 	}
1280 
1281 	/* And we're done */
1282 	if (bpf_jit_enable > 1)
1283 		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
1284 
1285 	/* Update the icache */
1286 	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));
1287 
1288 	if (!prog->is_func || extra_pass) {
1289 		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
1290 			pr_err_once("multi-func JIT bug %d != %d\n",
1291 				    ctx.idx, jit_data->ctx.idx);
1292 			bpf_jit_binary_free(header);
1293 			prog->bpf_func = NULL;
1294 			prog->jited = 0;
1295 			prog->jited_len = 0;
1296 			goto out_offset;
1297 		}
1298 		bpf_jit_binary_lock_ro(header);
1299 	} else {
1300 		jit_data->ctx = ctx;
1301 		jit_data->image = image_ptr;
1302 		jit_data->header = header;
1303 	}
1304 	prog->jited = 1;
1305 	prog->jited_len = prog_size;
1306 	prog->bpf_func = (void *)ctx.image;
1307 
1308 	if (!prog->is_func || extra_pass) {
1309 		int i;
1310 
1311 		/* offset[prog->len] is the size of program */
1312 		for (i = 0; i <= prog->len; i++)
1313 			ctx.offset[i] *= LOONGARCH_INSN_SIZE;
1314 		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
1315 
1316 out_offset:
1317 		kvfree(ctx.offset);
1318 		kfree(jit_data);
1319 		prog->aux->jit_data = NULL;
1320 	}
1321 
1322 out:
1323 	if (tmp_blinded)
1324 		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
1325 
1326 	out_offset = -1;
1327 
1328 	return prog;
1329 }
1330 
/*
 * Indicate the JIT backend supports mixing bpf2bpf and tailcalls.
 *
 * Return: always true.
 */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}
1336