xref: /linux/arch/loongarch/net/bpf_jit.c (revision e3234e547a4db0572e271e490d044bdb4cb7233b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * BPF JIT compiler for LoongArch
4  *
5  * Copyright (C) 2022 Loongson Technology Corporation Limited
6  */
7 #include "bpf_jit.h"
8 
/* Register that carries the tail call count (TCC) into a program. */
#define REG_TCC		LOONGARCH_GPR_A6
/* Register that keeps a copy of the TCC when the program also makes calls. */
#define TCC_SAVED	LOONGARCH_GPR_S5

/* Bits of jit_ctx::flags, set by mark_call() and mark_tail_call(). */
#define SAVE_RA		BIT(0)
#define SAVE_TCC	BIT(1)
14 
15 static const int regmap[] = {
16 	/* return value from in-kernel function, and exit value for eBPF program */
17 	[BPF_REG_0] = LOONGARCH_GPR_A5,
18 	/* arguments from eBPF program to in-kernel function */
19 	[BPF_REG_1] = LOONGARCH_GPR_A0,
20 	[BPF_REG_2] = LOONGARCH_GPR_A1,
21 	[BPF_REG_3] = LOONGARCH_GPR_A2,
22 	[BPF_REG_4] = LOONGARCH_GPR_A3,
23 	[BPF_REG_5] = LOONGARCH_GPR_A4,
24 	/* callee saved registers that in-kernel function will preserve */
25 	[BPF_REG_6] = LOONGARCH_GPR_S0,
26 	[BPF_REG_7] = LOONGARCH_GPR_S1,
27 	[BPF_REG_8] = LOONGARCH_GPR_S2,
28 	[BPF_REG_9] = LOONGARCH_GPR_S3,
29 	/* read-only frame pointer to access stack */
30 	[BPF_REG_FP] = LOONGARCH_GPR_S4,
31 	/* temporary register for blinding constants */
32 	[BPF_REG_AX] = LOONGARCH_GPR_T0,
33 };
34 
35 static void mark_call(struct jit_ctx *ctx)
36 {
37 	ctx->flags |= SAVE_RA;
38 }
39 
40 static void mark_tail_call(struct jit_ctx *ctx)
41 {
42 	ctx->flags |= SAVE_TCC;
43 }
44 
45 static bool seen_call(struct jit_ctx *ctx)
46 {
47 	return (ctx->flags & SAVE_RA);
48 }
49 
50 static bool seen_tail_call(struct jit_ctx *ctx)
51 {
52 	return (ctx->flags & SAVE_TCC);
53 }
54 
55 static u8 tail_call_reg(struct jit_ctx *ctx)
56 {
57 	if (seen_call(ctx))
58 		return TCC_SAVED;
59 
60 	return REG_TCC;
61 }
62 
63 /*
64  * eBPF prog stack layout:
65  *
66  *                                        high
67  * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
68  *                            |           $ra           |
69  *                            +-------------------------+
70  *                            |           $fp           |
71  *                            +-------------------------+
72  *                            |           $s0           |
73  *                            +-------------------------+
74  *                            |           $s1           |
75  *                            +-------------------------+
76  *                            |           $s2           |
77  *                            +-------------------------+
78  *                            |           $s3           |
79  *                            +-------------------------+
80  *                            |           $s4           |
81  *                            +-------------------------+
82  *                            |           $s5           |
83  *                            +-------------------------+ <--BPF_REG_FP
84  *                            |  prog->aux->stack_depth |
85  *                            |        (optional)       |
86  * current $sp -------------> +-------------------------+
87  *                                        low
88  */
89 static void build_prologue(struct jit_ctx *ctx)
90 {
91 	int stack_adjust = 0, store_offset, bpf_stack_adjust;
92 
93 	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
94 
95 	/* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
96 	stack_adjust += sizeof(long) * 8;
97 
98 	stack_adjust = round_up(stack_adjust, 16);
99 	stack_adjust += bpf_stack_adjust;
100 
101 	/*
102 	 * First instruction initializes the tail call count (TCC).
103 	 * On tail call we skip this instruction, and the TCC is
104 	 * passed in REG_TCC from the caller.
105 	 */
106 	emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
107 
108 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
109 
110 	store_offset = stack_adjust - sizeof(long);
111 	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);
112 
113 	store_offset -= sizeof(long);
114 	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);
115 
116 	store_offset -= sizeof(long);
117 	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);
118 
119 	store_offset -= sizeof(long);
120 	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);
121 
122 	store_offset -= sizeof(long);
123 	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);
124 
125 	store_offset -= sizeof(long);
126 	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);
127 
128 	store_offset -= sizeof(long);
129 	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);
130 
131 	store_offset -= sizeof(long);
132 	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
133 
134 	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
135 
136 	if (bpf_stack_adjust)
137 		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
138 
139 	/*
140 	 * Program contains calls and tail calls, so REG_TCC need
141 	 * to be saved across calls.
142 	 */
143 	if (seen_tail_call(ctx) && seen_call(ctx))
144 		move_reg(ctx, TCC_SAVED, REG_TCC);
145 
146 	ctx->stack_size = stack_adjust;
147 }
148 
149 static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
150 {
151 	int stack_adjust = ctx->stack_size;
152 	int load_offset;
153 
154 	load_offset = stack_adjust - sizeof(long);
155 	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);
156 
157 	load_offset -= sizeof(long);
158 	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);
159 
160 	load_offset -= sizeof(long);
161 	emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);
162 
163 	load_offset -= sizeof(long);
164 	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);
165 
166 	load_offset -= sizeof(long);
167 	emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);
168 
169 	load_offset -= sizeof(long);
170 	emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);
171 
172 	load_offset -= sizeof(long);
173 	emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);
174 
175 	load_offset -= sizeof(long);
176 	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
177 
178 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
179 
180 	if (!is_tail_call) {
181 		/* Set return value */
182 		move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]);
183 		/* Return to the caller */
184 		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0);
185 	} else {
186 		/*
187 		 * Call the next bpf prog and skip the first instruction
188 		 * of TCC initialization.
189 		 */
190 		emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1);
191 	}
192 }
193 
194 static void build_epilogue(struct jit_ctx *ctx)
195 {
196 	__build_epilogue(ctx, false);
197 }
198 
/* Report that this JIT supports kfunc calls; always returns true. */
bool bpf_jit_supports_kfunc_call(void)
{
	const bool supported = true;

	return supported;
}
203 
204 /* initialized on the first pass of build_body() */
205 static int out_offset = -1;
206 static int emit_bpf_tail_call(struct jit_ctx *ctx)
207 {
208 	int off;
209 	u8 tcc = tail_call_reg(ctx);
210 	u8 a1 = LOONGARCH_GPR_A1;
211 	u8 a2 = LOONGARCH_GPR_A2;
212 	u8 t1 = LOONGARCH_GPR_T1;
213 	u8 t2 = LOONGARCH_GPR_T2;
214 	u8 t3 = LOONGARCH_GPR_T3;
215 	const int idx0 = ctx->idx;
216 
217 #define cur_offset (ctx->idx - idx0)
218 #define jmp_offset (out_offset - (cur_offset))
219 
220 	/*
221 	 * a0: &ctx
222 	 * a1: &array
223 	 * a2: index
224 	 *
225 	 * if (index >= array->map.max_entries)
226 	 *	 goto out;
227 	 */
228 	off = offsetof(struct bpf_array, map.max_entries);
229 	emit_insn(ctx, ldwu, t1, a1, off);
230 	/* bgeu $a2, $t1, jmp_offset */
231 	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
232 		goto toofar;
233 
234 	/*
235 	 * if (--TCC < 0)
236 	 *	 goto out;
237 	 */
238 	emit_insn(ctx, addid, REG_TCC, tcc, -1);
239 	if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
240 		goto toofar;
241 
242 	/*
243 	 * prog = array->ptrs[index];
244 	 * if (!prog)
245 	 *	 goto out;
246 	 */
247 	emit_insn(ctx, alsld, t2, a2, a1, 2);
248 	off = offsetof(struct bpf_array, ptrs);
249 	emit_insn(ctx, ldd, t2, t2, off);
250 	/* beq $t2, $zero, jmp_offset */
251 	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
252 		goto toofar;
253 
254 	/* goto *(prog->bpf_func + 4); */
255 	off = offsetof(struct bpf_prog, bpf_func);
256 	emit_insn(ctx, ldd, t3, t2, off);
257 	__build_epilogue(ctx, true);
258 
259 	/* out: */
260 	if (out_offset == -1)
261 		out_offset = cur_offset;
262 	if (cur_offset != out_offset) {
263 		pr_err_once("tail_call out_offset = %d, expected %d!\n",
264 			    cur_offset, out_offset);
265 		return -1;
266 	}
267 
268 	return 0;
269 
270 toofar:
271 	pr_info_once("tail_call: jump too far\n");
272 	return -1;
273 #undef cur_offset
274 #undef jmp_offset
275 }
276 
277 static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
278 {
279 	const u8 t1 = LOONGARCH_GPR_T1;
280 	const u8 t2 = LOONGARCH_GPR_T2;
281 	const u8 t3 = LOONGARCH_GPR_T3;
282 	const u8 r0 = regmap[BPF_REG_0];
283 	const u8 src = regmap[insn->src_reg];
284 	const u8 dst = regmap[insn->dst_reg];
285 	const s16 off = insn->off;
286 	const s32 imm = insn->imm;
287 	const bool isdw = BPF_SIZE(insn->code) == BPF_DW;
288 
289 	move_imm(ctx, t1, off, false);
290 	emit_insn(ctx, addd, t1, dst, t1);
291 	move_reg(ctx, t3, src);
292 
293 	switch (imm) {
294 	/* lock *(size *)(dst + off) <op>= src */
295 	case BPF_ADD:
296 		if (isdw)
297 			emit_insn(ctx, amaddd, t2, t1, src);
298 		else
299 			emit_insn(ctx, amaddw, t2, t1, src);
300 		break;
301 	case BPF_AND:
302 		if (isdw)
303 			emit_insn(ctx, amandd, t2, t1, src);
304 		else
305 			emit_insn(ctx, amandw, t2, t1, src);
306 		break;
307 	case BPF_OR:
308 		if (isdw)
309 			emit_insn(ctx, amord, t2, t1, src);
310 		else
311 			emit_insn(ctx, amorw, t2, t1, src);
312 		break;
313 	case BPF_XOR:
314 		if (isdw)
315 			emit_insn(ctx, amxord, t2, t1, src);
316 		else
317 			emit_insn(ctx, amxorw, t2, t1, src);
318 		break;
319 	/* src = atomic_fetch_<op>(dst + off, src) */
320 	case BPF_ADD | BPF_FETCH:
321 		if (isdw) {
322 			emit_insn(ctx, amaddd, src, t1, t3);
323 		} else {
324 			emit_insn(ctx, amaddw, src, t1, t3);
325 			emit_zext_32(ctx, src, true);
326 		}
327 		break;
328 	case BPF_AND | BPF_FETCH:
329 		if (isdw) {
330 			emit_insn(ctx, amandd, src, t1, t3);
331 		} else {
332 			emit_insn(ctx, amandw, src, t1, t3);
333 			emit_zext_32(ctx, src, true);
334 		}
335 		break;
336 	case BPF_OR | BPF_FETCH:
337 		if (isdw) {
338 			emit_insn(ctx, amord, src, t1, t3);
339 		} else {
340 			emit_insn(ctx, amorw, src, t1, t3);
341 			emit_zext_32(ctx, src, true);
342 		}
343 		break;
344 	case BPF_XOR | BPF_FETCH:
345 		if (isdw) {
346 			emit_insn(ctx, amxord, src, t1, t3);
347 		} else {
348 			emit_insn(ctx, amxorw, src, t1, t3);
349 			emit_zext_32(ctx, src, true);
350 		}
351 		break;
352 	/* src = atomic_xchg(dst + off, src); */
353 	case BPF_XCHG:
354 		if (isdw) {
355 			emit_insn(ctx, amswapd, src, t1, t3);
356 		} else {
357 			emit_insn(ctx, amswapw, src, t1, t3);
358 			emit_zext_32(ctx, src, true);
359 		}
360 		break;
361 	/* r0 = atomic_cmpxchg(dst + off, r0, src); */
362 	case BPF_CMPXCHG:
363 		move_reg(ctx, t2, r0);
364 		if (isdw) {
365 			emit_insn(ctx, lld, r0, t1, 0);
366 			emit_insn(ctx, bne, t2, r0, 4);
367 			move_reg(ctx, t3, src);
368 			emit_insn(ctx, scd, t3, t1, 0);
369 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
370 		} else {
371 			emit_insn(ctx, llw, r0, t1, 0);
372 			emit_zext_32(ctx, t2, true);
373 			emit_zext_32(ctx, r0, true);
374 			emit_insn(ctx, bne, t2, r0, 4);
375 			move_reg(ctx, t3, src);
376 			emit_insn(ctx, scw, t3, t1, 0);
377 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
378 			emit_zext_32(ctx, r0, true);
379 		}
380 		break;
381 	}
382 }
383 
384 static bool is_signed_bpf_cond(u8 cond)
385 {
386 	return cond == BPF_JSGT || cond == BPF_JSLT ||
387 	       cond == BPF_JSGE || cond == BPF_JSLE;
388 }
389 
390 #define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
391 #define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
392 
393 bool ex_handler_bpf(const struct exception_table_entry *ex,
394 		    struct pt_regs *regs)
395 {
396 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
397 	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
398 
399 	regs->regs[dst_reg] = 0;
400 	regs->csr_era = (unsigned long)&ex->fixup - offset;
401 
402 	return true;
403 }
404 
405 /* For accesses to BTF pointers, add an entry to the exception table */
406 static int add_exception_handler(const struct bpf_insn *insn,
407 				 struct jit_ctx *ctx,
408 				 int dst_reg)
409 {
410 	unsigned long pc;
411 	off_t offset;
412 	struct exception_table_entry *ex;
413 
414 	if (!ctx->image || !ctx->prog->aux->extable)
415 		return 0;
416 
417 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
418 	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
419 		return 0;
420 
421 	if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
422 		return -EINVAL;
423 
424 	ex = &ctx->prog->aux->extable[ctx->num_exentries];
425 	pc = (unsigned long)&ctx->image[ctx->idx - 1];
426 
427 	offset = pc - (long)&ex->insn;
428 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
429 		return -ERANGE;
430 
431 	ex->insn = offset;
432 
433 	/*
434 	 * Since the extable follows the program, the fixup offset is always
435 	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
436 	 * to keep things simple, and put the destination register in the upper
437 	 * bits. We don't need to worry about buildtime or runtime sort
438 	 * modifying the upper bits because the table is already sorted, and
439 	 * isn't part of the main exception table.
440 	 */
441 	offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
442 	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
443 		return -ERANGE;
444 
445 	ex->type = EX_TYPE_BPF;
446 	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
447 
448 	ctx->num_exentries++;
449 
450 	return 0;
451 }
452 
453 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
454 {
455 	u8 tm = -1;
456 	u64 func_addr;
457 	bool func_addr_fixed, sign_extend;
458 	int i = insn - ctx->prog->insnsi;
459 	int ret, jmp_offset;
460 	const u8 code = insn->code;
461 	const u8 cond = BPF_OP(code);
462 	const u8 t1 = LOONGARCH_GPR_T1;
463 	const u8 t2 = LOONGARCH_GPR_T2;
464 	const u8 src = regmap[insn->src_reg];
465 	const u8 dst = regmap[insn->dst_reg];
466 	const s16 off = insn->off;
467 	const s32 imm = insn->imm;
468 	const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
469 	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
470 
471 	switch (code) {
472 	/* dst = src */
473 	case BPF_ALU | BPF_MOV | BPF_X:
474 	case BPF_ALU64 | BPF_MOV | BPF_X:
475 		switch (off) {
476 		case 0:
477 			move_reg(ctx, dst, src);
478 			emit_zext_32(ctx, dst, is32);
479 			break;
480 		case 8:
481 			move_reg(ctx, t1, src);
482 			emit_insn(ctx, extwb, dst, t1);
483 			break;
484 		case 16:
485 			move_reg(ctx, t1, src);
486 			emit_insn(ctx, extwh, dst, t1);
487 			break;
488 		case 32:
489 			emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
490 			break;
491 		}
492 		break;
493 
494 	/* dst = imm */
495 	case BPF_ALU | BPF_MOV | BPF_K:
496 	case BPF_ALU64 | BPF_MOV | BPF_K:
497 		move_imm(ctx, dst, imm, is32);
498 		break;
499 
500 	/* dst = dst + src */
501 	case BPF_ALU | BPF_ADD | BPF_X:
502 	case BPF_ALU64 | BPF_ADD | BPF_X:
503 		emit_insn(ctx, addd, dst, dst, src);
504 		emit_zext_32(ctx, dst, is32);
505 		break;
506 
507 	/* dst = dst + imm */
508 	case BPF_ALU | BPF_ADD | BPF_K:
509 	case BPF_ALU64 | BPF_ADD | BPF_K:
510 		if (is_signed_imm12(imm)) {
511 			emit_insn(ctx, addid, dst, dst, imm);
512 		} else {
513 			move_imm(ctx, t1, imm, is32);
514 			emit_insn(ctx, addd, dst, dst, t1);
515 		}
516 		emit_zext_32(ctx, dst, is32);
517 		break;
518 
519 	/* dst = dst - src */
520 	case BPF_ALU | BPF_SUB | BPF_X:
521 	case BPF_ALU64 | BPF_SUB | BPF_X:
522 		emit_insn(ctx, subd, dst, dst, src);
523 		emit_zext_32(ctx, dst, is32);
524 		break;
525 
526 	/* dst = dst - imm */
527 	case BPF_ALU | BPF_SUB | BPF_K:
528 	case BPF_ALU64 | BPF_SUB | BPF_K:
529 		if (is_signed_imm12(-imm)) {
530 			emit_insn(ctx, addid, dst, dst, -imm);
531 		} else {
532 			move_imm(ctx, t1, imm, is32);
533 			emit_insn(ctx, subd, dst, dst, t1);
534 		}
535 		emit_zext_32(ctx, dst, is32);
536 		break;
537 
538 	/* dst = dst * src */
539 	case BPF_ALU | BPF_MUL | BPF_X:
540 	case BPF_ALU64 | BPF_MUL | BPF_X:
541 		emit_insn(ctx, muld, dst, dst, src);
542 		emit_zext_32(ctx, dst, is32);
543 		break;
544 
545 	/* dst = dst * imm */
546 	case BPF_ALU | BPF_MUL | BPF_K:
547 	case BPF_ALU64 | BPF_MUL | BPF_K:
548 		move_imm(ctx, t1, imm, is32);
549 		emit_insn(ctx, muld, dst, dst, t1);
550 		emit_zext_32(ctx, dst, is32);
551 		break;
552 
553 	/* dst = dst / src */
554 	case BPF_ALU | BPF_DIV | BPF_X:
555 	case BPF_ALU64 | BPF_DIV | BPF_X:
556 		if (!off) {
557 			emit_zext_32(ctx, dst, is32);
558 			move_reg(ctx, t1, src);
559 			emit_zext_32(ctx, t1, is32);
560 			emit_insn(ctx, divdu, dst, dst, t1);
561 			emit_zext_32(ctx, dst, is32);
562 		} else {
563 			emit_sext_32(ctx, dst, is32);
564 			move_reg(ctx, t1, src);
565 			emit_sext_32(ctx, t1, is32);
566 			emit_insn(ctx, divd, dst, dst, t1);
567 			emit_sext_32(ctx, dst, is32);
568 		}
569 		break;
570 
571 	/* dst = dst / imm */
572 	case BPF_ALU | BPF_DIV | BPF_K:
573 	case BPF_ALU64 | BPF_DIV | BPF_K:
574 		if (!off) {
575 			move_imm(ctx, t1, imm, is32);
576 			emit_zext_32(ctx, dst, is32);
577 			emit_insn(ctx, divdu, dst, dst, t1);
578 			emit_zext_32(ctx, dst, is32);
579 		} else {
580 			move_imm(ctx, t1, imm, false);
581 			emit_sext_32(ctx, t1, is32);
582 			emit_sext_32(ctx, dst, is32);
583 			emit_insn(ctx, divd, dst, dst, t1);
584 			emit_sext_32(ctx, dst, is32);
585 		}
586 		break;
587 
588 	/* dst = dst % src */
589 	case BPF_ALU | BPF_MOD | BPF_X:
590 	case BPF_ALU64 | BPF_MOD | BPF_X:
591 		if (!off) {
592 			emit_zext_32(ctx, dst, is32);
593 			move_reg(ctx, t1, src);
594 			emit_zext_32(ctx, t1, is32);
595 			emit_insn(ctx, moddu, dst, dst, t1);
596 			emit_zext_32(ctx, dst, is32);
597 		} else {
598 			emit_sext_32(ctx, dst, is32);
599 			move_reg(ctx, t1, src);
600 			emit_sext_32(ctx, t1, is32);
601 			emit_insn(ctx, modd, dst, dst, t1);
602 			emit_sext_32(ctx, dst, is32);
603 		}
604 		break;
605 
606 	/* dst = dst % imm */
607 	case BPF_ALU | BPF_MOD | BPF_K:
608 	case BPF_ALU64 | BPF_MOD | BPF_K:
609 		if (!off) {
610 			move_imm(ctx, t1, imm, is32);
611 			emit_zext_32(ctx, dst, is32);
612 			emit_insn(ctx, moddu, dst, dst, t1);
613 			emit_zext_32(ctx, dst, is32);
614 		} else {
615 			move_imm(ctx, t1, imm, false);
616 			emit_sext_32(ctx, t1, is32);
617 			emit_sext_32(ctx, dst, is32);
618 			emit_insn(ctx, modd, dst, dst, t1);
619 			emit_sext_32(ctx, dst, is32);
620 		}
621 		break;
622 
623 	/* dst = -dst */
624 	case BPF_ALU | BPF_NEG:
625 	case BPF_ALU64 | BPF_NEG:
626 		move_imm(ctx, t1, imm, is32);
627 		emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
628 		emit_zext_32(ctx, dst, is32);
629 		break;
630 
631 	/* dst = dst & src */
632 	case BPF_ALU | BPF_AND | BPF_X:
633 	case BPF_ALU64 | BPF_AND | BPF_X:
634 		emit_insn(ctx, and, dst, dst, src);
635 		emit_zext_32(ctx, dst, is32);
636 		break;
637 
638 	/* dst = dst & imm */
639 	case BPF_ALU | BPF_AND | BPF_K:
640 	case BPF_ALU64 | BPF_AND | BPF_K:
641 		if (is_unsigned_imm12(imm)) {
642 			emit_insn(ctx, andi, dst, dst, imm);
643 		} else {
644 			move_imm(ctx, t1, imm, is32);
645 			emit_insn(ctx, and, dst, dst, t1);
646 		}
647 		emit_zext_32(ctx, dst, is32);
648 		break;
649 
650 	/* dst = dst | src */
651 	case BPF_ALU | BPF_OR | BPF_X:
652 	case BPF_ALU64 | BPF_OR | BPF_X:
653 		emit_insn(ctx, or, dst, dst, src);
654 		emit_zext_32(ctx, dst, is32);
655 		break;
656 
657 	/* dst = dst | imm */
658 	case BPF_ALU | BPF_OR | BPF_K:
659 	case BPF_ALU64 | BPF_OR | BPF_K:
660 		if (is_unsigned_imm12(imm)) {
661 			emit_insn(ctx, ori, dst, dst, imm);
662 		} else {
663 			move_imm(ctx, t1, imm, is32);
664 			emit_insn(ctx, or, dst, dst, t1);
665 		}
666 		emit_zext_32(ctx, dst, is32);
667 		break;
668 
669 	/* dst = dst ^ src */
670 	case BPF_ALU | BPF_XOR | BPF_X:
671 	case BPF_ALU64 | BPF_XOR | BPF_X:
672 		emit_insn(ctx, xor, dst, dst, src);
673 		emit_zext_32(ctx, dst, is32);
674 		break;
675 
676 	/* dst = dst ^ imm */
677 	case BPF_ALU | BPF_XOR | BPF_K:
678 	case BPF_ALU64 | BPF_XOR | BPF_K:
679 		if (is_unsigned_imm12(imm)) {
680 			emit_insn(ctx, xori, dst, dst, imm);
681 		} else {
682 			move_imm(ctx, t1, imm, is32);
683 			emit_insn(ctx, xor, dst, dst, t1);
684 		}
685 		emit_zext_32(ctx, dst, is32);
686 		break;
687 
688 	/* dst = dst << src (logical) */
689 	case BPF_ALU | BPF_LSH | BPF_X:
690 		emit_insn(ctx, sllw, dst, dst, src);
691 		emit_zext_32(ctx, dst, is32);
692 		break;
693 
694 	case BPF_ALU64 | BPF_LSH | BPF_X:
695 		emit_insn(ctx, slld, dst, dst, src);
696 		break;
697 
698 	/* dst = dst << imm (logical) */
699 	case BPF_ALU | BPF_LSH | BPF_K:
700 		emit_insn(ctx, slliw, dst, dst, imm);
701 		emit_zext_32(ctx, dst, is32);
702 		break;
703 
704 	case BPF_ALU64 | BPF_LSH | BPF_K:
705 		emit_insn(ctx, sllid, dst, dst, imm);
706 		break;
707 
708 	/* dst = dst >> src (logical) */
709 	case BPF_ALU | BPF_RSH | BPF_X:
710 		emit_insn(ctx, srlw, dst, dst, src);
711 		emit_zext_32(ctx, dst, is32);
712 		break;
713 
714 	case BPF_ALU64 | BPF_RSH | BPF_X:
715 		emit_insn(ctx, srld, dst, dst, src);
716 		break;
717 
718 	/* dst = dst >> imm (logical) */
719 	case BPF_ALU | BPF_RSH | BPF_K:
720 		emit_insn(ctx, srliw, dst, dst, imm);
721 		emit_zext_32(ctx, dst, is32);
722 		break;
723 
724 	case BPF_ALU64 | BPF_RSH | BPF_K:
725 		emit_insn(ctx, srlid, dst, dst, imm);
726 		break;
727 
728 	/* dst = dst >> src (arithmetic) */
729 	case BPF_ALU | BPF_ARSH | BPF_X:
730 		emit_insn(ctx, sraw, dst, dst, src);
731 		emit_zext_32(ctx, dst, is32);
732 		break;
733 
734 	case BPF_ALU64 | BPF_ARSH | BPF_X:
735 		emit_insn(ctx, srad, dst, dst, src);
736 		break;
737 
738 	/* dst = dst >> imm (arithmetic) */
739 	case BPF_ALU | BPF_ARSH | BPF_K:
740 		emit_insn(ctx, sraiw, dst, dst, imm);
741 		emit_zext_32(ctx, dst, is32);
742 		break;
743 
744 	case BPF_ALU64 | BPF_ARSH | BPF_K:
745 		emit_insn(ctx, sraid, dst, dst, imm);
746 		break;
747 
748 	/* dst = BSWAP##imm(dst) */
749 	case BPF_ALU | BPF_END | BPF_FROM_LE:
750 		switch (imm) {
751 		case 16:
752 			/* zero-extend 16 bits into 64 bits */
753 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
754 			break;
755 		case 32:
756 			/* zero-extend 32 bits into 64 bits */
757 			emit_zext_32(ctx, dst, is32);
758 			break;
759 		case 64:
760 			/* do nothing */
761 			break;
762 		}
763 		break;
764 
765 	case BPF_ALU | BPF_END | BPF_FROM_BE:
766 	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
767 		switch (imm) {
768 		case 16:
769 			emit_insn(ctx, revb2h, dst, dst);
770 			/* zero-extend 16 bits into 64 bits */
771 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
772 			break;
773 		case 32:
774 			emit_insn(ctx, revb2w, dst, dst);
775 			/* zero-extend 32 bits into 64 bits */
776 			emit_zext_32(ctx, dst, is32);
777 			break;
778 		case 64:
779 			emit_insn(ctx, revbd, dst, dst);
780 			break;
781 		}
782 		break;
783 
784 	/* PC += off if dst cond src */
785 	case BPF_JMP | BPF_JEQ | BPF_X:
786 	case BPF_JMP | BPF_JNE | BPF_X:
787 	case BPF_JMP | BPF_JGT | BPF_X:
788 	case BPF_JMP | BPF_JGE | BPF_X:
789 	case BPF_JMP | BPF_JLT | BPF_X:
790 	case BPF_JMP | BPF_JLE | BPF_X:
791 	case BPF_JMP | BPF_JSGT | BPF_X:
792 	case BPF_JMP | BPF_JSGE | BPF_X:
793 	case BPF_JMP | BPF_JSLT | BPF_X:
794 	case BPF_JMP | BPF_JSLE | BPF_X:
795 	case BPF_JMP32 | BPF_JEQ | BPF_X:
796 	case BPF_JMP32 | BPF_JNE | BPF_X:
797 	case BPF_JMP32 | BPF_JGT | BPF_X:
798 	case BPF_JMP32 | BPF_JGE | BPF_X:
799 	case BPF_JMP32 | BPF_JLT | BPF_X:
800 	case BPF_JMP32 | BPF_JLE | BPF_X:
801 	case BPF_JMP32 | BPF_JSGT | BPF_X:
802 	case BPF_JMP32 | BPF_JSGE | BPF_X:
803 	case BPF_JMP32 | BPF_JSLT | BPF_X:
804 	case BPF_JMP32 | BPF_JSLE | BPF_X:
805 		jmp_offset = bpf2la_offset(i, off, ctx);
806 		move_reg(ctx, t1, dst);
807 		move_reg(ctx, t2, src);
808 		if (is_signed_bpf_cond(BPF_OP(code))) {
809 			emit_sext_32(ctx, t1, is32);
810 			emit_sext_32(ctx, t2, is32);
811 		} else {
812 			emit_zext_32(ctx, t1, is32);
813 			emit_zext_32(ctx, t2, is32);
814 		}
815 		if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
816 			goto toofar;
817 		break;
818 
819 	/* PC += off if dst cond imm */
820 	case BPF_JMP | BPF_JEQ | BPF_K:
821 	case BPF_JMP | BPF_JNE | BPF_K:
822 	case BPF_JMP | BPF_JGT | BPF_K:
823 	case BPF_JMP | BPF_JGE | BPF_K:
824 	case BPF_JMP | BPF_JLT | BPF_K:
825 	case BPF_JMP | BPF_JLE | BPF_K:
826 	case BPF_JMP | BPF_JSGT | BPF_K:
827 	case BPF_JMP | BPF_JSGE | BPF_K:
828 	case BPF_JMP | BPF_JSLT | BPF_K:
829 	case BPF_JMP | BPF_JSLE | BPF_K:
830 	case BPF_JMP32 | BPF_JEQ | BPF_K:
831 	case BPF_JMP32 | BPF_JNE | BPF_K:
832 	case BPF_JMP32 | BPF_JGT | BPF_K:
833 	case BPF_JMP32 | BPF_JGE | BPF_K:
834 	case BPF_JMP32 | BPF_JLT | BPF_K:
835 	case BPF_JMP32 | BPF_JLE | BPF_K:
836 	case BPF_JMP32 | BPF_JSGT | BPF_K:
837 	case BPF_JMP32 | BPF_JSGE | BPF_K:
838 	case BPF_JMP32 | BPF_JSLT | BPF_K:
839 	case BPF_JMP32 | BPF_JSLE | BPF_K:
840 		jmp_offset = bpf2la_offset(i, off, ctx);
841 		if (imm) {
842 			move_imm(ctx, t1, imm, false);
843 			tm = t1;
844 		} else {
845 			/* If imm is 0, simply use zero register. */
846 			tm = LOONGARCH_GPR_ZERO;
847 		}
848 		move_reg(ctx, t2, dst);
849 		if (is_signed_bpf_cond(BPF_OP(code))) {
850 			emit_sext_32(ctx, tm, is32);
851 			emit_sext_32(ctx, t2, is32);
852 		} else {
853 			emit_zext_32(ctx, tm, is32);
854 			emit_zext_32(ctx, t2, is32);
855 		}
856 		if (emit_cond_jmp(ctx, cond, t2, tm, jmp_offset) < 0)
857 			goto toofar;
858 		break;
859 
860 	/* PC += off if dst & src */
861 	case BPF_JMP | BPF_JSET | BPF_X:
862 	case BPF_JMP32 | BPF_JSET | BPF_X:
863 		jmp_offset = bpf2la_offset(i, off, ctx);
864 		emit_insn(ctx, and, t1, dst, src);
865 		emit_zext_32(ctx, t1, is32);
866 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
867 			goto toofar;
868 		break;
869 
870 	/* PC += off if dst & imm */
871 	case BPF_JMP | BPF_JSET | BPF_K:
872 	case BPF_JMP32 | BPF_JSET | BPF_K:
873 		jmp_offset = bpf2la_offset(i, off, ctx);
874 		move_imm(ctx, t1, imm, is32);
875 		emit_insn(ctx, and, t1, dst, t1);
876 		emit_zext_32(ctx, t1, is32);
877 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
878 			goto toofar;
879 		break;
880 
881 	/* PC += off */
882 	case BPF_JMP | BPF_JA:
883 	case BPF_JMP32 | BPF_JA:
884 		if (BPF_CLASS(code) == BPF_JMP)
885 			jmp_offset = bpf2la_offset(i, off, ctx);
886 		else
887 			jmp_offset = bpf2la_offset(i, imm, ctx);
888 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
889 			goto toofar;
890 		break;
891 
892 	/* function call */
893 	case BPF_JMP | BPF_CALL:
894 		mark_call(ctx);
895 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
896 					    &func_addr, &func_addr_fixed);
897 		if (ret < 0)
898 			return ret;
899 
900 		move_addr(ctx, t1, func_addr);
901 		emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0);
902 		move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
903 		break;
904 
905 	/* tail call */
906 	case BPF_JMP | BPF_TAIL_CALL:
907 		mark_tail_call(ctx);
908 		if (emit_bpf_tail_call(ctx) < 0)
909 			return -EINVAL;
910 		break;
911 
912 	/* function return */
913 	case BPF_JMP | BPF_EXIT:
914 		emit_sext_32(ctx, regmap[BPF_REG_0], true);
915 
916 		if (i == ctx->prog->len - 1)
917 			break;
918 
919 		jmp_offset = epilogue_offset(ctx);
920 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
921 			goto toofar;
922 		break;
923 
924 	/* dst = imm64 */
925 	case BPF_LD | BPF_IMM | BPF_DW:
926 		move_imm(ctx, dst, imm64, is32);
927 		return 1;
928 
929 	/* dst = *(size *)(src + off) */
930 	case BPF_LDX | BPF_MEM | BPF_B:
931 	case BPF_LDX | BPF_MEM | BPF_H:
932 	case BPF_LDX | BPF_MEM | BPF_W:
933 	case BPF_LDX | BPF_MEM | BPF_DW:
934 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
935 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
936 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
937 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
938 	/* dst_reg = (s64)*(signed size *)(src_reg + off) */
939 	case BPF_LDX | BPF_MEMSX | BPF_B:
940 	case BPF_LDX | BPF_MEMSX | BPF_H:
941 	case BPF_LDX | BPF_MEMSX | BPF_W:
942 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
943 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
944 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
945 		sign_extend = BPF_MODE(insn->code) == BPF_MEMSX ||
946 			      BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
947 		switch (BPF_SIZE(code)) {
948 		case BPF_B:
949 			if (is_signed_imm12(off)) {
950 				if (sign_extend)
951 					emit_insn(ctx, ldb, dst, src, off);
952 				else
953 					emit_insn(ctx, ldbu, dst, src, off);
954 			} else {
955 				move_imm(ctx, t1, off, is32);
956 				if (sign_extend)
957 					emit_insn(ctx, ldxb, dst, src, t1);
958 				else
959 					emit_insn(ctx, ldxbu, dst, src, t1);
960 			}
961 			break;
962 		case BPF_H:
963 			if (is_signed_imm12(off)) {
964 				if (sign_extend)
965 					emit_insn(ctx, ldh, dst, src, off);
966 				else
967 					emit_insn(ctx, ldhu, dst, src, off);
968 			} else {
969 				move_imm(ctx, t1, off, is32);
970 				if (sign_extend)
971 					emit_insn(ctx, ldxh, dst, src, t1);
972 				else
973 					emit_insn(ctx, ldxhu, dst, src, t1);
974 			}
975 			break;
976 		case BPF_W:
977 			if (is_signed_imm12(off)) {
978 				if (sign_extend)
979 					emit_insn(ctx, ldw, dst, src, off);
980 				else
981 					emit_insn(ctx, ldwu, dst, src, off);
982 			} else {
983 				move_imm(ctx, t1, off, is32);
984 				if (sign_extend)
985 					emit_insn(ctx, ldxw, dst, src, t1);
986 				else
987 					emit_insn(ctx, ldxwu, dst, src, t1);
988 			}
989 			break;
990 		case BPF_DW:
991 			if (is_signed_imm12(off)) {
992 				emit_insn(ctx, ldd, dst, src, off);
993 			} else if (is_signed_imm14(off)) {
994 				emit_insn(ctx, ldptrd, dst, src, off);
995 			} else {
996 				move_imm(ctx, t1, off, is32);
997 				emit_insn(ctx, ldxd, dst, src, t1);
998 			}
999 			break;
1000 		}
1001 
1002 		ret = add_exception_handler(insn, ctx, dst);
1003 		if (ret)
1004 			return ret;
1005 		break;
1006 
1007 	/* *(size *)(dst + off) = imm */
1008 	case BPF_ST | BPF_MEM | BPF_B:
1009 	case BPF_ST | BPF_MEM | BPF_H:
1010 	case BPF_ST | BPF_MEM | BPF_W:
1011 	case BPF_ST | BPF_MEM | BPF_DW:
1012 		switch (BPF_SIZE(code)) {
1013 		case BPF_B:
1014 			move_imm(ctx, t1, imm, is32);
1015 			if (is_signed_imm12(off)) {
1016 				emit_insn(ctx, stb, t1, dst, off);
1017 			} else {
1018 				move_imm(ctx, t2, off, is32);
1019 				emit_insn(ctx, stxb, t1, dst, t2);
1020 			}
1021 			break;
1022 		case BPF_H:
1023 			move_imm(ctx, t1, imm, is32);
1024 			if (is_signed_imm12(off)) {
1025 				emit_insn(ctx, sth, t1, dst, off);
1026 			} else {
1027 				move_imm(ctx, t2, off, is32);
1028 				emit_insn(ctx, stxh, t1, dst, t2);
1029 			}
1030 			break;
1031 		case BPF_W:
1032 			move_imm(ctx, t1, imm, is32);
1033 			if (is_signed_imm12(off)) {
1034 				emit_insn(ctx, stw, t1, dst, off);
1035 			} else if (is_signed_imm14(off)) {
1036 				emit_insn(ctx, stptrw, t1, dst, off);
1037 			} else {
1038 				move_imm(ctx, t2, off, is32);
1039 				emit_insn(ctx, stxw, t1, dst, t2);
1040 			}
1041 			break;
1042 		case BPF_DW:
1043 			move_imm(ctx, t1, imm, is32);
1044 			if (is_signed_imm12(off)) {
1045 				emit_insn(ctx, std, t1, dst, off);
1046 			} else if (is_signed_imm14(off)) {
1047 				emit_insn(ctx, stptrd, t1, dst, off);
1048 			} else {
1049 				move_imm(ctx, t2, off, is32);
1050 				emit_insn(ctx, stxd, t1, dst, t2);
1051 			}
1052 			break;
1053 		}
1054 		break;
1055 
1056 	/* *(size *)(dst + off) = src */
1057 	case BPF_STX | BPF_MEM | BPF_B:
1058 	case BPF_STX | BPF_MEM | BPF_H:
1059 	case BPF_STX | BPF_MEM | BPF_W:
1060 	case BPF_STX | BPF_MEM | BPF_DW:
1061 		switch (BPF_SIZE(code)) {
1062 		case BPF_B:
1063 			if (is_signed_imm12(off)) {
1064 				emit_insn(ctx, stb, src, dst, off);
1065 			} else {
1066 				move_imm(ctx, t1, off, is32);
1067 				emit_insn(ctx, stxb, src, dst, t1);
1068 			}
1069 			break;
1070 		case BPF_H:
1071 			if (is_signed_imm12(off)) {
1072 				emit_insn(ctx, sth, src, dst, off);
1073 			} else {
1074 				move_imm(ctx, t1, off, is32);
1075 				emit_insn(ctx, stxh, src, dst, t1);
1076 			}
1077 			break;
1078 		case BPF_W:
1079 			if (is_signed_imm12(off)) {
1080 				emit_insn(ctx, stw, src, dst, off);
1081 			} else if (is_signed_imm14(off)) {
1082 				emit_insn(ctx, stptrw, src, dst, off);
1083 			} else {
1084 				move_imm(ctx, t1, off, is32);
1085 				emit_insn(ctx, stxw, src, dst, t1);
1086 			}
1087 			break;
1088 		case BPF_DW:
1089 			if (is_signed_imm12(off)) {
1090 				emit_insn(ctx, std, src, dst, off);
1091 			} else if (is_signed_imm14(off)) {
1092 				emit_insn(ctx, stptrd, src, dst, off);
1093 			} else {
1094 				move_imm(ctx, t1, off, is32);
1095 				emit_insn(ctx, stxd, src, dst, t1);
1096 			}
1097 			break;
1098 		}
1099 		break;
1100 
1101 	case BPF_STX | BPF_ATOMIC | BPF_W:
1102 	case BPF_STX | BPF_ATOMIC | BPF_DW:
1103 		emit_atomic(insn, ctx);
1104 		break;
1105 
1106 	/* Speculation barrier */
1107 	case BPF_ST | BPF_NOSPEC:
1108 		break;
1109 
1110 	default:
1111 		pr_err("bpf_jit: unknown opcode %02x\n", code);
1112 		return -EINVAL;
1113 	}
1114 
1115 	return 0;
1116 
1117 toofar:
1118 	pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
1119 	return -E2BIG;
1120 }
1121 
1122 static int build_body(struct jit_ctx *ctx, bool extra_pass)
1123 {
1124 	int i;
1125 	const struct bpf_prog *prog = ctx->prog;
1126 
1127 	for (i = 0; i < prog->len; i++) {
1128 		const struct bpf_insn *insn = &prog->insnsi[i];
1129 		int ret;
1130 
1131 		if (ctx->image == NULL)
1132 			ctx->offset[i] = ctx->idx;
1133 
1134 		ret = build_insn(insn, ctx, extra_pass);
1135 		if (ret > 0) {
1136 			i++;
1137 			if (ctx->image == NULL)
1138 				ctx->offset[i] = ctx->idx;
1139 			continue;
1140 		}
1141 		if (ret)
1142 			return ret;
1143 	}
1144 
1145 	if (ctx->image == NULL)
1146 		ctx->offset[i] = ctx->idx;
1147 
1148 	return 0;
1149 }
1150 
1151 /* Fill space with break instructions */
1152 static void jit_fill_hole(void *area, unsigned int size)
1153 {
1154 	u32 *ptr;
1155 
1156 	/* We are guaranteed to have aligned memory */
1157 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
1158 		*ptr++ = INSN_BREAK;
1159 }
1160 
1161 static int validate_code(struct jit_ctx *ctx)
1162 {
1163 	int i;
1164 	union loongarch_instruction insn;
1165 
1166 	for (i = 0; i < ctx->idx; i++) {
1167 		insn = ctx->image[i];
1168 		/* Check INSN_BREAK */
1169 		if (insn.word == INSN_BREAK)
1170 			return -1;
1171 	}
1172 
1173 	if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
1174 		return -1;
1175 
1176 	return 0;
1177 }
1178 
1179 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1180 {
1181 	bool tmp_blinded = false, extra_pass = false;
1182 	u8 *image_ptr;
1183 	int image_size, prog_size, extable_size;
1184 	struct jit_ctx ctx;
1185 	struct jit_data *jit_data;
1186 	struct bpf_binary_header *header;
1187 	struct bpf_prog *tmp, *orig_prog = prog;
1188 
1189 	/*
1190 	 * If BPF JIT was not enabled then we must fall back to
1191 	 * the interpreter.
1192 	 */
1193 	if (!prog->jit_requested)
1194 		return orig_prog;
1195 
1196 	tmp = bpf_jit_blind_constants(prog);
1197 	/*
1198 	 * If blinding was requested and we failed during blinding,
1199 	 * we must fall back to the interpreter. Otherwise, we save
1200 	 * the new JITed code.
1201 	 */
1202 	if (IS_ERR(tmp))
1203 		return orig_prog;
1204 
1205 	if (tmp != prog) {
1206 		tmp_blinded = true;
1207 		prog = tmp;
1208 	}
1209 
1210 	jit_data = prog->aux->jit_data;
1211 	if (!jit_data) {
1212 		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1213 		if (!jit_data) {
1214 			prog = orig_prog;
1215 			goto out;
1216 		}
1217 		prog->aux->jit_data = jit_data;
1218 	}
1219 	if (jit_data->ctx.offset) {
1220 		ctx = jit_data->ctx;
1221 		image_ptr = jit_data->image;
1222 		header = jit_data->header;
1223 		extra_pass = true;
1224 		prog_size = sizeof(u32) * ctx.idx;
1225 		goto skip_init_ctx;
1226 	}
1227 
1228 	memset(&ctx, 0, sizeof(ctx));
1229 	ctx.prog = prog;
1230 
1231 	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
1232 	if (ctx.offset == NULL) {
1233 		prog = orig_prog;
1234 		goto out_offset;
1235 	}
1236 
1237 	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
1238 	build_prologue(&ctx);
1239 	if (build_body(&ctx, extra_pass)) {
1240 		prog = orig_prog;
1241 		goto out_offset;
1242 	}
1243 	ctx.epilogue_offset = ctx.idx;
1244 	build_epilogue(&ctx);
1245 
1246 	extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry);
1247 
1248 	/* Now we know the actual image size.
1249 	 * As each LoongArch instruction is of length 32bit,
1250 	 * we are translating number of JITed intructions into
1251 	 * the size required to store these JITed code.
1252 	 */
1253 	prog_size = sizeof(u32) * ctx.idx;
1254 	image_size = prog_size + extable_size;
1255 	/* Now we know the size of the structure to make */
1256 	header = bpf_jit_binary_alloc(image_size, &image_ptr,
1257 				      sizeof(u32), jit_fill_hole);
1258 	if (header == NULL) {
1259 		prog = orig_prog;
1260 		goto out_offset;
1261 	}
1262 
1263 	/* 2. Now, the actual pass to generate final JIT code */
1264 	ctx.image = (union loongarch_instruction *)image_ptr;
1265 	if (extable_size)
1266 		prog->aux->extable = (void *)image_ptr + prog_size;
1267 
1268 skip_init_ctx:
1269 	ctx.idx = 0;
1270 	ctx.num_exentries = 0;
1271 
1272 	build_prologue(&ctx);
1273 	if (build_body(&ctx, extra_pass)) {
1274 		bpf_jit_binary_free(header);
1275 		prog = orig_prog;
1276 		goto out_offset;
1277 	}
1278 	build_epilogue(&ctx);
1279 
1280 	/* 3. Extra pass to validate JITed code */
1281 	if (validate_code(&ctx)) {
1282 		bpf_jit_binary_free(header);
1283 		prog = orig_prog;
1284 		goto out_offset;
1285 	}
1286 
1287 	/* And we're done */
1288 	if (bpf_jit_enable > 1)
1289 		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
1290 
1291 	/* Update the icache */
1292 	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));
1293 
1294 	if (!prog->is_func || extra_pass) {
1295 		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
1296 			pr_err_once("multi-func JIT bug %d != %d\n",
1297 				    ctx.idx, jit_data->ctx.idx);
1298 			bpf_jit_binary_free(header);
1299 			prog->bpf_func = NULL;
1300 			prog->jited = 0;
1301 			prog->jited_len = 0;
1302 			goto out_offset;
1303 		}
1304 		bpf_jit_binary_lock_ro(header);
1305 	} else {
1306 		jit_data->ctx = ctx;
1307 		jit_data->image = image_ptr;
1308 		jit_data->header = header;
1309 	}
1310 	prog->jited = 1;
1311 	prog->jited_len = prog_size;
1312 	prog->bpf_func = (void *)ctx.image;
1313 
1314 	if (!prog->is_func || extra_pass) {
1315 		int i;
1316 
1317 		/* offset[prog->len] is the size of program */
1318 		for (i = 0; i <= prog->len; i++)
1319 			ctx.offset[i] *= LOONGARCH_INSN_SIZE;
1320 		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
1321 
1322 out_offset:
1323 		kvfree(ctx.offset);
1324 		kfree(jit_data);
1325 		prog->aux->jit_data = NULL;
1326 	}
1327 
1328 out:
1329 	if (tmp_blinded)
1330 		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
1331 
1332 	out_offset = -1;
1333 
1334 	return prog;
1335 }
1336 
/*
 * Capability hook: report that this JIT backend supports programs that
 * mix bpf2bpf calls with tail calls. Always returns true.
 */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}
1342