1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * BPF JIT compiler for LoongArch
4 *
5 * Copyright (C) 2022 Loongson Technology Corporation Limited
6 */
7 #include <linux/memory.h>
8 #include "bpf_jit.h"
9
10 #define LOONGARCH_MAX_REG_ARGS 8
11
12 #define LOONGARCH_LONG_JUMP_NINSNS 5
13 #define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
14
15 #define LOONGARCH_FENTRY_NINSNS 2
16 #define LOONGARCH_FENTRY_NBYTES (LOONGARCH_FENTRY_NINSNS * 4)
17 #define LOONGARCH_BPF_FENTRY_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
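/*
 * A long jump is a move_imm (up to 4 instructions) followed by a jirl,
 * 5 instructions in total. Ftrace-patched kernel function entries use a
 * 2-instruction stub, while BPF programs reserve the full 5-instruction
 * long-jump slot in their prologue (see build_prologue()).
 */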
18
19 #define REG_TCC LOONGARCH_GPR_A6
20 #define REG_ARENA LOONGARCH_GPR_S6 /* For storing arena_vm_start */
21 #define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (round_up(stack, 16) - 80)
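/*
 * tcc_ptr sits 80 bytes below the top of the 16-byte-aligned frame:
 * ra/fp/s0-s5 (8 slots * 8 bytes) plus the saved tcc (8 bytes) plus the
 * tcc_ptr slot itself (8 bytes); see build_prologue() and
 * prepare_bpf_tail_call_cnt().
 */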
22
23 static const int regmap[] = {
24 /* return value from in-kernel function, and exit value for eBPF program */
25 [BPF_REG_0] = LOONGARCH_GPR_A5,
26 /* arguments from eBPF program to in-kernel function */
27 [BPF_REG_1] = LOONGARCH_GPR_A0,
28 [BPF_REG_2] = LOONGARCH_GPR_A1,
29 [BPF_REG_3] = LOONGARCH_GPR_A2,
30 [BPF_REG_4] = LOONGARCH_GPR_A3,
31 [BPF_REG_5] = LOONGARCH_GPR_A4,
32 /* callee saved registers that in-kernel function will preserve */
33 [BPF_REG_6] = LOONGARCH_GPR_S0,
34 [BPF_REG_7] = LOONGARCH_GPR_S1,
35 [BPF_REG_8] = LOONGARCH_GPR_S2,
36 [BPF_REG_9] = LOONGARCH_GPR_S3,
37 /* read-only frame pointer to access stack */
38 [BPF_REG_FP] = LOONGARCH_GPR_S4,
39 /* temporary register for blinding constants */
40 [BPF_REG_AX] = LOONGARCH_GPR_T0,
41 };
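/*
 * Note that BPF R0 maps to A5 rather than A0: after a helper or kfunc call
 * the native return value in A0 is copied into A5 (see the BPF_CALL case in
 * build_insn()), and the epilogue moves A5 back into A0 before returning.
 */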
42
43 static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx, int *store_offset)
44 {
45 const struct bpf_prog *prog = ctx->prog;
46 const bool is_main_prog = !bpf_is_subprog(prog);
47
48 if (is_main_prog) {
49 /*
50 * LOONGARCH_GPR_T3 = MAX_TAIL_CALL_CNT
51 * if (REG_TCC > T3)
52 * std REG_TCC -> LOONGARCH_GPR_SP + store_offset
53 * else
54 * std REG_TCC -> LOONGARCH_GPR_SP + store_offset
55 * REG_TCC = LOONGARCH_GPR_SP + store_offset
56 *
57 * std REG_TCC -> LOONGARCH_GPR_SP + store_offset
58 *
59 * The purpose of this code is to first push the TCC onto the stack,
60 * and then push the address of the TCC onto the stack.
61 * When bpf2bpf calls and tail calls are used in combination,
62 * the value in REG_TCC may be either a count or an address,
63 * so the two cases must be distinguished and handled separately.
64 */
65 emit_insn(ctx, addid, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
66 *store_offset -= sizeof(long);
67
68 emit_cond_jmp(ctx, BPF_JGT, REG_TCC, LOONGARCH_GPR_T3, 4);
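/*
 * Branch offsets here are in instruction units: the +4 above skips the
 * count path (std, addid and the unconditional jump) and lands on the
 * second std below, while the unconditional +2 skips that std.
 */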
69
70 /*
71 * If REG_TCC <= MAX_TAIL_CALL_CNT, the value in REG_TCC is a count,
72 * so push the tcc onto the stack
73 */
74 emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
75
76 /* Push the address of TCC into the REG_TCC */
77 emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
78
79 emit_uncond_jmp(ctx, 2);
80
81 /*
82 * If REG_TCC > MAX_TAIL_CALL_CNT, the value in REG_TCC is an address,
83 * so push the tcc_ptr onto the stack
84 */
85 emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
86 } else {
87 *store_offset -= sizeof(long);
88 emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
89 }
90
91 /* Push the tcc_ptr onto the stack */
92 *store_offset -= sizeof(long);
93 emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
94 }
95
96 /*
97 * eBPF prog stack layout:
98 *
99 * high
100 * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
101 * | $ra |
102 * +-------------------------+
103 * | $fp |
104 * +-------------------------+
105 * | $s0 |
106 * +-------------------------+
107 * | $s1 |
108 * +-------------------------+
109 * | $s2 |
110 * +-------------------------+
111 * | $s3 |
112 * +-------------------------+
113 * | $s4 |
114 * +-------------------------+
115 * | $s5 |
116 * +-------------------------+
117 * | tcc |
118 * +-------------------------+
119 * | tcc_ptr |
120 * +-------------------------+ <--BPF_REG_FP
121 * | prog->aux->stack_depth |
122 * | (optional) |
123 * current $sp -------------> +-------------------------+
124 * low
125 */
126 static void build_prologue(struct jit_ctx *ctx)
127 {
128 int i, stack_adjust = 0, store_offset, bpf_stack_adjust;
129 const struct bpf_prog *prog = ctx->prog;
130 const bool is_main_prog = !bpf_is_subprog(prog);
131
132 bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
133
134 /* To store ra, fp, s0, s1, s2, s3, s4, s5 */
135 stack_adjust += sizeof(long) * 8;
136
137 /* To store tcc and tcc_ptr */
138 stack_adjust += sizeof(long) * 2;
139
140 if (ctx->arena_vm_start)
141 stack_adjust += 8;
142
143 stack_adjust = round_up(stack_adjust, 16);
144 stack_adjust += bpf_stack_adjust;
145
146 move_reg(ctx, LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);
147 /* Reserve space for the move_imm + jirl instructions */
148 for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
149 emit_insn(ctx, nop);
150
151 /*
152 * First instruction initializes the tail call count (TCC)
153 * register to zero. On tail call we skip this instruction,
154 * and the TCC is passed in REG_TCC from the caller.
155 */
156 if (is_main_prog)
157 emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, 0);
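/*
 * A tail call enters at prologue + 7 instructions (the "move t0, ra"
 * guard, the 5 reserved nops and this TCC init), matching the
 * "jirl ..., 7" emitted for tail calls in __build_epilogue().
 */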
158
159 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
160
161 store_offset = stack_adjust - sizeof(long);
162 emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);
163
164 store_offset -= sizeof(long);
165 emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);
166
167 store_offset -= sizeof(long);
168 emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);
169
170 store_offset -= sizeof(long);
171 emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);
172
173 store_offset -= sizeof(long);
174 emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);
175
176 store_offset -= sizeof(long);
177 emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);
178
179 store_offset -= sizeof(long);
180 emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);
181
182 store_offset -= sizeof(long);
183 emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
184
185 if (ctx->arena_vm_start) {
186 store_offset -= sizeof(long);
187 emit_insn(ctx, std, REG_ARENA, LOONGARCH_GPR_SP, store_offset);
188 }
189
190 prepare_bpf_tail_call_cnt(ctx, &store_offset);
191
192 emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
193
194 if (bpf_stack_adjust)
195 emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
196
197 ctx->stack_size = stack_adjust;
198
199 if (ctx->arena_vm_start)
200 move_imm(ctx, REG_ARENA, ctx->arena_vm_start, false);
201 }
202
203 static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
204 {
205 int stack_adjust = ctx->stack_size;
206 int load_offset;
207
208 load_offset = stack_adjust - sizeof(long);
209 emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);
210
211 load_offset -= sizeof(long);
212 emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);
213
214 load_offset -= sizeof(long);
215 emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);
216
217 load_offset -= sizeof(long);
218 emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);
219
220 load_offset -= sizeof(long);
221 emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);
222
223 load_offset -= sizeof(long);
224 emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);
225
226 load_offset -= sizeof(long);
227 emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);
228
229 load_offset -= sizeof(long);
230 emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
231
232 if (ctx->arena_vm_start) {
233 load_offset -= sizeof(long);
234 emit_insn(ctx, ldd, REG_ARENA, LOONGARCH_GPR_SP, load_offset);
235 }
236
237 /*
238 * When pushing onto the stack, the order is tcc then tcc_ptr.
239 * When popping from the stack, pop tcc_ptr first, then tcc.
240 */
241 load_offset -= 2 * sizeof(long);
242 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
243
244 load_offset += sizeof(long);
245 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
246
247 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
248
249 if (!is_tail_call) {
250 /* Set return value */
251 emit_insn(ctx, addiw, LOONGARCH_GPR_A0, regmap[BPF_REG_0], 0);
252 /* Return to the caller */
253 emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
254 } else {
255 /*
256 * Call the next bpf prog, skipping its first 7 instructions:
257 * the "move t0, ra" guard, the 5 reserved nops and the TCC initialization.
258 */
259 emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 7);
260 }
261 }
262
263 static void build_epilogue(struct jit_ctx *ctx)
264 {
265 __build_epilogue(ctx, false);
266 }
267
268 bool bpf_jit_supports_kfunc_call(void)
269 {
270 return true;
271 }
272
273 bool bpf_jit_supports_far_kfunc_call(void)
274 {
275 return true;
276 }
277
278 static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
279 {
280 int off, tc_ninsn = 0;
281 int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
282 u8 a1 = LOONGARCH_GPR_A1;
283 u8 a2 = LOONGARCH_GPR_A2;
284 u8 t1 = LOONGARCH_GPR_T1;
285 u8 t2 = LOONGARCH_GPR_T2;
286 u8 t3 = LOONGARCH_GPR_T3;
287 const int idx0 = ctx->idx;
288
289 #define cur_offset (ctx->idx - idx0)
290 #define jmp_offset (tc_ninsn - (cur_offset))
291
292 /*
293 * a0: &ctx
294 * a1: &array
295 * a2: index
296 *
297 * if (index >= array->map.max_entries)
298 * goto out;
299 */
300 tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] : ctx->offset[0];
301 emit_zext_32(ctx, a2, true);
302
303 off = offsetof(struct bpf_array, map.max_entries);
304 emit_insn(ctx, ldwu, t1, a1, off);
305 /* bgeu $a2, $t1, jmp_offset */
306 if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
307 goto toofar;
308
309 /*
310 * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
311 * goto out;
312 */
313 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
314 emit_insn(ctx, ldd, t3, REG_TCC, 0);
315 emit_insn(ctx, addid, t3, t3, 1);
316 emit_insn(ctx, std, t3, REG_TCC, 0);
317 emit_insn(ctx, addid, t2, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
318 if (emit_tailcall_jmp(ctx, BPF_JSGT, t3, t2, jmp_offset) < 0)
319 goto toofar;
320
321 /*
322 * prog = array->ptrs[index];
323 * if (!prog)
324 * goto out;
325 */
326 emit_insn(ctx, alsld, t2, a2, a1, 2);
327 off = offsetof(struct bpf_array, ptrs);
328 emit_insn(ctx, ldd, t2, t2, off);
329 /* beq $t2, $zero, jmp_offset */
330 if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
331 goto toofar;
332
333 /* goto *(prog->bpf_func + 28); skip the prologue guard, nops and TCC init */
334 off = offsetof(struct bpf_prog, bpf_func);
335 emit_insn(ctx, ldd, t3, t2, off);
336 __build_epilogue(ctx, true);
337
338 return 0;
339
340 toofar:
341 pr_info_once("tail_call: jump too far\n");
342 return -1;
343 #undef cur_offset
344 #undef jmp_offset
345 }
346
347 static void emit_store_stack_imm64(struct jit_ctx *ctx, int reg, int stack_off, u64 imm64)
348 {
349 move_imm(ctx, reg, imm64, false);
350 emit_insn(ctx, std, reg, LOONGARCH_GPR_FP, stack_off);
351 }
352
353 static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx)
354 {
355 const u8 t1 = LOONGARCH_GPR_T1;
356 const u8 t2 = LOONGARCH_GPR_T2;
357 const u8 t3 = LOONGARCH_GPR_T3;
358 const u8 r0 = regmap[BPF_REG_0];
359 const u8 src = regmap[insn->src_reg];
360 const u8 dst = regmap[insn->dst_reg];
361 const s16 off = insn->off;
362 const s32 imm = insn->imm;
363 const bool isdw = BPF_SIZE(insn->code) == BPF_DW;
364
365 move_imm(ctx, t1, off, false);
366 emit_insn(ctx, addd, t1, dst, t1);
367 move_reg(ctx, t3, src);
368
369 switch (imm) {
370 /* lock *(size *)(dst + off) <op>= src */
371 case BPF_ADD:
372 switch (BPF_SIZE(insn->code)) {
373 case BPF_B:
374 if (!cpu_has_lam_bh) {
375 pr_err_once("bpf-jit: amadd.b instruction is not supported\n");
376 return -EINVAL;
377 }
378 emit_insn(ctx, amaddb, t2, t1, src);
379 break;
380 case BPF_H:
381 if (!cpu_has_lam_bh) {
382 pr_err_once("bpf-jit: amadd.h instruction is not supported\n");
383 return -EINVAL;
384 }
385 emit_insn(ctx, amaddh, t2, t1, src);
386 break;
387 case BPF_W:
388 emit_insn(ctx, amaddw, t2, t1, src);
389 break;
390 case BPF_DW:
391 emit_insn(ctx, amaddd, t2, t1, src);
392 break;
393 }
394 break;
395 case BPF_AND:
396 if (isdw)
397 emit_insn(ctx, amandd, t2, t1, src);
398 else
399 emit_insn(ctx, amandw, t2, t1, src);
400 break;
401 case BPF_OR:
402 if (isdw)
403 emit_insn(ctx, amord, t2, t1, src);
404 else
405 emit_insn(ctx, amorw, t2, t1, src);
406 break;
407 case BPF_XOR:
408 if (isdw)
409 emit_insn(ctx, amxord, t2, t1, src);
410 else
411 emit_insn(ctx, amxorw, t2, t1, src);
412 break;
413 /* src = atomic_fetch_<op>(dst + off, src) */
414 case BPF_ADD | BPF_FETCH:
415 switch (BPF_SIZE(insn->code)) {
416 case BPF_B:
417 if (!cpu_has_lam_bh) {
418 pr_err_once("bpf-jit: amadd.b instruction is not supported\n");
419 return -EINVAL;
420 }
421 emit_insn(ctx, amaddb, src, t1, t3);
422 emit_zext_32(ctx, src, true);
423 break;
424 case BPF_H:
425 if (!cpu_has_lam_bh) {
426 pr_err_once("bpf-jit: amadd.h instruction is not supported\n");
427 return -EINVAL;
428 }
429 emit_insn(ctx, amaddh, src, t1, t3);
430 emit_zext_32(ctx, src, true);
431 break;
432 case BPF_W:
433 emit_insn(ctx, amaddw, src, t1, t3);
434 emit_zext_32(ctx, src, true);
435 break;
436 case BPF_DW:
437 emit_insn(ctx, amaddd, src, t1, t3);
438 break;
439 }
440 break;
441 case BPF_AND | BPF_FETCH:
442 if (isdw) {
443 emit_insn(ctx, amandd, src, t1, t3);
444 } else {
445 emit_insn(ctx, amandw, src, t1, t3);
446 emit_zext_32(ctx, src, true);
447 }
448 break;
449 case BPF_OR | BPF_FETCH:
450 if (isdw) {
451 emit_insn(ctx, amord, src, t1, t3);
452 } else {
453 emit_insn(ctx, amorw, src, t1, t3);
454 emit_zext_32(ctx, src, true);
455 }
456 break;
457 case BPF_XOR | BPF_FETCH:
458 if (isdw) {
459 emit_insn(ctx, amxord, src, t1, t3);
460 } else {
461 emit_insn(ctx, amxorw, src, t1, t3);
462 emit_zext_32(ctx, src, true);
463 }
464 break;
465 /* src = atomic_xchg(dst + off, src); */
466 case BPF_XCHG:
467 switch (BPF_SIZE(insn->code)) {
468 case BPF_B:
469 if (!cpu_has_lam_bh) {
470 pr_err_once("bpf-jit: amswap.b instruction is not supported\n");
471 return -EINVAL;
472 }
473 emit_insn(ctx, amswapb, src, t1, t3);
474 emit_zext_32(ctx, src, true);
475 break;
476 case BPF_H:
477 if (!cpu_has_lam_bh) {
478 pr_err_once("bpf-jit: amswap.h instruction is not supported\n");
479 return -EINVAL;
480 }
481 emit_insn(ctx, amswaph, src, t1, t3);
482 emit_zext_32(ctx, src, true);
483 break;
484 case BPF_W:
485 emit_insn(ctx, amswapw, src, t1, t3);
486 emit_zext_32(ctx, src, true);
487 break;
488 case BPF_DW:
489 emit_insn(ctx, amswapd, src, t1, t3);
490 break;
491 }
492 break;
493 /* r0 = atomic_cmpxchg(dst + off, r0, src); */
494 case BPF_CMPXCHG:
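/*
 * Emulated with an LL/SC retry loop: the bne skips forward past the
 * store-conditional when the loaded value does not match the expected
 * one, and the beq with a negative offset retries from the load-linked
 * when the store-conditional fails (offsets are in instruction units).
 */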
495 move_reg(ctx, t2, r0);
496 if (isdw) {
497 emit_insn(ctx, lld, r0, t1, 0);
498 emit_insn(ctx, bne, t2, r0, 4);
499 move_reg(ctx, t3, src);
500 emit_insn(ctx, scd, t3, t1, 0);
501 emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
502 } else {
503 emit_insn(ctx, llw, r0, t1, 0);
504 emit_zext_32(ctx, t2, true);
505 emit_zext_32(ctx, r0, true);
506 emit_insn(ctx, bne, t2, r0, 4);
507 move_reg(ctx, t3, src);
508 emit_insn(ctx, scw, t3, t1, 0);
509 emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
510 emit_zext_32(ctx, r0, true);
511 }
512 break;
513 default:
514 pr_err_once("bpf-jit: invalid atomic read-modify-write opcode %02x\n", imm);
515 return -EINVAL;
516 }
517
518 return 0;
519 }
520
521 static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx)
522 {
523 const u8 t1 = LOONGARCH_GPR_T1;
524 const u8 src = regmap[insn->src_reg];
525 const u8 dst = regmap[insn->dst_reg];
526 const s16 off = insn->off;
527 const s32 imm = insn->imm;
528
529 switch (imm) {
530 /* dst_reg = load_acquire(src_reg + off16) */
531 case BPF_LOAD_ACQ:
532 switch (BPF_SIZE(insn->code)) {
533 case BPF_B:
534 if (is_signed_imm12(off)) {
535 emit_insn(ctx, ldbu, dst, src, off);
536 } else {
537 move_imm(ctx, t1, off, false);
538 emit_insn(ctx, ldxbu, dst, src, t1);
539 }
540 break;
541 case BPF_H:
542 if (is_signed_imm12(off)) {
543 emit_insn(ctx, ldhu, dst, src, off);
544 } else {
545 move_imm(ctx, t1, off, false);
546 emit_insn(ctx, ldxhu, dst, src, t1);
547 }
548 break;
549 case BPF_W:
550 if (is_signed_imm12(off)) {
551 emit_insn(ctx, ldwu, dst, src, off);
552 } else {
553 move_imm(ctx, t1, off, false);
554 emit_insn(ctx, ldxwu, dst, src, t1);
555 }
556 break;
557 case BPF_DW:
558 if (is_signed_imm12(off)) {
559 emit_insn(ctx, ldd, dst, src, off);
560 } else {
561 move_imm(ctx, t1, off, false);
562 emit_insn(ctx, ldxd, dst, src, t1);
563 }
564 break;
565 }
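/* dbar hint 0b10100: order the load above before subsequent loads and stores (acquire) */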
566 emit_insn(ctx, dbar, 0b10100);
567 break;
568 /* store_release(dst_reg + off16, src_reg) */
569 case BPF_STORE_REL:
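/* dbar hint 0b10010: order prior loads and stores before the following store (release) */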
570 emit_insn(ctx, dbar, 0b10010);
571 switch (BPF_SIZE(insn->code)) {
572 case BPF_B:
573 if (is_signed_imm12(off)) {
574 emit_insn(ctx, stb, src, dst, off);
575 } else {
576 move_imm(ctx, t1, off, false);
577 emit_insn(ctx, stxb, src, dst, t1);
578 }
579 break;
580 case BPF_H:
581 if (is_signed_imm12(off)) {
582 emit_insn(ctx, sth, src, dst, off);
583 } else {
584 move_imm(ctx, t1, off, false);
585 emit_insn(ctx, stxh, src, dst, t1);
586 }
587 break;
588 case BPF_W:
589 if (is_signed_imm12(off)) {
590 emit_insn(ctx, stw, src, dst, off);
591 } else {
592 move_imm(ctx, t1, off, false);
593 emit_insn(ctx, stxw, src, dst, t1);
594 }
595 break;
596 case BPF_DW:
597 if (is_signed_imm12(off)) {
598 emit_insn(ctx, std, src, dst, off);
599 } else {
600 move_imm(ctx, t1, off, false);
601 emit_insn(ctx, stxd, src, dst, t1);
602 }
603 break;
604 }
605 break;
606 default:
607 pr_err_once("bpf-jit: invalid atomic load/store opcode %02x\n", imm);
608 return -EINVAL;
609 }
610
611 return 0;
612 }
613
614 static bool is_signed_bpf_cond(u8 cond)
615 {
616 return cond == BPF_JSGT || cond == BPF_JSLT ||
617 cond == BPF_JSGE || cond == BPF_JSLE;
618 }
619
620 #define BPF_FIXUP_REG_MASK GENMASK(31, 27)
621 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
622 #define REG_DONT_CLEAR_MARKER 0
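/*
 * A dst_reg value of 0 would be the hard-wired $zero register, which is never
 * a mapped BPF destination, so 0 can safely mark "no register to clear".
 */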
623
624 bool ex_handler_bpf(const struct exception_table_entry *ex,
625 struct pt_regs *regs)
626 {
627 int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
628 off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
629
630 if (dst_reg != REG_DONT_CLEAR_MARKER)
631 regs->regs[dst_reg] = 0;
632 regs->csr_era = (unsigned long)&ex->fixup - offset;
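/*
 * fixup_offset was computed as &ex->fixup - (fault_pc + 4), so this
 * resumes at the instruction after the faulting access; see
 * add_exception_handler() below.
 */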
633
634 return true;
635 }
636
637 /* For accesses to BTF pointers, add an entry to the exception table */
638 static int add_exception_handler(const struct bpf_insn *insn,
639 struct jit_ctx *ctx,
640 int dst_reg)
641 {
642 unsigned long pc;
643 off_t ins_offset, fixup_offset;
644 struct exception_table_entry *ex;
645
646 if (!ctx->image || !ctx->ro_image || !ctx->prog->aux->extable)
647 return 0;
648
649 if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
650 BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
651 BPF_MODE(insn->code) != BPF_PROBE_MEM32)
652 return 0;
653
654 if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
655 return -EINVAL;
656
657 ex = &ctx->prog->aux->extable[ctx->num_exentries];
658 pc = (unsigned long)&ctx->ro_image[ctx->idx - 1];
659
660 /*
661 * This is the relative offset of the instruction that may fault from
662 * the exception table itself. This will be written to the exception
663 * table and if this instruction faults, the destination register will
664 * be set to '0' and the execution will jump to the next instruction.
665 */
666 ins_offset = pc - (long)&ex->insn;
667 if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
668 return -ERANGE;
669
670 /*
671 * Since the extable follows the program, the fixup offset is always
672 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
673 * to keep things simple, and put the destination register in the upper
674 * bits. We don't need to worry about buildtime or runtime sort
675 * modifying the upper bits because the table is already sorted, and
676 * isn't part of the main exception table.
677 *
678 * The fixup_offset is set to the next instruction from the instruction
679 * that may fault. The execution will jump to this after handling the fault.
680 */
681 fixup_offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
682 if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
683 return -ERANGE;
684
685 /*
686 * The offsets above have been calculated using the RO buffer but we
687 * need to use the R/W buffer for writes. Switch ex to rw buffer for writing.
688 */
689 ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);
690 ex->insn = ins_offset;
691 ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
692 FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
693 ex->type = EX_TYPE_BPF;
694
695 ctx->num_exentries++;
696
697 return 0;
698 }
699
700 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
701 {
702 u8 tm = -1;
703 u64 func_addr;
704 bool func_addr_fixed, sign_extend;
705 int i = insn - ctx->prog->insnsi;
706 int ret, jmp_offset, tcc_ptr_off;
707 const u8 code = insn->code;
708 const u8 cond = BPF_OP(code);
709 const u8 t1 = LOONGARCH_GPR_T1;
710 const u8 t2 = LOONGARCH_GPR_T2;
711 const u8 t3 = LOONGARCH_GPR_T3;
712 u8 src = regmap[insn->src_reg];
713 u8 dst = regmap[insn->dst_reg];
714 const s16 off = insn->off;
715 const s32 imm = insn->imm;
716 const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
717
718 switch (code) {
719 /* dst = src */
720 case BPF_ALU | BPF_MOV | BPF_X:
721 case BPF_ALU64 | BPF_MOV | BPF_X:
722 if (insn_is_cast_user(insn)) {
723 move_reg(ctx, t1, src);
724 emit_zext_32(ctx, t1, true);
725 move_imm(ctx, dst, (ctx->user_vm_start >> 32) << 32, false);
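/* Preserve NULL: if src is 0, skip the or below so the result stays 0 */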
726 emit_insn(ctx, beq, t1, LOONGARCH_GPR_ZERO, 2);
727 emit_insn(ctx, or, t1, dst, t1);
728 move_reg(ctx, dst, t1);
729 break;
730 }
731 switch (off) {
732 case 0:
733 move_reg(ctx, dst, src);
734 emit_zext_32(ctx, dst, is32);
735 break;
736 case 8:
737 emit_insn(ctx, extwb, dst, src);
738 emit_zext_32(ctx, dst, is32);
739 break;
740 case 16:
741 emit_insn(ctx, extwh, dst, src);
742 emit_zext_32(ctx, dst, is32);
743 break;
744 case 32:
745 emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
746 break;
747 }
748 break;
749
750 /* dst = imm */
751 case BPF_ALU | BPF_MOV | BPF_K:
752 case BPF_ALU64 | BPF_MOV | BPF_K:
753 move_imm(ctx, dst, imm, is32);
754 break;
755
756 /* dst = dst + src */
757 case BPF_ALU | BPF_ADD | BPF_X:
758 case BPF_ALU64 | BPF_ADD | BPF_X:
759 emit_insn(ctx, addd, dst, dst, src);
760 emit_zext_32(ctx, dst, is32);
761 break;
762
763 /* dst = dst + imm */
764 case BPF_ALU | BPF_ADD | BPF_K:
765 case BPF_ALU64 | BPF_ADD | BPF_K:
766 if (is_signed_imm12(imm)) {
767 emit_insn(ctx, addid, dst, dst, imm);
768 } else {
769 move_imm(ctx, t1, imm, is32);
770 emit_insn(ctx, addd, dst, dst, t1);
771 }
772 emit_zext_32(ctx, dst, is32);
773 break;
774
775 /* dst = dst - src */
776 case BPF_ALU | BPF_SUB | BPF_X:
777 case BPF_ALU64 | BPF_SUB | BPF_X:
778 emit_insn(ctx, subd, dst, dst, src);
779 emit_zext_32(ctx, dst, is32);
780 break;
781
782 /* dst = dst - imm */
783 case BPF_ALU | BPF_SUB | BPF_K:
784 case BPF_ALU64 | BPF_SUB | BPF_K:
785 if (is_signed_imm12(-imm)) {
786 emit_insn(ctx, addid, dst, dst, -imm);
787 } else {
788 move_imm(ctx, t1, imm, is32);
789 emit_insn(ctx, subd, dst, dst, t1);
790 }
791 emit_zext_32(ctx, dst, is32);
792 break;
793
794 /* dst = dst * src */
795 case BPF_ALU | BPF_MUL | BPF_X:
796 case BPF_ALU64 | BPF_MUL | BPF_X:
797 emit_insn(ctx, muld, dst, dst, src);
798 emit_zext_32(ctx, dst, is32);
799 break;
800
801 /* dst = dst * imm */
802 case BPF_ALU | BPF_MUL | BPF_K:
803 case BPF_ALU64 | BPF_MUL | BPF_K:
804 move_imm(ctx, t1, imm, is32);
805 emit_insn(ctx, muld, dst, dst, t1);
806 emit_zext_32(ctx, dst, is32);
807 break;
808
809 /* dst = dst / src */
810 case BPF_ALU | BPF_DIV | BPF_X:
811 case BPF_ALU64 | BPF_DIV | BPF_X:
812 if (!off) {
813 emit_zext_32(ctx, dst, is32);
814 move_reg(ctx, t1, src);
815 emit_zext_32(ctx, t1, is32);
816 emit_insn(ctx, divdu, dst, dst, t1);
817 emit_zext_32(ctx, dst, is32);
818 } else {
819 emit_sext_32(ctx, dst, is32);
820 move_reg(ctx, t1, src);
821 emit_sext_32(ctx, t1, is32);
822 emit_insn(ctx, divd, dst, dst, t1);
823 emit_sext_32(ctx, dst, is32);
824 }
825 break;
826
827 /* dst = dst / imm */
828 case BPF_ALU | BPF_DIV | BPF_K:
829 case BPF_ALU64 | BPF_DIV | BPF_K:
830 if (!off) {
831 move_imm(ctx, t1, imm, is32);
832 emit_zext_32(ctx, dst, is32);
833 emit_insn(ctx, divdu, dst, dst, t1);
834 emit_zext_32(ctx, dst, is32);
835 } else {
836 move_imm(ctx, t1, imm, false);
837 emit_sext_32(ctx, t1, is32);
838 emit_sext_32(ctx, dst, is32);
839 emit_insn(ctx, divd, dst, dst, t1);
840 emit_sext_32(ctx, dst, is32);
841 }
842 break;
843
844 /* dst = dst % src */
845 case BPF_ALU | BPF_MOD | BPF_X:
846 case BPF_ALU64 | BPF_MOD | BPF_X:
847 if (!off) {
848 emit_zext_32(ctx, dst, is32);
849 move_reg(ctx, t1, src);
850 emit_zext_32(ctx, t1, is32);
851 emit_insn(ctx, moddu, dst, dst, t1);
852 emit_zext_32(ctx, dst, is32);
853 } else {
854 emit_sext_32(ctx, dst, is32);
855 move_reg(ctx, t1, src);
856 emit_sext_32(ctx, t1, is32);
857 emit_insn(ctx, modd, dst, dst, t1);
858 emit_sext_32(ctx, dst, is32);
859 }
860 break;
861
862 /* dst = dst % imm */
863 case BPF_ALU | BPF_MOD | BPF_K:
864 case BPF_ALU64 | BPF_MOD | BPF_K:
865 if (!off) {
866 move_imm(ctx, t1, imm, is32);
867 emit_zext_32(ctx, dst, is32);
868 emit_insn(ctx, moddu, dst, dst, t1);
869 emit_zext_32(ctx, dst, is32);
870 } else {
871 move_imm(ctx, t1, imm, false);
872 emit_sext_32(ctx, t1, is32);
873 emit_sext_32(ctx, dst, is32);
874 emit_insn(ctx, modd, dst, dst, t1);
875 emit_sext_32(ctx, dst, is32);
876 }
877 break;
878
879 /* dst = -dst */
880 case BPF_ALU | BPF_NEG:
881 case BPF_ALU64 | BPF_NEG:
882 move_imm(ctx, t1, imm, is32);
883 emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
884 emit_zext_32(ctx, dst, is32);
885 break;
886
887 /* dst = dst & src */
888 case BPF_ALU | BPF_AND | BPF_X:
889 case BPF_ALU64 | BPF_AND | BPF_X:
890 emit_insn(ctx, and, dst, dst, src);
891 emit_zext_32(ctx, dst, is32);
892 break;
893
894 /* dst = dst & imm */
895 case BPF_ALU | BPF_AND | BPF_K:
896 case BPF_ALU64 | BPF_AND | BPF_K:
897 if (is_unsigned_imm12(imm)) {
898 emit_insn(ctx, andi, dst, dst, imm);
899 } else {
900 move_imm(ctx, t1, imm, is32);
901 emit_insn(ctx, and, dst, dst, t1);
902 }
903 emit_zext_32(ctx, dst, is32);
904 break;
905
906 /* dst = dst | src */
907 case BPF_ALU | BPF_OR | BPF_X:
908 case BPF_ALU64 | BPF_OR | BPF_X:
909 emit_insn(ctx, or, dst, dst, src);
910 emit_zext_32(ctx, dst, is32);
911 break;
912
913 /* dst = dst | imm */
914 case BPF_ALU | BPF_OR | BPF_K:
915 case BPF_ALU64 | BPF_OR | BPF_K:
916 if (is_unsigned_imm12(imm)) {
917 emit_insn(ctx, ori, dst, dst, imm);
918 } else {
919 move_imm(ctx, t1, imm, is32);
920 emit_insn(ctx, or, dst, dst, t1);
921 }
922 emit_zext_32(ctx, dst, is32);
923 break;
924
925 /* dst = dst ^ src */
926 case BPF_ALU | BPF_XOR | BPF_X:
927 case BPF_ALU64 | BPF_XOR | BPF_X:
928 emit_insn(ctx, xor, dst, dst, src);
929 emit_zext_32(ctx, dst, is32);
930 break;
931
932 /* dst = dst ^ imm */
933 case BPF_ALU | BPF_XOR | BPF_K:
934 case BPF_ALU64 | BPF_XOR | BPF_K:
935 if (is_unsigned_imm12(imm)) {
936 emit_insn(ctx, xori, dst, dst, imm);
937 } else {
938 move_imm(ctx, t1, imm, is32);
939 emit_insn(ctx, xor, dst, dst, t1);
940 }
941 emit_zext_32(ctx, dst, is32);
942 break;
943
944 /* dst = dst << src (logical) */
945 case BPF_ALU | BPF_LSH | BPF_X:
946 emit_insn(ctx, sllw, dst, dst, src);
947 emit_zext_32(ctx, dst, is32);
948 break;
949
950 case BPF_ALU64 | BPF_LSH | BPF_X:
951 emit_insn(ctx, slld, dst, dst, src);
952 break;
953
954 /* dst = dst << imm (logical) */
955 case BPF_ALU | BPF_LSH | BPF_K:
956 emit_insn(ctx, slliw, dst, dst, imm);
957 emit_zext_32(ctx, dst, is32);
958 break;
959
960 case BPF_ALU64 | BPF_LSH | BPF_K:
961 emit_insn(ctx, sllid, dst, dst, imm);
962 break;
963
964 /* dst = dst >> src (logical) */
965 case BPF_ALU | BPF_RSH | BPF_X:
966 emit_insn(ctx, srlw, dst, dst, src);
967 emit_zext_32(ctx, dst, is32);
968 break;
969
970 case BPF_ALU64 | BPF_RSH | BPF_X:
971 emit_insn(ctx, srld, dst, dst, src);
972 break;
973
974 /* dst = dst >> imm (logical) */
975 case BPF_ALU | BPF_RSH | BPF_K:
976 emit_insn(ctx, srliw, dst, dst, imm);
977 emit_zext_32(ctx, dst, is32);
978 break;
979
980 case BPF_ALU64 | BPF_RSH | BPF_K:
981 emit_insn(ctx, srlid, dst, dst, imm);
982 break;
983
984 /* dst = dst >> src (arithmetic) */
985 case BPF_ALU | BPF_ARSH | BPF_X:
986 emit_insn(ctx, sraw, dst, dst, src);
987 emit_zext_32(ctx, dst, is32);
988 break;
989
990 case BPF_ALU64 | BPF_ARSH | BPF_X:
991 emit_insn(ctx, srad, dst, dst, src);
992 break;
993
994 /* dst = dst >> imm (arithmetic) */
995 case BPF_ALU | BPF_ARSH | BPF_K:
996 emit_insn(ctx, sraiw, dst, dst, imm);
997 emit_zext_32(ctx, dst, is32);
998 break;
999
1000 case BPF_ALU64 | BPF_ARSH | BPF_K:
1001 emit_insn(ctx, sraid, dst, dst, imm);
1002 break;
1003
1004 /* dst = BSWAP##imm(dst) */
1005 case BPF_ALU | BPF_END | BPF_FROM_LE:
1006 switch (imm) {
1007 case 16:
1008 /* zero-extend 16 bits into 64 bits */
1009 emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
1010 break;
1011 case 32:
1012 /* zero-extend 32 bits into 64 bits */
1013 emit_zext_32(ctx, dst, is32);
1014 break;
1015 case 64:
1016 /* do nothing */
1017 break;
1018 }
1019 break;
1020
1021 case BPF_ALU | BPF_END | BPF_FROM_BE:
1022 case BPF_ALU64 | BPF_END | BPF_FROM_LE:
1023 switch (imm) {
1024 case 16:
1025 emit_insn(ctx, revb2h, dst, dst);
1026 /* zero-extend 16 bits into 64 bits */
1027 emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
1028 break;
1029 case 32:
1030 emit_insn(ctx, revb2w, dst, dst);
1031 /* clear the upper 32 bits */
1032 emit_zext_32(ctx, dst, true);
1033 break;
1034 case 64:
1035 emit_insn(ctx, revbd, dst, dst);
1036 break;
1037 }
1038 break;
1039
1040 /* PC += off if dst cond src */
1041 case BPF_JMP | BPF_JEQ | BPF_X:
1042 case BPF_JMP | BPF_JNE | BPF_X:
1043 case BPF_JMP | BPF_JGT | BPF_X:
1044 case BPF_JMP | BPF_JGE | BPF_X:
1045 case BPF_JMP | BPF_JLT | BPF_X:
1046 case BPF_JMP | BPF_JLE | BPF_X:
1047 case BPF_JMP | BPF_JSGT | BPF_X:
1048 case BPF_JMP | BPF_JSGE | BPF_X:
1049 case BPF_JMP | BPF_JSLT | BPF_X:
1050 case BPF_JMP | BPF_JSLE | BPF_X:
1051 case BPF_JMP32 | BPF_JEQ | BPF_X:
1052 case BPF_JMP32 | BPF_JNE | BPF_X:
1053 case BPF_JMP32 | BPF_JGT | BPF_X:
1054 case BPF_JMP32 | BPF_JGE | BPF_X:
1055 case BPF_JMP32 | BPF_JLT | BPF_X:
1056 case BPF_JMP32 | BPF_JLE | BPF_X:
1057 case BPF_JMP32 | BPF_JSGT | BPF_X:
1058 case BPF_JMP32 | BPF_JSGE | BPF_X:
1059 case BPF_JMP32 | BPF_JSLT | BPF_X:
1060 case BPF_JMP32 | BPF_JSLE | BPF_X:
1061 jmp_offset = bpf2la_offset(i, off, ctx);
1062 move_reg(ctx, t1, dst);
1063 move_reg(ctx, t2, src);
1064 if (is_signed_bpf_cond(BPF_OP(code))) {
1065 emit_sext_32(ctx, t1, is32);
1066 emit_sext_32(ctx, t2, is32);
1067 } else {
1068 emit_zext_32(ctx, t1, is32);
1069 emit_zext_32(ctx, t2, is32);
1070 }
1071 if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
1072 goto toofar;
1073 break;
1074
1075 /* PC += off if dst cond imm */
1076 case BPF_JMP | BPF_JEQ | BPF_K:
1077 case BPF_JMP | BPF_JNE | BPF_K:
1078 case BPF_JMP | BPF_JGT | BPF_K:
1079 case BPF_JMP | BPF_JGE | BPF_K:
1080 case BPF_JMP | BPF_JLT | BPF_K:
1081 case BPF_JMP | BPF_JLE | BPF_K:
1082 case BPF_JMP | BPF_JSGT | BPF_K:
1083 case BPF_JMP | BPF_JSGE | BPF_K:
1084 case BPF_JMP | BPF_JSLT | BPF_K:
1085 case BPF_JMP | BPF_JSLE | BPF_K:
1086 case BPF_JMP32 | BPF_JEQ | BPF_K:
1087 case BPF_JMP32 | BPF_JNE | BPF_K:
1088 case BPF_JMP32 | BPF_JGT | BPF_K:
1089 case BPF_JMP32 | BPF_JGE | BPF_K:
1090 case BPF_JMP32 | BPF_JLT | BPF_K:
1091 case BPF_JMP32 | BPF_JLE | BPF_K:
1092 case BPF_JMP32 | BPF_JSGT | BPF_K:
1093 case BPF_JMP32 | BPF_JSGE | BPF_K:
1094 case BPF_JMP32 | BPF_JSLT | BPF_K:
1095 case BPF_JMP32 | BPF_JSLE | BPF_K:
1096 jmp_offset = bpf2la_offset(i, off, ctx);
1097 if (imm) {
1098 move_imm(ctx, t1, imm, false);
1099 tm = t1;
1100 } else {
1101 /* If imm is 0, simply use zero register. */
1102 tm = LOONGARCH_GPR_ZERO;
1103 }
1104 move_reg(ctx, t2, dst);
1105 if (is_signed_bpf_cond(BPF_OP(code))) {
1106 emit_sext_32(ctx, tm, is32);
1107 emit_sext_32(ctx, t2, is32);
1108 } else {
1109 emit_zext_32(ctx, tm, is32);
1110 emit_zext_32(ctx, t2, is32);
1111 }
1112 if (emit_cond_jmp(ctx, cond, t2, tm, jmp_offset) < 0)
1113 goto toofar;
1114 break;
1115
1116 /* PC += off if dst & src */
1117 case BPF_JMP | BPF_JSET | BPF_X:
1118 case BPF_JMP32 | BPF_JSET | BPF_X:
1119 jmp_offset = bpf2la_offset(i, off, ctx);
1120 emit_insn(ctx, and, t1, dst, src);
1121 emit_zext_32(ctx, t1, is32);
1122 if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
1123 goto toofar;
1124 break;
1125
1126 /* PC += off if dst & imm */
1127 case BPF_JMP | BPF_JSET | BPF_K:
1128 case BPF_JMP32 | BPF_JSET | BPF_K:
1129 jmp_offset = bpf2la_offset(i, off, ctx);
1130 move_imm(ctx, t1, imm, is32);
1131 emit_insn(ctx, and, t1, dst, t1);
1132 emit_zext_32(ctx, t1, is32);
1133 if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
1134 goto toofar;
1135 break;
1136
1137 /* PC += off */
1138 case BPF_JMP | BPF_JA:
1139 case BPF_JMP32 | BPF_JA:
1140 if (BPF_CLASS(code) == BPF_JMP)
1141 jmp_offset = bpf2la_offset(i, off, ctx);
1142 else
1143 jmp_offset = bpf2la_offset(i, imm, ctx);
1144 if (emit_uncond_jmp(ctx, jmp_offset) < 0)
1145 goto toofar;
1146 break;
1147
1148 /* function call */
1149 case BPF_JMP | BPF_CALL:
1150 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
1151 &func_addr, &func_addr_fixed);
1152 if (ret < 0)
1153 return ret;
1154
1155 if (insn->src_reg == BPF_PSEUDO_CALL) {
1156 tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
1157 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
1158 }
1159
1160 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
1161 const struct btf_func_model *m;
1162 int i;
1163
1164 m = bpf_jit_find_kfunc_model(ctx->prog, insn);
1165 if (!m)
1166 return -EINVAL;
1167
1168 for (i = 0; i < m->nr_args; i++) {
1169 u8 reg = regmap[BPF_REG_1 + i];
1170 bool sign = m->arg_flags[i] & BTF_FMODEL_SIGNED_ARG;
1171
1172 emit_abi_ext(ctx, reg, m->arg_size[i], sign);
1173 }
1174 }
1175
1176 move_addr(ctx, t1, func_addr);
1177 emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0);
1178
1179 if (insn->src_reg != BPF_PSEUDO_CALL)
1180 move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
1181
1182 break;
1183
1184 /* tail call */
1185 case BPF_JMP | BPF_TAIL_CALL:
1186 if (emit_bpf_tail_call(ctx, i) < 0)
1187 return -EINVAL;
1188 break;
1189
1190 /* function return */
1191 case BPF_JMP | BPF_EXIT:
1192 if (i == ctx->prog->len - 1)
1193 break;
1194
1195 jmp_offset = epilogue_offset(ctx);
1196 if (emit_uncond_jmp(ctx, jmp_offset) < 0)
1197 goto toofar;
1198 break;
1199
1200 /* dst = imm64 */
1201 case BPF_LD | BPF_IMM | BPF_DW:
1202 {
1203 const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
1204
1205 if (bpf_pseudo_func(insn))
1206 move_addr(ctx, dst, imm64);
1207 else
1208 move_imm(ctx, dst, imm64, is32);
1209 return 1;
1210 }
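/*
 * Returning 1 tells build_body() to skip one extra insn slot, since
 * BPF_LD | BPF_IMM | BPF_DW occupies two 8-byte BPF instructions.
 */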
1211
1212 /* dst = *(size *)(src + off) */
1213 case BPF_LDX | BPF_MEM | BPF_B:
1214 case BPF_LDX | BPF_MEM | BPF_H:
1215 case BPF_LDX | BPF_MEM | BPF_W:
1216 case BPF_LDX | BPF_MEM | BPF_DW:
1217 case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1218 case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1219 case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1220 case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1221 /* dst_reg = (s64)*(signed size *)(src_reg + off) */
1222 case BPF_LDX | BPF_MEMSX | BPF_B:
1223 case BPF_LDX | BPF_MEMSX | BPF_H:
1224 case BPF_LDX | BPF_MEMSX | BPF_W:
1225 case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
1226 case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
1227 case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
1228 /* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + REG_ARENA + off) */
1229 case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
1230 case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
1231 case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
1232 case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
1233 sign_extend = BPF_MODE(code) == BPF_MEMSX ||
1234 BPF_MODE(code) == BPF_PROBE_MEMSX;
1235
1236 if (BPF_MODE(code) == BPF_PROBE_MEM32) {
1237 emit_insn(ctx, addd, t2, src, REG_ARENA);
1238 src = t2;
1239 }
1240
1241 switch (BPF_SIZE(code)) {
1242 case BPF_B:
1243 if (is_signed_imm12(off)) {
1244 if (sign_extend)
1245 emit_insn(ctx, ldb, dst, src, off);
1246 else
1247 emit_insn(ctx, ldbu, dst, src, off);
1248 } else {
1249 move_imm(ctx, t1, off, is32);
1250 if (sign_extend)
1251 emit_insn(ctx, ldxb, dst, src, t1);
1252 else
1253 emit_insn(ctx, ldxbu, dst, src, t1);
1254 }
1255 break;
1256 case BPF_H:
1257 if (is_signed_imm12(off)) {
1258 if (sign_extend)
1259 emit_insn(ctx, ldh, dst, src, off);
1260 else
1261 emit_insn(ctx, ldhu, dst, src, off);
1262 } else {
1263 move_imm(ctx, t1, off, is32);
1264 if (sign_extend)
1265 emit_insn(ctx, ldxh, dst, src, t1);
1266 else
1267 emit_insn(ctx, ldxhu, dst, src, t1);
1268 }
1269 break;
1270 case BPF_W:
1271 if (is_signed_imm12(off)) {
1272 if (sign_extend)
1273 emit_insn(ctx, ldw, dst, src, off);
1274 else
1275 emit_insn(ctx, ldwu, dst, src, off);
1276 } else {
1277 move_imm(ctx, t1, off, is32);
1278 if (sign_extend)
1279 emit_insn(ctx, ldxw, dst, src, t1);
1280 else
1281 emit_insn(ctx, ldxwu, dst, src, t1);
1282 }
1283 break;
1284 case BPF_DW:
1285 move_imm(ctx, t1, off, is32);
1286 emit_insn(ctx, ldxd, dst, src, t1);
1287 break;
1288 }
1289
1290 ret = add_exception_handler(insn, ctx, dst);
1291 if (ret)
1292 return ret;
1293 break;
1294
1295 /* *(size *)(dst + off) = imm */
1296 case BPF_ST | BPF_MEM | BPF_B:
1297 case BPF_ST | BPF_MEM | BPF_H:
1298 case BPF_ST | BPF_MEM | BPF_W:
1299 case BPF_ST | BPF_MEM | BPF_DW:
1300 /* ST | PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = imm */
1301 case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
1302 case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
1303 case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
1304 case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
1305 if (BPF_MODE(code) == BPF_PROBE_MEM32) {
1306 emit_insn(ctx, addd, t3, dst, REG_ARENA);
1307 dst = t3;
1308 }
1309
1310 switch (BPF_SIZE(code)) {
1311 case BPF_B:
1312 move_imm(ctx, t1, imm, is32);
1313 if (is_signed_imm12(off)) {
1314 emit_insn(ctx, stb, t1, dst, off);
1315 } else {
1316 move_imm(ctx, t2, off, is32);
1317 emit_insn(ctx, stxb, t1, dst, t2);
1318 }
1319 break;
1320 case BPF_H:
1321 move_imm(ctx, t1, imm, is32);
1322 if (is_signed_imm12(off)) {
1323 emit_insn(ctx, sth, t1, dst, off);
1324 } else {
1325 move_imm(ctx, t2, off, is32);
1326 emit_insn(ctx, stxh, t1, dst, t2);
1327 }
1328 break;
1329 case BPF_W:
1330 move_imm(ctx, t1, imm, is32);
1331 if (is_signed_imm12(off)) {
1332 emit_insn(ctx, stw, t1, dst, off);
1333 } else if (is_signed_imm14(off)) {
1334 emit_insn(ctx, stptrw, t1, dst, off);
1335 } else {
1336 move_imm(ctx, t2, off, is32);
1337 emit_insn(ctx, stxw, t1, dst, t2);
1338 }
1339 break;
1340 case BPF_DW:
1341 move_imm(ctx, t1, imm, is32);
1342 if (is_signed_imm12(off)) {
1343 emit_insn(ctx, std, t1, dst, off);
1344 } else if (is_signed_imm14(off)) {
1345 emit_insn(ctx, stptrd, t1, dst, off);
1346 } else {
1347 move_imm(ctx, t2, off, is32);
1348 emit_insn(ctx, stxd, t1, dst, t2);
1349 }
1350 break;
1351 }
1352
1353 ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER);
1354 if (ret)
1355 return ret;
1356 break;
1357
1358 /* *(size *)(dst + off) = src */
1359 case BPF_STX | BPF_MEM | BPF_B:
1360 case BPF_STX | BPF_MEM | BPF_H:
1361 case BPF_STX | BPF_MEM | BPF_W:
1362 case BPF_STX | BPF_MEM | BPF_DW:
1363 /* STX | PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = src */
1364 case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
1365 case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
1366 case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
1367 case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
1368 if (BPF_MODE(code) == BPF_PROBE_MEM32) {
1369 emit_insn(ctx, addd, t2, dst, REG_ARENA);
1370 dst = t2;
1371 }
1372
1373 switch (BPF_SIZE(code)) {
1374 case BPF_B:
1375 if (is_signed_imm12(off)) {
1376 emit_insn(ctx, stb, src, dst, off);
1377 } else {
1378 move_imm(ctx, t1, off, is32);
1379 emit_insn(ctx, stxb, src, dst, t1);
1380 }
1381 break;
1382 case BPF_H:
1383 if (is_signed_imm12(off)) {
1384 emit_insn(ctx, sth, src, dst, off);
1385 } else {
1386 move_imm(ctx, t1, off, is32);
1387 emit_insn(ctx, stxh, src, dst, t1);
1388 }
1389 break;
1390 case BPF_W:
1391 if (is_signed_imm12(off)) {
1392 emit_insn(ctx, stw, src, dst, off);
1393 } else if (is_signed_imm14(off)) {
1394 emit_insn(ctx, stptrw, src, dst, off);
1395 } else {
1396 move_imm(ctx, t1, off, is32);
1397 emit_insn(ctx, stxw, src, dst, t1);
1398 }
1399 break;
1400 case BPF_DW:
1401 if (is_signed_imm12(off)) {
1402 emit_insn(ctx, std, src, dst, off);
1403 } else if (is_signed_imm14(off)) {
1404 emit_insn(ctx, stptrd, src, dst, off);
1405 } else {
1406 move_imm(ctx, t1, off, is32);
1407 emit_insn(ctx, stxd, src, dst, t1);
1408 }
1409 break;
1410 }
1411
1412 ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER);
1413 if (ret)
1414 return ret;
1415 break;
1416
1417 /* Atomics */
1418 case BPF_STX | BPF_ATOMIC | BPF_B:
1419 case BPF_STX | BPF_ATOMIC | BPF_H:
1420 case BPF_STX | BPF_ATOMIC | BPF_W:
1421 case BPF_STX | BPF_ATOMIC | BPF_DW:
1422 if (!bpf_atomic_is_load_store(insn))
1423 ret = emit_atomic_rmw(insn, ctx);
1424 else
1425 ret = emit_atomic_ld_st(insn, ctx);
1426 if (ret)
1427 return ret;
1428 break;
1429
1430 /* Speculation barrier */
1431 case BPF_ST | BPF_NOSPEC:
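/* No barrier is emitted; the speculation barrier is currently a no-op in this JIT */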
1432 break;
1433
1434 default:
1435 pr_err("bpf_jit: unknown opcode %02x\n", code);
1436 return -EINVAL;
1437 }
1438
1439 return 0;
1440
1441 toofar:
1442 pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
1443 return -E2BIG;
1444 }
1445
1446 static int build_body(struct jit_ctx *ctx, bool extra_pass)
1447 {
1448 int i;
1449 const struct bpf_prog *prog = ctx->prog;
1450
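/*
 * On the first pass ctx->image is NULL and only per-insn offsets are
 * recorded, so that jump targets can be resolved when the image is
 * actually emitted on a later pass.
 */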
1451 for (i = 0; i < prog->len; i++) {
1452 const struct bpf_insn *insn = &prog->insnsi[i];
1453 int ret;
1454
1455 if (ctx->image == NULL)
1456 ctx->offset[i] = ctx->idx;
1457
1458 ret = build_insn(insn, ctx, extra_pass);
1459 if (ret > 0) {
1460 i++;
1461 if (ctx->image == NULL)
1462 ctx->offset[i] = ctx->idx;
1463 continue;
1464 }
1465 if (ret)
1466 return ret;
1467 }
1468
1469 if (ctx->image == NULL)
1470 ctx->offset[i] = ctx->idx;
1471
1472 return 0;
1473 }
1474
1475 /* Fill space with break instructions */
1476 static void jit_fill_hole(void *area, unsigned int size)
1477 {
1478 u32 *ptr;
1479
1480 /* We are guaranteed to have aligned memory */
1481 for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
1482 *ptr++ = INSN_BREAK;
1483 }
1484
1485 static int validate_code(struct jit_ctx *ctx)
1486 {
1487 int i;
1488 union loongarch_instruction insn;
1489
1490 for (i = 0; i < ctx->idx; i++) {
1491 insn = ctx->image[i];
1492 /* Check INSN_BREAK */
1493 if (insn.word == INSN_BREAK)
1494 return -1;
1495 }
1496
1497 return 0;
1498 }
1499
1500 static int validate_ctx(struct jit_ctx *ctx)
1501 {
1502 if (validate_code(ctx))
1503 return -1;
1504
1505 if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
1506 return -1;
1507
1508 return 0;
1509 }
1510
1511 static int emit_jump_and_link(struct jit_ctx *ctx, u8 rd, u64 target)
1512 {
1513 if (!target) {
1514 pr_err("bpf_jit: invalid jump target address\n");
1515 return -EFAULT;
1516 }
1517
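/*
 * move_imm expands to at most 4 instructions; together with the jirl this
 * is the LOONGARCH_LONG_JUMP_NINSNS (5) sequence that the reserved
 * prologue nops and emit_jump_or_nops() are sized for.
 */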
1518 move_imm(ctx, LOONGARCH_GPR_T1, target, false);
1519 emit_insn(ctx, jirl, rd, LOONGARCH_GPR_T1, 0);
1520
1521 return 0;
1522 }
1523
1524 static int emit_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
1525 {
1526 int i;
1527 struct jit_ctx ctx;
1528
1529 ctx.idx = 0;
1530 ctx.image = (union loongarch_instruction *)insns;
1531
1532 if (!target) {
1533 for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
1534 emit_insn((&ctx), nop);
1535 return 0;
1536 }
1537
1538 return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_RA : LOONGARCH_GPR_ZERO, (u64)target);
1539 }
1540
1541 static int emit_call(struct jit_ctx *ctx, u64 addr)
1542 {
1543 return emit_jump_and_link(ctx, LOONGARCH_GPR_RA, addr);
1544 }
1545
1546 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
1547 {
1548 int ret;
1549
1550 cpus_read_lock();
1551 mutex_lock(&text_mutex);
1552 ret = larch_insn_text_copy(dst, src, len);
1553 mutex_unlock(&text_mutex);
1554 cpus_read_unlock();
1555
1556 return ret ? ERR_PTR(-EINVAL) : dst;
1557 }
1558
1559 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
1560 enum bpf_text_poke_type new_t, void *old_addr,
1561 void *new_addr)
1562 {
1563 int ret;
1564 bool is_call;
1565 unsigned long size = 0;
1566 unsigned long offset = 0;
1567 void *image = NULL;
1568 char namebuf[KSYM_NAME_LEN];
1569 u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
1570 u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
1571
1572 /* Only poking bpf text is supported. Since kernel function entry
1573 * is set up by ftrace, we rely on ftrace to poke kernel functions.
1574 */
1575 if (!bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
1576 return -ENOTSUPP;
1577
1578 image = ip - offset;
1579
1580 /* zero offset means we're poking bpf prog entry */
1581 if (offset == 0) {
1582 /* skip to the nop instruction in bpf prog entry:
1583 * move t0, ra
1584 * nop
1585 */
1586 ip = image + LOONGARCH_INSN_SIZE;
1587 }
1588
1589 is_call = old_t == BPF_MOD_CALL;
1590 ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call);
1591 if (ret)
1592 return ret;
1593
1594 if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES))
1595 return -EFAULT;
1596
1597 is_call = new_t == BPF_MOD_CALL;
1598 ret = emit_jump_or_nops(new_addr, ip, new_insns, is_call);
1599 if (ret)
1600 return ret;
1601
1602 cpus_read_lock();
1603 mutex_lock(&text_mutex);
1604 if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES))
1605 ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES);
1606 mutex_unlock(&text_mutex);
1607 cpus_read_unlock();
1608
1609 return ret;
1610 }
1611
1612 int bpf_arch_text_invalidate(void *dst, size_t len)
1613 {
1614 int i;
1615 int ret = 0;
1616 u32 *inst;
1617
1618 inst = kvmalloc(len, GFP_KERNEL);
1619 if (!inst)
1620 return -ENOMEM;
1621
1622 for (i = 0; i < (len / sizeof(u32)); i++)
1623 inst[i] = INSN_BREAK;
1624
1625 cpus_read_lock();
1626 mutex_lock(&text_mutex);
1627 if (larch_insn_text_copy(dst, inst, len))
1628 ret = -EINVAL;
1629 mutex_unlock(&text_mutex);
1630 cpus_read_unlock();
1631
1632 kvfree(inst);
1633
1634 return ret;
1635 }
1636
1637 static void store_args(struct jit_ctx *ctx, int nr_arg_slots, int args_off)
1638 {
1639 int i;
1640
1641 for (i = 0; i < nr_arg_slots; i++) {
1642 if (i < LOONGARCH_MAX_REG_ARGS)
1643 emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
1644 else {
1645 /* Skip slots for T0 and FP of traced function */
1646 emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP,
1647 16 + (i - LOONGARCH_MAX_REG_ARGS) * 8);
1648 emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -args_off);
1649 }
1650 args_off -= 8;
1651 }
1652 }
1653
1654 static void restore_args(struct jit_ctx *ctx, int nr_reg_args, int args_off)
1655 {
1656 int i;
1657
1658 for (i = 0; i < nr_reg_args; i++) {
1659 emit_insn(ctx, ldd, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
1660 args_off -= 8;
1661 }
1662 }
1663
1664 static void restore_stk_args(struct jit_ctx *ctx, int nr_stk_args, int args_off, int stk_args_off)
1665 {
1666 int i;
1667
1668 for (i = 0; i < nr_stk_args; i++) {
1669 emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP,
1670 -(args_off - LOONGARCH_MAX_REG_ARGS * 8));
1671 emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -stk_args_off);
1672 args_off -= 8;
1673 stk_args_off -= 8;
1674 }
1675 }
1676
1677 static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
1678 int args_off, int retval_off, int run_ctx_off, bool save_ret)
1679 {
1680 int ret;
1681 u32 *branch;
1682 struct bpf_prog *p = l->link.prog;
1683 int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
1684
1685 if (l->cookie)
1686 emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1,
1687 -run_ctx_off + cookie_off, l->cookie);
1688 else
1689 emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
1690
1691 /* arg1: prog */
1692 move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
1693 /* arg2: &run_ctx */
1694 emit_insn(ctx, addid, LOONGARCH_GPR_A1, LOONGARCH_GPR_FP, -run_ctx_off);
1695 ret = emit_call(ctx, (const u64)bpf_trampoline_enter(p));
1696 if (ret)
1697 return ret;
1698
1699 /* store prog start time */
1700 move_reg(ctx, LOONGARCH_GPR_S1, LOONGARCH_GPR_A0);
1701
1702 /*
1703 * if (__bpf_prog_enter(prog) == 0)
1704 * goto skip_exec_of_prog;
1705 */
1706 branch = (u32 *)ctx->image + ctx->idx;
1707 /* nop reserved for conditional jump */
1708 emit_insn(ctx, nop);
1709
1710 /* arg1: &args_off */
1711 emit_insn(ctx, addid, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -args_off);
1712 if (!p->jited)
1713 move_imm(ctx, LOONGARCH_GPR_A1, (const s64)p->insnsi, false);
1714 ret = emit_call(ctx, (const u64)p->bpf_func);
1715 if (ret)
1716 return ret;
1717
1718 if (save_ret) {
1719 emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
1720 emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
1721 }
1722
1723 /* update branch with beqz */
1724 if (ctx->image) {
1725 int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branch;
1726 *branch = larch_insn_gen_beq(LOONGARCH_GPR_A0, LOONGARCH_GPR_ZERO, offset);
1727 }
1728
1729 /* arg1: prog */
1730 move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
1731 /* arg2: prog start time */
1732 move_reg(ctx, LOONGARCH_GPR_A1, LOONGARCH_GPR_S1);
1733 /* arg3: &run_ctx */
1734 emit_insn(ctx, addid, LOONGARCH_GPR_A2, LOONGARCH_GPR_FP, -run_ctx_off);
1735 ret = emit_call(ctx, (const u64)bpf_trampoline_exit(p));
1736
1737 return ret;
1738 }
1739
1740 static int invoke_bpf(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
1741 int args_off, int retval_off, int run_ctx_off,
1742 int func_meta_off, bool save_ret, u64 func_meta, int cookie_off)
1743 {
1744 int i, cur_cookie = (cookie_off - args_off) / 8;
1745
1746 for (i = 0; i < tl->nr_links; i++) {
1747 int err;
1748
1749 if (bpf_prog_calls_session_cookie(tl->links[i])) {
1750 u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT);
1751
1752 emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -func_meta_off, meta);
1753 cur_cookie--;
1754 }
1755 err = invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, save_ret);
1756 if (err)
1757 return err;
1758 }
1759
1760 return 0;
1761 }
1762
1763 void *arch_alloc_bpf_trampoline(unsigned int size)
1764 {
1765 return bpf_prog_pack_alloc(size, jit_fill_hole);
1766 }
1767
1768 void arch_free_bpf_trampoline(void *image, unsigned int size)
1769 {
1770 bpf_prog_pack_free(image, size);
1771 }
1772
1773 int arch_protect_bpf_trampoline(void *image, unsigned int size)
1774 {
1775 return 0;
1776 }
1777
1778 /*
1779 * Sign-extend the register if necessary
1780 */
1781 static void sign_extend(struct jit_ctx *ctx, int rd, int rj, u8 size, bool sign)
1782 {
1783 /* ABI requires unsigned char/short to be zero-extended */
1784 if (!sign && (size == 1 || size == 2)) {
1785 if (rd != rj)
1786 move_reg(ctx, rd, rj);
1787 return;
1788 }
1789
1790 switch (size) {
1791 case 1:
1792 emit_insn(ctx, extwb, rd, rj);
1793 break;
1794 case 2:
1795 emit_insn(ctx, extwh, rd, rj);
1796 break;
1797 case 4:
1798 emit_insn(ctx, addiw, rd, rj, 0);
1799 break;
1800 case 8:
1801 if (rd != rj)
1802 move_reg(ctx, rd, rj);
1803 break;
1804 default:
1805 pr_warn("bpf_jit: invalid size %d for sign_extend\n", size);
1806 }
1807 }
1808
1809 static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
1810 const struct btf_func_model *m, struct bpf_tramp_links *tlinks,
1811 void *func_addr, u32 flags)
1812 {
1813 int i, ret, save_ret;
1814 int cookie_cnt, cookie_off;
1815 int stack_size, args_off, stk_args_off, nr_arg_slots = 0;
1816 int retval_off, func_meta_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off;
1817 unsigned long long func_meta;
1818 bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
1819 void *orig_call = func_addr;
1820 struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
1821 struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
1822 struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
1823 u32 **branches = NULL;
1824
1825 /*
1826 * FP + 8 [ RA to parent func ] return address to parent
1827 * function
1828 * FP + 0 [ FP of parent func ] frame pointer of parent
1829 * function
1830 * FP - 8 [ T0 to traced func ] return address of traced
1831 * function
1832 * FP - 16 [ FP of traced func ] frame pointer of traced
1833 * function
1834 *
1835 * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
1836 * BPF_TRAMP_F_RET_FENTRY_RET
1837 * [ arg regN ]
1838 * [ ... ]
1839 * FP - args_off [ arg reg1 ]
1840 *
1841 * FP - func_meta_off [ regs count, etc ]
1842 *
1843 * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG
1844 *
1845 * [ stack cookie N ]
1846 * [ ... ]
1847 * FP - cookie_off [ stack cookie 1 ]
1848 *
1849 * FP - run_ctx_off [ bpf_tramp_run_ctx ]
1850 *
1851 * FP - sreg_off [ callee saved reg ]
1852 *
1853 * FP - tcc_ptr_off [ tail_call_cnt_ptr ]
1854 *
1855 * [ stack_argN ]
1856 * [ ... ]
1857 * FP - stk_args_off [ stack_arg1 ] BPF_TRAMP_F_CALL_ORIG
1858 */
1859
1860 if (m->nr_args > MAX_BPF_FUNC_ARGS)
1861 return -ENOTSUPP;
1862
1863 /* Extra registers for struct arguments */
1864 for (i = 0; i < m->nr_args; i++) {
1865 /*
1866 * The struct argument size is at most 16 bytes,
1867 * enforced by the verifier. The struct argument
1868 * may be passed in a pair of registers if its
1869 * size is more than 8 bytes and no more than 16
1870 * bytes.
1871 */
1872 nr_arg_slots += round_up(m->arg_size[i], 8) / 8;
1873 }
1874
1875 if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
1876 return -ENOTSUPP;
1877
1878 /* Room of trampoline frame to store return address and frame pointer */
1879 stack_size = 16;
1880
1881 save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
1882 if (save_ret)
1883 stack_size += 16; /* Save BPF R0 and A0 */
1884
1885 retval_off = stack_size;
1886
1887 /* Room of trampoline frame to store args */
1888 stack_size += nr_arg_slots * 8;
1889 args_off = stack_size;
1890
1891 /* Room of function metadata, such as regs count */
1892 stack_size += 8;
1893 func_meta_off = stack_size;
1894
1895 /* Room of trampoline frame to store ip address */
1896 if (flags & BPF_TRAMP_F_IP_ARG) {
1897 stack_size += 8;
1898 ip_off = stack_size;
1899 }
1900
1901 cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
1902
1903 /* Room for session cookies */
1904 stack_size += cookie_cnt * 8;
1905 cookie_off = stack_size;
1906
1907 /* Room of trampoline frame to store struct bpf_tramp_run_ctx */
1908 stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
1909 run_ctx_off = stack_size;
1910
1911 stack_size += 8;
1912 sreg_off = stack_size;
1913
1914 /* Room of trampoline frame to store tail_call_cnt_ptr */
1915 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
1916 stack_size += 8;
1917 tcc_ptr_off = stack_size;
1918 }
1919
1920 if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nr_arg_slots - LOONGARCH_MAX_REG_ARGS > 0))
1921 stack_size += (nr_arg_slots - LOONGARCH_MAX_REG_ARGS) * 8;
1922
1923 stack_size = round_up(stack_size, 16);
1924
1925 /* Room for args on stack must be at the top of stack */
1926 stk_args_off = stack_size;
1927
1928 if (is_struct_ops) {
1929 /*
1930 * For the trampoline called directly, just handle
1931 * the frame of trampoline.
1932 */
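/*
 * Roughly the following sequence is emitted:
 *   addi.d  $sp, $sp, -stack_size
 *   st.d    $ra, $sp, stack_size - 8
 *   st.d    $fp, $sp, stack_size - 16
 *   addi.d  $fp, $sp, stack_size
 */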
1933 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
1934 emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8);
1935 emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
1936 emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
1937 } else {
1938 /*
1939 * For the trampoline called from function entry,
1940 * the frame of traced function and the frame of
1941 * trampoline need to be considered.
1942 */
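/*
 * Roughly the following sequence is emitted, first saving RA/FP for the
 * parent function and then T0/FP for the traced function:
 *   addi.d  $sp, $sp, -16
 *   st.d    $ra, $sp, 8
 *   st.d    $fp, $sp, 0
 *   addi.d  $fp, $sp, 16
 *   addi.d  $sp, $sp, -stack_size
 *   st.d    $t0, $sp, stack_size - 8
 *   st.d    $fp, $sp, stack_size - 16
 *   addi.d  $fp, $sp, stack_size
 */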
1943 /* RA and FP for parent function */
1944 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16);
1945 emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
1946 emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
1947 emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16);
1948
1949 /* RA and FP for traced function */
1950 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
1951 emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
1952 emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
1953 emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
1954 }
1955
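/*
 * With BPF_TRAMP_F_TAIL_CALL_CTX the traced target is a BPF program that
 * takes part in tail calls, so the tail-call count pointer carried in
 * REG_TCC is spilled here and reloaded before calling the original
 * program and again before returning.
 */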
1956 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
1957 emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off);
1958
1959 /* callee saved register S1 to pass start time */
1960 emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
1961
1962 /* store ip address of the traced function */
1963 if (flags & BPF_TRAMP_F_IP_ARG)
1964 emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -ip_off, (u64)func_addr);
1965
1966 /* store arg regs count */
1967 func_meta = nr_arg_slots;
1968 emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -func_meta_off, func_meta);
1969
1970 store_args(ctx, nr_arg_slots, args_off);
1971
1972 if (bpf_fsession_cnt(tlinks)) {
1973 /* clear all session cookies' value */
1974 for (i = 0; i < cookie_cnt; i++)
1975 emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -cookie_off + 8 * i);
1976
1977 /* clear return value to make sure fentry always get 0 */
1978 emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
1979 }
1980
1981 /* To traced function */
1982 /* Ftrace jump skips 2 NOP instructions */
1983 if (is_kernel_text((unsigned long)orig_call) ||
1984 is_module_text_address((unsigned long)orig_call))
1985 orig_call += LOONGARCH_FENTRY_NBYTES;
1986 /* Direct jump skips 5 NOP instructions */
1987 else if (is_bpf_text_address((unsigned long)orig_call))
1988 orig_call += LOONGARCH_BPF_FENTRY_NBYTES;
1989
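/*
 * When the original function is going to be called, __bpf_tramp_enter()
 * pins the trampoline image (struct bpf_tramp_image) so it cannot be
 * freed while the trampoline is still running; the matching
 * __bpf_tramp_exit() call is emitted in the epilogue below.
 */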
1990 if (flags & BPF_TRAMP_F_CALL_ORIG) {
1991 move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
1992 ret = emit_call(ctx, (const u64)__bpf_tramp_enter);
1993 if (ret)
1994 return ret;
1995 }
1996
1997 if (fentry->nr_links) {
1998 ret = invoke_bpf(ctx, fentry, args_off, retval_off, run_ctx_off, func_meta_off,
1999 flags & BPF_TRAMP_F_RET_FENTRY_RET, func_meta, cookie_off);
2000 if (ret)
2001 return ret;
2002 }
2003 if (fmod_ret->nr_links) {
2004 branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL);
2005 if (!branches)
2006 return -ENOMEM;
2007
2008 emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
2009 for (i = 0; i < fmod_ret->nr_links; i++) {
2010 ret = invoke_bpf_prog(ctx, fmod_ret->links[i],
2011 args_off, retval_off, run_ctx_off, true);
2012 if (ret)
2013 goto out;
2014 emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off);
2015 branches[i] = (u32 *)ctx->image + ctx->idx;
2016 emit_insn(ctx, nop);
2017 }
2018 }
2019
2020 if (flags & BPF_TRAMP_F_CALL_ORIG) {
2021 restore_args(ctx, min_t(int, nr_arg_slots, LOONGARCH_MAX_REG_ARGS), args_off);
2022 restore_stk_args(ctx, nr_arg_slots - LOONGARCH_MAX_REG_ARGS, args_off, stk_args_off);
2023
2024 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
2025 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off);
2026
2027 ret = emit_call(ctx, (const u64)orig_call);
2028 if (ret)
2029 goto out;
2030 emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
2031 emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
2032 im->ip_after_call = ctx->ro_image + ctx->idx;
2033 /* Reserve space for the move_imm + jirl instruction */
2034 for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
2035 emit_insn(ctx, nop);
2036 }
2037
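/*
 * Patch the nop placeholders recorded in branches[] into conditional
 * branches: if an fmod_ret program returned a non-zero value (kept in
 * T1), jump past the BPF_TRAMP_F_CALL_ORIG block so the original
 * function is not called.
 */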
2038 for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) {
2039 int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i];
2040 *branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset);
2041 }
2042
2043 /* Set "is_return" flag for fsession */
2044 func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
2045 if (bpf_fsession_cnt(tlinks))
2046 emit_store_stack_imm64(ctx, LOONGARCH_GPR_T1, -func_meta_off, func_meta);
2047
2048 if (fexit->nr_links) {
2049 ret = invoke_bpf(ctx, fexit, args_off, retval_off, run_ctx_off,
2050 func_meta_off, false, func_meta, cookie_off);
2051 if (ret)
2052 goto out;
2053 }
2054
2055 if (flags & BPF_TRAMP_F_CALL_ORIG) {
2056 im->ip_epilogue = ctx->ro_image + ctx->idx;
2057 move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
2058 ret = emit_call(ctx, (const u64)__bpf_tramp_exit);
2059 if (ret)
2060 goto out;
2061 }
2062
2063 if (flags & BPF_TRAMP_F_RESTORE_REGS)
2064 restore_args(ctx, min_t(int, nr_arg_slots, LOONGARCH_MAX_REG_ARGS), args_off);
2065
2066 if (save_ret) {
2067 emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
2068 if (is_struct_ops)
2069 sign_extend(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0],
2070 m->ret_size, m->ret_flags & BTF_FMODEL_SIGNED_ARG);
2071 else
2072 emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
2073 }
2074
2075 emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
2076
2077 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
2078 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off);
2079
2080 if (is_struct_ops) {
2081 /* trampoline called directly */
2082 emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8);
2083 emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
2084 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
2085
2086 emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
2087 } else {
2088 /* trampoline called from function entry */
2089 emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
2090 emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
2091 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
2092
2093 emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
2094 emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
2095 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16);
2096
2097 if (flags & BPF_TRAMP_F_SKIP_FRAME) {
2098 /* skip the traced function, return to parent function */
2099 emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
2100 } else {
2101 /* return to traced function */
2102 /* RA already holds the return address to the parent function */
2103 emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0);
2104 }
2107 }
2108
2109 ret = ctx->idx;
2110 out:
2111 kfree(branches);
2112
2113 return ret;
2114 }
2115
2116 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
2117 void *ro_image_end, const struct btf_func_model *m,
2118 u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
2119 {
2120 int ret, size;
2121 void *image, *tmp;
2122 struct jit_ctx ctx;
2123
2124 size = ro_image_end - ro_image;
2125 image = kvmalloc(size, GFP_KERNEL);
2126 if (!image)
2127 return -ENOMEM;
2128
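/*
 * The trampoline is generated into a temporary writable buffer first;
 * once it validates, bpf_arch_text_copy() below copies it into the
 * read-only executable region described by ro_image.
 */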
2129 ctx.image = (union loongarch_instruction *)image;
2130 ctx.ro_image = (union loongarch_instruction *)ro_image;
2131 ctx.idx = 0;
2132
2133 jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
2134 ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags);
2135 if (ret < 0)
2136 goto out;
2137
2138 if (validate_code(&ctx) < 0) {
2139 ret = -EINVAL;
2140 goto out;
2141 }
2142
2143 tmp = bpf_arch_text_copy(ro_image, image, size);
2144 if (IS_ERR(tmp)) {
2145 ret = PTR_ERR(tmp);
2146 goto out;
2147 }
2148
2149 out:
2150 kvfree(image);
2151 return ret < 0 ? ret : size;
2152 }
2153
2154 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
2155 struct bpf_tramp_links *tlinks, void *func_addr)
2156 {
2157 int ret;
2158 struct jit_ctx ctx;
2159 struct bpf_tramp_image im;
2160
2161 ctx.image = NULL;
2162 ctx.idx = 0;
2163
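/*
 * Dry run: with ctx.image == NULL nothing is written and only ctx.idx is
 * advanced, so the return value is the number of instructions the real
 * trampoline would need, converted to bytes below.
 */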
2164 ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags);
2165
2166 return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE;
2167 }
2168
2169 struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
2170 {
2171 bool extra_pass = false;
2172 u8 *image_ptr, *ro_image_ptr;
2173 int image_size, prog_size, extable_size;
2174 struct jit_ctx ctx;
2175 struct jit_data *jit_data;
2176 struct bpf_binary_header *header;
2177 struct bpf_binary_header *ro_header;
2178
2179 /*
2180 * If BPF JIT was not enabled then we must fall back to
2181 * the interpreter.
2182 */
2183 if (!prog->jit_requested)
2184 return prog;
2185
2186 jit_data = prog->aux->jit_data;
2187 if (!jit_data) {
2188 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
2189 if (!jit_data)
2190 return prog;
2191 prog->aux->jit_data = jit_data;
2192 }
2193 if (jit_data->ctx.offset) {
2194 ctx = jit_data->ctx;
2195 ro_header = jit_data->ro_header;
2196 ro_image_ptr = (void *)ctx.ro_image;
2197 header = jit_data->header;
2198 image_ptr = (void *)header + ((void *)ro_image_ptr - (void *)ro_header);
2199 extra_pass = true;
2200 prog_size = sizeof(u32) * ctx.idx;
2201 goto skip_init_ctx;
2202 }
2203
2204 memset(&ctx, 0, sizeof(ctx));
2205 ctx.prog = prog;
2206 ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
2207 ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
2208
2209 ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
2210 if (ctx.offset == NULL)
2211 goto out_offset;
2212
2213 /* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
2214 build_prologue(&ctx);
2215 if (build_body(&ctx, extra_pass))
2216 goto out_offset;
2217 ctx.epilogue_offset = ctx.idx;
2218 build_epilogue(&ctx);
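/*
 * ctx.offset[i] now holds the index of the first JITed instruction for
 * each BPF instruction; it is used for computing jump offsets and is
 * later scaled to byte offsets for bpf_prog_fill_jited_linfo().
 */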
2219
2220 extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry);
2221
2222 /* Now we know the actual image size.
2223 * As each LoongArch instruction is 32 bits wide,
2224 * we are translating the number of JITed instructions into
2225 * the size required to store the JITed code.
2226 */
2227 prog_size = sizeof(u32) * ctx.idx;
2228 image_size = prog_size + extable_size;
2229 /* Now we know the size of the structure to make */
2230 ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u32),
2231 &header, &image_ptr, jit_fill_hole);
2232 if (!ro_header)
2233 goto out_offset;
2234
2235 /* 2. Now, the actual pass to generate final JIT code */
2236 /*
2237 * Use the image (RW) for writing the JITed instructions. But also save
2238 * the ro_image (RX) for calculating the offsets in the image. The RW
2239 * image will be later copied to the RX image from where the program will
2240 * run. The bpf_jit_binary_pack_finalize() will do this copy in the final
2241 * step.
2242 */
2243 ctx.image = (union loongarch_instruction *)image_ptr;
2244 ctx.ro_image = (union loongarch_instruction *)ro_image_ptr;
2245 if (extable_size)
2246 prog->aux->extable = (void *)ro_image_ptr + prog_size;
2247
2248 skip_init_ctx:
2249 ctx.idx = 0;
2250 ctx.num_exentries = 0;
2251
2252 build_prologue(&ctx);
2253 if (build_body(&ctx, extra_pass))
2254 goto out_free;
2255 build_epilogue(&ctx);
2256
2257 /* 3. Extra pass to validate JITed code */
2258 if (validate_ctx(&ctx))
2259 goto out_free;
2260
2261 /* And we're done */
2262 if (bpf_jit_enable > 1)
2263 bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
2264
2265 if (!prog->is_func || extra_pass) {
2266 if (extra_pass && ctx.idx != jit_data->ctx.idx) {
2267 pr_err_once("multi-func JIT bug %d != %d\n",
2268 ctx.idx, jit_data->ctx.idx);
2269 goto out_free;
2270 }
2271 if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
2272 /* ro_header and header have been freed */
2273 ro_header = NULL;
2274 header = NULL;
2275 goto out_free;
2276 }
2277 /*
2278 * The instructions have now been copied to the ROX region from
2279 * where they will execute. Now the data cache has to be cleaned
2280 * to the PoU and the I-cache has to be invalidated for the VAs.
2281 */
2282 bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx);
2283 } else {
2284 jit_data->ctx = ctx;
2285 jit_data->header = header;
2286 jit_data->ro_header = ro_header;
2287 }
2288 prog->jited = 1;
2289 prog->jited_len = prog_size;
2290 prog->bpf_func = (void *)ctx.ro_image;
2291
2292 if (!prog->is_func || extra_pass) {
2293 int i;
2294
2295 /* offset[prog->len] is the size of program */
2296 for (i = 0; i <= prog->len; i++)
2297 ctx.offset[i] *= LOONGARCH_INSN_SIZE;
2298 bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
2299
2300 out_offset:
2301 kvfree(ctx.offset);
2302 kfree(jit_data);
2303 prog->aux->jit_data = NULL;
2304 }
2305
2306 return prog;
2307
2308 out_free:
2309 if (extra_pass) {
2310 prog->bpf_func = NULL;
2311 prog->jited = 0;
2312 prog->jited_len = 0;
2313 }
2314
2315 if (header) {
2316 bpf_arch_text_copy(&ro_header->size, &header->size, sizeof(header->size));
2317 bpf_jit_binary_pack_free(ro_header, header);
2318 }
2319 goto out_offset;
2320 }
2321
2322 void bpf_jit_free(struct bpf_prog *prog)
2323 {
2324 if (prog->jited) {
2325 struct jit_data *jit_data = prog->aux->jit_data;
2326 struct bpf_binary_header *hdr;
2327
2328 /*
2329 * If we fail the final pass of JIT (from jit_subprogs), the
2330 * program may not be finalized yet. Call finalize here before
2331 * freeing it.
2332 */
2333 if (jit_data) {
2334 bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header);
2335 kfree(jit_data);
2336 }
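/*
 * The RW copy (if any) was released by bpf_jit_binary_pack_finalize()
 * above, so only the read-only image, located via its pack header,
 * remains to be freed.
 */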
2337 hdr = bpf_jit_binary_pack_hdr(prog);
2338 bpf_jit_binary_pack_free(hdr, NULL);
2339 WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
2340 }
2341
2342 bpf_prog_unlock_free(prog);
2343 }
2344
2345 bool bpf_jit_bypass_spec_v1(void)
2346 {
2347 return true;
2348 }
2349
2350 bool bpf_jit_bypass_spec_v4(void)
2351 {
2352 return true;
2353 }
2354
2355 bool bpf_jit_supports_arena(void)
2356 {
2357 return true;
2358 }
2359
2360 bool bpf_jit_supports_fsession(void)
2361 {
2362 return true;
2363 }
2364
2365 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
2366 bool bpf_jit_supports_subprog_tailcalls(void)
2367 {
2368 return true;
2369 }
2370