1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * BPF JIT compiler for LoongArch 4 * 5 * Copyright (C) 2022 Loongson Technology Corporation Limited 6 */ 7 #include <linux/memory.h> 8 #include "bpf_jit.h" 9 10 #define LOONGARCH_MAX_REG_ARGS 8 11 12 #define LOONGARCH_LONG_JUMP_NINSNS 5 13 #define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4) 14 15 #define LOONGARCH_FENTRY_NINSNS 2 16 #define LOONGARCH_FENTRY_NBYTES (LOONGARCH_FENTRY_NINSNS * 4) 17 #define LOONGARCH_BPF_FENTRY_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4) 18 19 #define REG_TCC LOONGARCH_GPR_A6 20 #define REG_ARENA LOONGARCH_GPR_S6 /* For storing arena_vm_start */ 21 #define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (round_up(stack, 16) - 80) 22 23 static const int regmap[] = { 24 /* return value from in-kernel function, and exit value for eBPF program */ 25 [BPF_REG_0] = LOONGARCH_GPR_A5, 26 /* arguments from eBPF program to in-kernel function */ 27 [BPF_REG_1] = LOONGARCH_GPR_A0, 28 [BPF_REG_2] = LOONGARCH_GPR_A1, 29 [BPF_REG_3] = LOONGARCH_GPR_A2, 30 [BPF_REG_4] = LOONGARCH_GPR_A3, 31 [BPF_REG_5] = LOONGARCH_GPR_A4, 32 /* callee saved registers that in-kernel function will preserve */ 33 [BPF_REG_6] = LOONGARCH_GPR_S0, 34 [BPF_REG_7] = LOONGARCH_GPR_S1, 35 [BPF_REG_8] = LOONGARCH_GPR_S2, 36 [BPF_REG_9] = LOONGARCH_GPR_S3, 37 /* read-only frame pointer to access stack */ 38 [BPF_REG_FP] = LOONGARCH_GPR_S4, 39 /* temporary register for blinding constants */ 40 [BPF_REG_AX] = LOONGARCH_GPR_T0, 41 }; 42 43 static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx, int *store_offset) 44 { 45 const struct bpf_prog *prog = ctx->prog; 46 const bool is_main_prog = !bpf_is_subprog(prog); 47 48 if (is_main_prog) { 49 /* 50 * LOONGARCH_GPR_T3 = MAX_TAIL_CALL_CNT 51 * if (REG_TCC > T3 ) 52 * std REG_TCC -> LOONGARCH_GPR_SP + store_offset 53 * else 54 * std REG_TCC -> LOONGARCH_GPR_SP + store_offset 55 * REG_TCC = LOONGARCH_GPR_SP + store_offset 56 * 57 * std REG_TCC -> LOONGARCH_GPR_SP + store_offset 58 * 59 * The purpose of this code is to first push the TCC into stack, 60 * and then push the address of TCC into stack. 61 * In cases where bpf2bpf and tailcall are used in combination, 62 * the value in REG_TCC may be a count or an address, 63 * these two cases need to be judged and handled separately. 64 */ 65 emit_insn(ctx, addid, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT); 66 *store_offset -= sizeof(long); 67 68 emit_cond_jmp(ctx, BPF_JGT, REG_TCC, LOONGARCH_GPR_T3, 4); 69 70 /* 71 * If REG_TCC < MAX_TAIL_CALL_CNT, the value in REG_TCC is a count, 72 * push tcc into stack 73 */ 74 emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset); 75 76 /* Push the address of TCC into the REG_TCC */ 77 emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_SP, *store_offset); 78 79 emit_uncond_jmp(ctx, 2); 80 81 /* 82 * If REG_TCC > MAX_TAIL_CALL_CNT, the value in REG_TCC is an address, 83 * push tcc_ptr into stack 84 */ 85 emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset); 86 } else { 87 *store_offset -= sizeof(long); 88 emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset); 89 } 90 91 /* Push tcc_ptr into stack */ 92 *store_offset -= sizeof(long); 93 emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset); 94 } 95 96 /* 97 * eBPF prog stack layout: 98 * 99 * high 100 * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP 101 * | $ra | 102 * +-------------------------+ 103 * | $fp | 104 * +-------------------------+ 105 * | $s0 | 106 * +-------------------------+ 107 * | $s1 | 108 * +-------------------------+ 109 * | $s2 | 110 * +-------------------------+ 111 * | $s3 | 112 * +-------------------------+ 113 * | $s4 | 114 * +-------------------------+ 115 * | $s5 | 116 * +-------------------------+ 117 * | tcc | 118 * +-------------------------+ 119 * | tcc_ptr | 120 * +-------------------------+ <--BPF_REG_FP 121 * | prog->aux->stack_depth | 122 * | (optional) | 123 * current $sp -------------> +-------------------------+ 124 * low 125 */ 126 static void build_prologue(struct jit_ctx *ctx) 127 { 128 int i, stack_adjust = 0, store_offset, bpf_stack_adjust; 129 const struct bpf_prog *prog = ctx->prog; 130 const bool is_main_prog = !bpf_is_subprog(prog); 131 132 bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); 133 134 /* To store ra, fp, s0, s1, s2, s3, s4, s5 */ 135 stack_adjust += sizeof(long) * 8; 136 137 /* To store tcc and tcc_ptr */ 138 stack_adjust += sizeof(long) * 2; 139 140 if (ctx->arena_vm_start) 141 stack_adjust += 8; 142 143 stack_adjust = round_up(stack_adjust, 16); 144 stack_adjust += bpf_stack_adjust; 145 146 move_reg(ctx, LOONGARCH_GPR_T0, LOONGARCH_GPR_RA); 147 /* Reserve space for the move_imm + jirl instruction */ 148 for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++) 149 emit_insn(ctx, nop); 150 151 /* 152 * First instruction initializes the tail call count (TCC) 153 * register to zero. On tail call we skip this instruction, 154 * and the TCC is passed in REG_TCC from the caller. 155 */ 156 if (is_main_prog) 157 emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, 0); 158 159 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust); 160 161 store_offset = stack_adjust - sizeof(long); 162 emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset); 163 164 store_offset -= sizeof(long); 165 emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset); 166 167 store_offset -= sizeof(long); 168 emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset); 169 170 store_offset -= sizeof(long); 171 emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset); 172 173 store_offset -= sizeof(long); 174 emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset); 175 176 store_offset -= sizeof(long); 177 emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset); 178 179 store_offset -= sizeof(long); 180 emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset); 181 182 store_offset -= sizeof(long); 183 emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset); 184 185 if (ctx->arena_vm_start) { 186 store_offset -= sizeof(long); 187 emit_insn(ctx, std, REG_ARENA, LOONGARCH_GPR_SP, store_offset); 188 } 189 190 prepare_bpf_tail_call_cnt(ctx, &store_offset); 191 192 emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust); 193 194 if (bpf_stack_adjust) 195 emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust); 196 197 ctx->stack_size = stack_adjust; 198 199 if (ctx->arena_vm_start) 200 move_imm(ctx, REG_ARENA, ctx->arena_vm_start, false); 201 } 202 203 static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) 204 { 205 int stack_adjust = ctx->stack_size; 206 int load_offset; 207 208 load_offset = stack_adjust - sizeof(long); 209 emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset); 210 211 load_offset -= sizeof(long); 212 emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset); 213 214 load_offset -= sizeof(long); 215 emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset); 216 217 load_offset -= sizeof(long); 218 emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset); 219 220 load_offset -= sizeof(long); 221 emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset); 222 223 load_offset -= sizeof(long); 224 emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset); 225 226 load_offset -= sizeof(long); 227 emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset); 228 229 load_offset -= sizeof(long); 230 emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset); 231 232 if (ctx->arena_vm_start) { 233 load_offset -= sizeof(long); 234 emit_insn(ctx, ldd, REG_ARENA, LOONGARCH_GPR_SP, load_offset); 235 } 236 237 /* 238 * When push into the stack, follow the order of tcc then tcc_ptr. 239 * When pop from the stack, first pop tcc_ptr then followed by tcc. 240 */ 241 load_offset -= 2 * sizeof(long); 242 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset); 243 244 load_offset += sizeof(long); 245 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset); 246 247 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust); 248 249 if (!is_tail_call) { 250 /* Set return value */ 251 emit_insn(ctx, addiw, LOONGARCH_GPR_A0, regmap[BPF_REG_0], 0); 252 /* Return to the caller */ 253 emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); 254 } else { 255 /* 256 * Call the next bpf prog and skip the first instruction 257 * of TCC initialization. 258 */ 259 emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 7); 260 } 261 } 262 263 static void build_epilogue(struct jit_ctx *ctx) 264 { 265 __build_epilogue(ctx, false); 266 } 267 268 bool bpf_jit_supports_kfunc_call(void) 269 { 270 return true; 271 } 272 273 bool bpf_jit_supports_far_kfunc_call(void) 274 { 275 return true; 276 } 277 278 static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn) 279 { 280 int off, tc_ninsn = 0; 281 int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size); 282 u8 a1 = LOONGARCH_GPR_A1; 283 u8 a2 = LOONGARCH_GPR_A2; 284 u8 t1 = LOONGARCH_GPR_T1; 285 u8 t2 = LOONGARCH_GPR_T2; 286 u8 t3 = LOONGARCH_GPR_T3; 287 const int idx0 = ctx->idx; 288 289 #define cur_offset (ctx->idx - idx0) 290 #define jmp_offset (tc_ninsn - (cur_offset)) 291 292 /* 293 * a0: &ctx 294 * a1: &array 295 * a2: index 296 * 297 * if (index >= array->map.max_entries) 298 * goto out; 299 */ 300 tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] : ctx->offset[0]; 301 emit_zext_32(ctx, a2, true); 302 303 off = offsetof(struct bpf_array, map.max_entries); 304 emit_insn(ctx, ldwu, t1, a1, off); 305 /* bgeu $a2, $t1, jmp_offset */ 306 if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0) 307 goto toofar; 308 309 /* 310 * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT) 311 * goto out; 312 */ 313 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off); 314 emit_insn(ctx, ldd, t3, REG_TCC, 0); 315 emit_insn(ctx, addid, t3, t3, 1); 316 emit_insn(ctx, std, t3, REG_TCC, 0); 317 emit_insn(ctx, addid, t2, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT); 318 if (emit_tailcall_jmp(ctx, BPF_JSGT, t3, t2, jmp_offset) < 0) 319 goto toofar; 320 321 /* 322 * prog = array->ptrs[index]; 323 * if (!prog) 324 * goto out; 325 */ 326 emit_insn(ctx, alsld, t2, a2, a1, 2); 327 off = offsetof(struct bpf_array, ptrs); 328 emit_insn(ctx, ldd, t2, t2, off); 329 /* beq $t2, $zero, jmp_offset */ 330 if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0) 331 goto toofar; 332 333 /* goto *(prog->bpf_func + 4); */ 334 off = offsetof(struct bpf_prog, bpf_func); 335 emit_insn(ctx, ldd, t3, t2, off); 336 __build_epilogue(ctx, true); 337 338 return 0; 339 340 toofar: 341 pr_info_once("tail_call: jump too far\n"); 342 return -1; 343 #undef cur_offset 344 #undef jmp_offset 345 } 346 347 static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) 348 { 349 const u8 t1 = LOONGARCH_GPR_T1; 350 const u8 t2 = LOONGARCH_GPR_T2; 351 const u8 t3 = LOONGARCH_GPR_T3; 352 const u8 r0 = regmap[BPF_REG_0]; 353 const u8 src = regmap[insn->src_reg]; 354 const u8 dst = regmap[insn->dst_reg]; 355 const s16 off = insn->off; 356 const s32 imm = insn->imm; 357 const bool isdw = BPF_SIZE(insn->code) == BPF_DW; 358 359 move_imm(ctx, t1, off, false); 360 emit_insn(ctx, addd, t1, dst, t1); 361 move_reg(ctx, t3, src); 362 363 switch (imm) { 364 /* lock *(size *)(dst + off) <op>= src */ 365 case BPF_ADD: 366 if (isdw) 367 emit_insn(ctx, amaddd, t2, t1, src); 368 else 369 emit_insn(ctx, amaddw, t2, t1, src); 370 break; 371 case BPF_AND: 372 if (isdw) 373 emit_insn(ctx, amandd, t2, t1, src); 374 else 375 emit_insn(ctx, amandw, t2, t1, src); 376 break; 377 case BPF_OR: 378 if (isdw) 379 emit_insn(ctx, amord, t2, t1, src); 380 else 381 emit_insn(ctx, amorw, t2, t1, src); 382 break; 383 case BPF_XOR: 384 if (isdw) 385 emit_insn(ctx, amxord, t2, t1, src); 386 else 387 emit_insn(ctx, amxorw, t2, t1, src); 388 break; 389 /* src = atomic_fetch_<op>(dst + off, src) */ 390 case BPF_ADD | BPF_FETCH: 391 if (isdw) { 392 emit_insn(ctx, amaddd, src, t1, t3); 393 } else { 394 emit_insn(ctx, amaddw, src, t1, t3); 395 emit_zext_32(ctx, src, true); 396 } 397 break; 398 case BPF_AND | BPF_FETCH: 399 if (isdw) { 400 emit_insn(ctx, amandd, src, t1, t3); 401 } else { 402 emit_insn(ctx, amandw, src, t1, t3); 403 emit_zext_32(ctx, src, true); 404 } 405 break; 406 case BPF_OR | BPF_FETCH: 407 if (isdw) { 408 emit_insn(ctx, amord, src, t1, t3); 409 } else { 410 emit_insn(ctx, amorw, src, t1, t3); 411 emit_zext_32(ctx, src, true); 412 } 413 break; 414 case BPF_XOR | BPF_FETCH: 415 if (isdw) { 416 emit_insn(ctx, amxord, src, t1, t3); 417 } else { 418 emit_insn(ctx, amxorw, src, t1, t3); 419 emit_zext_32(ctx, src, true); 420 } 421 break; 422 /* src = atomic_xchg(dst + off, src); */ 423 case BPF_XCHG: 424 if (isdw) { 425 emit_insn(ctx, amswapd, src, t1, t3); 426 } else { 427 emit_insn(ctx, amswapw, src, t1, t3); 428 emit_zext_32(ctx, src, true); 429 } 430 break; 431 /* r0 = atomic_cmpxchg(dst + off, r0, src); */ 432 case BPF_CMPXCHG: 433 move_reg(ctx, t2, r0); 434 if (isdw) { 435 emit_insn(ctx, lld, r0, t1, 0); 436 emit_insn(ctx, bne, t2, r0, 4); 437 move_reg(ctx, t3, src); 438 emit_insn(ctx, scd, t3, t1, 0); 439 emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4); 440 } else { 441 emit_insn(ctx, llw, r0, t1, 0); 442 emit_zext_32(ctx, t2, true); 443 emit_zext_32(ctx, r0, true); 444 emit_insn(ctx, bne, t2, r0, 4); 445 move_reg(ctx, t3, src); 446 emit_insn(ctx, scw, t3, t1, 0); 447 emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6); 448 emit_zext_32(ctx, r0, true); 449 } 450 break; 451 } 452 } 453 454 static bool is_signed_bpf_cond(u8 cond) 455 { 456 return cond == BPF_JSGT || cond == BPF_JSLT || 457 cond == BPF_JSGE || cond == BPF_JSLE; 458 } 459 460 #define BPF_FIXUP_REG_MASK GENMASK(31, 27) 461 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) 462 #define REG_DONT_CLEAR_MARKER 0 463 464 bool ex_handler_bpf(const struct exception_table_entry *ex, 465 struct pt_regs *regs) 466 { 467 int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); 468 off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); 469 470 if (dst_reg != REG_DONT_CLEAR_MARKER) 471 regs->regs[dst_reg] = 0; 472 regs->csr_era = (unsigned long)&ex->fixup - offset; 473 474 return true; 475 } 476 477 /* For accesses to BTF pointers, add an entry to the exception table */ 478 static int add_exception_handler(const struct bpf_insn *insn, 479 struct jit_ctx *ctx, 480 int dst_reg) 481 { 482 unsigned long pc; 483 off_t ins_offset, fixup_offset; 484 struct exception_table_entry *ex; 485 486 if (!ctx->image || !ctx->ro_image || !ctx->prog->aux->extable) 487 return 0; 488 489 if (BPF_MODE(insn->code) != BPF_PROBE_MEM && 490 BPF_MODE(insn->code) != BPF_PROBE_MEMSX && 491 BPF_MODE(insn->code) != BPF_PROBE_MEM32) 492 return 0; 493 494 if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries)) 495 return -EINVAL; 496 497 ex = &ctx->prog->aux->extable[ctx->num_exentries]; 498 pc = (unsigned long)&ctx->ro_image[ctx->idx - 1]; 499 500 /* 501 * This is the relative offset of the instruction that may fault from 502 * the exception table itself. This will be written to the exception 503 * table and if this instruction faults, the destination register will 504 * be set to '0' and the execution will jump to the next instruction. 505 */ 506 ins_offset = pc - (long)&ex->insn; 507 if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN)) 508 return -ERANGE; 509 510 /* 511 * Since the extable follows the program, the fixup offset is always 512 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value 513 * to keep things simple, and put the destination register in the upper 514 * bits. We don't need to worry about buildtime or runtime sort 515 * modifying the upper bits because the table is already sorted, and 516 * isn't part of the main exception table. 517 * 518 * The fixup_offset is set to the next instruction from the instruction 519 * that may fault. The execution will jump to this after handling the fault. 520 */ 521 fixup_offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE); 522 if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset)) 523 return -ERANGE; 524 525 /* 526 * The offsets above have been calculated using the RO buffer but we 527 * need to use the R/W buffer for writes. Switch ex to rw buffer for writing. 528 */ 529 ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image); 530 ex->insn = ins_offset; 531 ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | 532 FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); 533 ex->type = EX_TYPE_BPF; 534 535 ctx->num_exentries++; 536 537 return 0; 538 } 539 540 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass) 541 { 542 u8 tm = -1; 543 u64 func_addr; 544 bool func_addr_fixed, sign_extend; 545 int i = insn - ctx->prog->insnsi; 546 int ret, jmp_offset, tcc_ptr_off; 547 const u8 code = insn->code; 548 const u8 cond = BPF_OP(code); 549 const u8 t1 = LOONGARCH_GPR_T1; 550 const u8 t2 = LOONGARCH_GPR_T2; 551 const u8 t3 = LOONGARCH_GPR_T3; 552 u8 src = regmap[insn->src_reg]; 553 u8 dst = regmap[insn->dst_reg]; 554 const s16 off = insn->off; 555 const s32 imm = insn->imm; 556 const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32; 557 558 switch (code) { 559 /* dst = src */ 560 case BPF_ALU | BPF_MOV | BPF_X: 561 case BPF_ALU64 | BPF_MOV | BPF_X: 562 if (insn_is_cast_user(insn)) { 563 move_reg(ctx, t1, src); 564 emit_zext_32(ctx, t1, true); 565 move_imm(ctx, dst, (ctx->user_vm_start >> 32) << 32, false); 566 emit_insn(ctx, beq, t1, LOONGARCH_GPR_ZERO, 1); 567 emit_insn(ctx, or, t1, dst, t1); 568 move_reg(ctx, dst, t1); 569 break; 570 } 571 switch (off) { 572 case 0: 573 move_reg(ctx, dst, src); 574 emit_zext_32(ctx, dst, is32); 575 break; 576 case 8: 577 emit_insn(ctx, extwb, dst, src); 578 emit_zext_32(ctx, dst, is32); 579 break; 580 case 16: 581 emit_insn(ctx, extwh, dst, src); 582 emit_zext_32(ctx, dst, is32); 583 break; 584 case 32: 585 emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO); 586 break; 587 } 588 break; 589 590 /* dst = imm */ 591 case BPF_ALU | BPF_MOV | BPF_K: 592 case BPF_ALU64 | BPF_MOV | BPF_K: 593 move_imm(ctx, dst, imm, is32); 594 break; 595 596 /* dst = dst + src */ 597 case BPF_ALU | BPF_ADD | BPF_X: 598 case BPF_ALU64 | BPF_ADD | BPF_X: 599 emit_insn(ctx, addd, dst, dst, src); 600 emit_zext_32(ctx, dst, is32); 601 break; 602 603 /* dst = dst + imm */ 604 case BPF_ALU | BPF_ADD | BPF_K: 605 case BPF_ALU64 | BPF_ADD | BPF_K: 606 if (is_signed_imm12(imm)) { 607 emit_insn(ctx, addid, dst, dst, imm); 608 } else { 609 move_imm(ctx, t1, imm, is32); 610 emit_insn(ctx, addd, dst, dst, t1); 611 } 612 emit_zext_32(ctx, dst, is32); 613 break; 614 615 /* dst = dst - src */ 616 case BPF_ALU | BPF_SUB | BPF_X: 617 case BPF_ALU64 | BPF_SUB | BPF_X: 618 emit_insn(ctx, subd, dst, dst, src); 619 emit_zext_32(ctx, dst, is32); 620 break; 621 622 /* dst = dst - imm */ 623 case BPF_ALU | BPF_SUB | BPF_K: 624 case BPF_ALU64 | BPF_SUB | BPF_K: 625 if (is_signed_imm12(-imm)) { 626 emit_insn(ctx, addid, dst, dst, -imm); 627 } else { 628 move_imm(ctx, t1, imm, is32); 629 emit_insn(ctx, subd, dst, dst, t1); 630 } 631 emit_zext_32(ctx, dst, is32); 632 break; 633 634 /* dst = dst * src */ 635 case BPF_ALU | BPF_MUL | BPF_X: 636 case BPF_ALU64 | BPF_MUL | BPF_X: 637 emit_insn(ctx, muld, dst, dst, src); 638 emit_zext_32(ctx, dst, is32); 639 break; 640 641 /* dst = dst * imm */ 642 case BPF_ALU | BPF_MUL | BPF_K: 643 case BPF_ALU64 | BPF_MUL | BPF_K: 644 move_imm(ctx, t1, imm, is32); 645 emit_insn(ctx, muld, dst, dst, t1); 646 emit_zext_32(ctx, dst, is32); 647 break; 648 649 /* dst = dst / src */ 650 case BPF_ALU | BPF_DIV | BPF_X: 651 case BPF_ALU64 | BPF_DIV | BPF_X: 652 if (!off) { 653 emit_zext_32(ctx, dst, is32); 654 move_reg(ctx, t1, src); 655 emit_zext_32(ctx, t1, is32); 656 emit_insn(ctx, divdu, dst, dst, t1); 657 emit_zext_32(ctx, dst, is32); 658 } else { 659 emit_sext_32(ctx, dst, is32); 660 move_reg(ctx, t1, src); 661 emit_sext_32(ctx, t1, is32); 662 emit_insn(ctx, divd, dst, dst, t1); 663 emit_sext_32(ctx, dst, is32); 664 } 665 break; 666 667 /* dst = dst / imm */ 668 case BPF_ALU | BPF_DIV | BPF_K: 669 case BPF_ALU64 | BPF_DIV | BPF_K: 670 if (!off) { 671 move_imm(ctx, t1, imm, is32); 672 emit_zext_32(ctx, dst, is32); 673 emit_insn(ctx, divdu, dst, dst, t1); 674 emit_zext_32(ctx, dst, is32); 675 } else { 676 move_imm(ctx, t1, imm, false); 677 emit_sext_32(ctx, t1, is32); 678 emit_sext_32(ctx, dst, is32); 679 emit_insn(ctx, divd, dst, dst, t1); 680 emit_sext_32(ctx, dst, is32); 681 } 682 break; 683 684 /* dst = dst % src */ 685 case BPF_ALU | BPF_MOD | BPF_X: 686 case BPF_ALU64 | BPF_MOD | BPF_X: 687 if (!off) { 688 emit_zext_32(ctx, dst, is32); 689 move_reg(ctx, t1, src); 690 emit_zext_32(ctx, t1, is32); 691 emit_insn(ctx, moddu, dst, dst, t1); 692 emit_zext_32(ctx, dst, is32); 693 } else { 694 emit_sext_32(ctx, dst, is32); 695 move_reg(ctx, t1, src); 696 emit_sext_32(ctx, t1, is32); 697 emit_insn(ctx, modd, dst, dst, t1); 698 emit_sext_32(ctx, dst, is32); 699 } 700 break; 701 702 /* dst = dst % imm */ 703 case BPF_ALU | BPF_MOD | BPF_K: 704 case BPF_ALU64 | BPF_MOD | BPF_K: 705 if (!off) { 706 move_imm(ctx, t1, imm, is32); 707 emit_zext_32(ctx, dst, is32); 708 emit_insn(ctx, moddu, dst, dst, t1); 709 emit_zext_32(ctx, dst, is32); 710 } else { 711 move_imm(ctx, t1, imm, false); 712 emit_sext_32(ctx, t1, is32); 713 emit_sext_32(ctx, dst, is32); 714 emit_insn(ctx, modd, dst, dst, t1); 715 emit_sext_32(ctx, dst, is32); 716 } 717 break; 718 719 /* dst = -dst */ 720 case BPF_ALU | BPF_NEG: 721 case BPF_ALU64 | BPF_NEG: 722 move_imm(ctx, t1, imm, is32); 723 emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst); 724 emit_zext_32(ctx, dst, is32); 725 break; 726 727 /* dst = dst & src */ 728 case BPF_ALU | BPF_AND | BPF_X: 729 case BPF_ALU64 | BPF_AND | BPF_X: 730 emit_insn(ctx, and, dst, dst, src); 731 emit_zext_32(ctx, dst, is32); 732 break; 733 734 /* dst = dst & imm */ 735 case BPF_ALU | BPF_AND | BPF_K: 736 case BPF_ALU64 | BPF_AND | BPF_K: 737 if (is_unsigned_imm12(imm)) { 738 emit_insn(ctx, andi, dst, dst, imm); 739 } else { 740 move_imm(ctx, t1, imm, is32); 741 emit_insn(ctx, and, dst, dst, t1); 742 } 743 emit_zext_32(ctx, dst, is32); 744 break; 745 746 /* dst = dst | src */ 747 case BPF_ALU | BPF_OR | BPF_X: 748 case BPF_ALU64 | BPF_OR | BPF_X: 749 emit_insn(ctx, or, dst, dst, src); 750 emit_zext_32(ctx, dst, is32); 751 break; 752 753 /* dst = dst | imm */ 754 case BPF_ALU | BPF_OR | BPF_K: 755 case BPF_ALU64 | BPF_OR | BPF_K: 756 if (is_unsigned_imm12(imm)) { 757 emit_insn(ctx, ori, dst, dst, imm); 758 } else { 759 move_imm(ctx, t1, imm, is32); 760 emit_insn(ctx, or, dst, dst, t1); 761 } 762 emit_zext_32(ctx, dst, is32); 763 break; 764 765 /* dst = dst ^ src */ 766 case BPF_ALU | BPF_XOR | BPF_X: 767 case BPF_ALU64 | BPF_XOR | BPF_X: 768 emit_insn(ctx, xor, dst, dst, src); 769 emit_zext_32(ctx, dst, is32); 770 break; 771 772 /* dst = dst ^ imm */ 773 case BPF_ALU | BPF_XOR | BPF_K: 774 case BPF_ALU64 | BPF_XOR | BPF_K: 775 if (is_unsigned_imm12(imm)) { 776 emit_insn(ctx, xori, dst, dst, imm); 777 } else { 778 move_imm(ctx, t1, imm, is32); 779 emit_insn(ctx, xor, dst, dst, t1); 780 } 781 emit_zext_32(ctx, dst, is32); 782 break; 783 784 /* dst = dst << src (logical) */ 785 case BPF_ALU | BPF_LSH | BPF_X: 786 emit_insn(ctx, sllw, dst, dst, src); 787 emit_zext_32(ctx, dst, is32); 788 break; 789 790 case BPF_ALU64 | BPF_LSH | BPF_X: 791 emit_insn(ctx, slld, dst, dst, src); 792 break; 793 794 /* dst = dst << imm (logical) */ 795 case BPF_ALU | BPF_LSH | BPF_K: 796 emit_insn(ctx, slliw, dst, dst, imm); 797 emit_zext_32(ctx, dst, is32); 798 break; 799 800 case BPF_ALU64 | BPF_LSH | BPF_K: 801 emit_insn(ctx, sllid, dst, dst, imm); 802 break; 803 804 /* dst = dst >> src (logical) */ 805 case BPF_ALU | BPF_RSH | BPF_X: 806 emit_insn(ctx, srlw, dst, dst, src); 807 emit_zext_32(ctx, dst, is32); 808 break; 809 810 case BPF_ALU64 | BPF_RSH | BPF_X: 811 emit_insn(ctx, srld, dst, dst, src); 812 break; 813 814 /* dst = dst >> imm (logical) */ 815 case BPF_ALU | BPF_RSH | BPF_K: 816 emit_insn(ctx, srliw, dst, dst, imm); 817 emit_zext_32(ctx, dst, is32); 818 break; 819 820 case BPF_ALU64 | BPF_RSH | BPF_K: 821 emit_insn(ctx, srlid, dst, dst, imm); 822 break; 823 824 /* dst = dst >> src (arithmetic) */ 825 case BPF_ALU | BPF_ARSH | BPF_X: 826 emit_insn(ctx, sraw, dst, dst, src); 827 emit_zext_32(ctx, dst, is32); 828 break; 829 830 case BPF_ALU64 | BPF_ARSH | BPF_X: 831 emit_insn(ctx, srad, dst, dst, src); 832 break; 833 834 /* dst = dst >> imm (arithmetic) */ 835 case BPF_ALU | BPF_ARSH | BPF_K: 836 emit_insn(ctx, sraiw, dst, dst, imm); 837 emit_zext_32(ctx, dst, is32); 838 break; 839 840 case BPF_ALU64 | BPF_ARSH | BPF_K: 841 emit_insn(ctx, sraid, dst, dst, imm); 842 break; 843 844 /* dst = BSWAP##imm(dst) */ 845 case BPF_ALU | BPF_END | BPF_FROM_LE: 846 switch (imm) { 847 case 16: 848 /* zero-extend 16 bits into 64 bits */ 849 emit_insn(ctx, bstrpickd, dst, dst, 15, 0); 850 break; 851 case 32: 852 /* zero-extend 32 bits into 64 bits */ 853 emit_zext_32(ctx, dst, is32); 854 break; 855 case 64: 856 /* do nothing */ 857 break; 858 } 859 break; 860 861 case BPF_ALU | BPF_END | BPF_FROM_BE: 862 case BPF_ALU64 | BPF_END | BPF_FROM_LE: 863 switch (imm) { 864 case 16: 865 emit_insn(ctx, revb2h, dst, dst); 866 /* zero-extend 16 bits into 64 bits */ 867 emit_insn(ctx, bstrpickd, dst, dst, 15, 0); 868 break; 869 case 32: 870 emit_insn(ctx, revb2w, dst, dst); 871 /* clear the upper 32 bits */ 872 emit_zext_32(ctx, dst, true); 873 break; 874 case 64: 875 emit_insn(ctx, revbd, dst, dst); 876 break; 877 } 878 break; 879 880 /* PC += off if dst cond src */ 881 case BPF_JMP | BPF_JEQ | BPF_X: 882 case BPF_JMP | BPF_JNE | BPF_X: 883 case BPF_JMP | BPF_JGT | BPF_X: 884 case BPF_JMP | BPF_JGE | BPF_X: 885 case BPF_JMP | BPF_JLT | BPF_X: 886 case BPF_JMP | BPF_JLE | BPF_X: 887 case BPF_JMP | BPF_JSGT | BPF_X: 888 case BPF_JMP | BPF_JSGE | BPF_X: 889 case BPF_JMP | BPF_JSLT | BPF_X: 890 case BPF_JMP | BPF_JSLE | BPF_X: 891 case BPF_JMP32 | BPF_JEQ | BPF_X: 892 case BPF_JMP32 | BPF_JNE | BPF_X: 893 case BPF_JMP32 | BPF_JGT | BPF_X: 894 case BPF_JMP32 | BPF_JGE | BPF_X: 895 case BPF_JMP32 | BPF_JLT | BPF_X: 896 case BPF_JMP32 | BPF_JLE | BPF_X: 897 case BPF_JMP32 | BPF_JSGT | BPF_X: 898 case BPF_JMP32 | BPF_JSGE | BPF_X: 899 case BPF_JMP32 | BPF_JSLT | BPF_X: 900 case BPF_JMP32 | BPF_JSLE | BPF_X: 901 jmp_offset = bpf2la_offset(i, off, ctx); 902 move_reg(ctx, t1, dst); 903 move_reg(ctx, t2, src); 904 if (is_signed_bpf_cond(BPF_OP(code))) { 905 emit_sext_32(ctx, t1, is32); 906 emit_sext_32(ctx, t2, is32); 907 } else { 908 emit_zext_32(ctx, t1, is32); 909 emit_zext_32(ctx, t2, is32); 910 } 911 if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0) 912 goto toofar; 913 break; 914 915 /* PC += off if dst cond imm */ 916 case BPF_JMP | BPF_JEQ | BPF_K: 917 case BPF_JMP | BPF_JNE | BPF_K: 918 case BPF_JMP | BPF_JGT | BPF_K: 919 case BPF_JMP | BPF_JGE | BPF_K: 920 case BPF_JMP | BPF_JLT | BPF_K: 921 case BPF_JMP | BPF_JLE | BPF_K: 922 case BPF_JMP | BPF_JSGT | BPF_K: 923 case BPF_JMP | BPF_JSGE | BPF_K: 924 case BPF_JMP | BPF_JSLT | BPF_K: 925 case BPF_JMP | BPF_JSLE | BPF_K: 926 case BPF_JMP32 | BPF_JEQ | BPF_K: 927 case BPF_JMP32 | BPF_JNE | BPF_K: 928 case BPF_JMP32 | BPF_JGT | BPF_K: 929 case BPF_JMP32 | BPF_JGE | BPF_K: 930 case BPF_JMP32 | BPF_JLT | BPF_K: 931 case BPF_JMP32 | BPF_JLE | BPF_K: 932 case BPF_JMP32 | BPF_JSGT | BPF_K: 933 case BPF_JMP32 | BPF_JSGE | BPF_K: 934 case BPF_JMP32 | BPF_JSLT | BPF_K: 935 case BPF_JMP32 | BPF_JSLE | BPF_K: 936 jmp_offset = bpf2la_offset(i, off, ctx); 937 if (imm) { 938 move_imm(ctx, t1, imm, false); 939 tm = t1; 940 } else { 941 /* If imm is 0, simply use zero register. */ 942 tm = LOONGARCH_GPR_ZERO; 943 } 944 move_reg(ctx, t2, dst); 945 if (is_signed_bpf_cond(BPF_OP(code))) { 946 emit_sext_32(ctx, tm, is32); 947 emit_sext_32(ctx, t2, is32); 948 } else { 949 emit_zext_32(ctx, tm, is32); 950 emit_zext_32(ctx, t2, is32); 951 } 952 if (emit_cond_jmp(ctx, cond, t2, tm, jmp_offset) < 0) 953 goto toofar; 954 break; 955 956 /* PC += off if dst & src */ 957 case BPF_JMP | BPF_JSET | BPF_X: 958 case BPF_JMP32 | BPF_JSET | BPF_X: 959 jmp_offset = bpf2la_offset(i, off, ctx); 960 emit_insn(ctx, and, t1, dst, src); 961 emit_zext_32(ctx, t1, is32); 962 if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0) 963 goto toofar; 964 break; 965 966 /* PC += off if dst & imm */ 967 case BPF_JMP | BPF_JSET | BPF_K: 968 case BPF_JMP32 | BPF_JSET | BPF_K: 969 jmp_offset = bpf2la_offset(i, off, ctx); 970 move_imm(ctx, t1, imm, is32); 971 emit_insn(ctx, and, t1, dst, t1); 972 emit_zext_32(ctx, t1, is32); 973 if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0) 974 goto toofar; 975 break; 976 977 /* PC += off */ 978 case BPF_JMP | BPF_JA: 979 case BPF_JMP32 | BPF_JA: 980 if (BPF_CLASS(code) == BPF_JMP) 981 jmp_offset = bpf2la_offset(i, off, ctx); 982 else 983 jmp_offset = bpf2la_offset(i, imm, ctx); 984 if (emit_uncond_jmp(ctx, jmp_offset) < 0) 985 goto toofar; 986 break; 987 988 /* function call */ 989 case BPF_JMP | BPF_CALL: 990 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, 991 &func_addr, &func_addr_fixed); 992 if (ret < 0) 993 return ret; 994 995 if (insn->src_reg == BPF_PSEUDO_CALL) { 996 tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size); 997 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off); 998 } 999 1000 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 1001 const struct btf_func_model *m; 1002 int i; 1003 1004 m = bpf_jit_find_kfunc_model(ctx->prog, insn); 1005 if (!m) 1006 return -EINVAL; 1007 1008 for (i = 0; i < m->nr_args; i++) { 1009 u8 reg = regmap[BPF_REG_1 + i]; 1010 bool sign = m->arg_flags[i] & BTF_FMODEL_SIGNED_ARG; 1011 1012 emit_abi_ext(ctx, reg, m->arg_size[i], sign); 1013 } 1014 } 1015 1016 move_addr(ctx, t1, func_addr); 1017 emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0); 1018 1019 if (insn->src_reg != BPF_PSEUDO_CALL) 1020 move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); 1021 1022 break; 1023 1024 /* tail call */ 1025 case BPF_JMP | BPF_TAIL_CALL: 1026 if (emit_bpf_tail_call(ctx, i) < 0) 1027 return -EINVAL; 1028 break; 1029 1030 /* function return */ 1031 case BPF_JMP | BPF_EXIT: 1032 if (i == ctx->prog->len - 1) 1033 break; 1034 1035 jmp_offset = epilogue_offset(ctx); 1036 if (emit_uncond_jmp(ctx, jmp_offset) < 0) 1037 goto toofar; 1038 break; 1039 1040 /* dst = imm64 */ 1041 case BPF_LD | BPF_IMM | BPF_DW: 1042 { 1043 const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm; 1044 1045 if (bpf_pseudo_func(insn)) 1046 move_addr(ctx, dst, imm64); 1047 else 1048 move_imm(ctx, dst, imm64, is32); 1049 return 1; 1050 } 1051 1052 /* dst = *(size *)(src + off) */ 1053 case BPF_LDX | BPF_MEM | BPF_B: 1054 case BPF_LDX | BPF_MEM | BPF_H: 1055 case BPF_LDX | BPF_MEM | BPF_W: 1056 case BPF_LDX | BPF_MEM | BPF_DW: 1057 case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 1058 case BPF_LDX | BPF_PROBE_MEM | BPF_W: 1059 case BPF_LDX | BPF_PROBE_MEM | BPF_H: 1060 case BPF_LDX | BPF_PROBE_MEM | BPF_B: 1061 /* dst_reg = (s64)*(signed size *)(src_reg + off) */ 1062 case BPF_LDX | BPF_MEMSX | BPF_B: 1063 case BPF_LDX | BPF_MEMSX | BPF_H: 1064 case BPF_LDX | BPF_MEMSX | BPF_W: 1065 case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: 1066 case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: 1067 case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: 1068 /* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + REG_ARENA + off) */ 1069 case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: 1070 case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: 1071 case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: 1072 case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: 1073 sign_extend = BPF_MODE(code) == BPF_MEMSX || 1074 BPF_MODE(code) == BPF_PROBE_MEMSX; 1075 1076 if (BPF_MODE(code) == BPF_PROBE_MEM32) { 1077 emit_insn(ctx, addd, t2, src, REG_ARENA); 1078 src = t2; 1079 } 1080 1081 switch (BPF_SIZE(code)) { 1082 case BPF_B: 1083 if (is_signed_imm12(off)) { 1084 if (sign_extend) 1085 emit_insn(ctx, ldb, dst, src, off); 1086 else 1087 emit_insn(ctx, ldbu, dst, src, off); 1088 } else { 1089 move_imm(ctx, t1, off, is32); 1090 if (sign_extend) 1091 emit_insn(ctx, ldxb, dst, src, t1); 1092 else 1093 emit_insn(ctx, ldxbu, dst, src, t1); 1094 } 1095 break; 1096 case BPF_H: 1097 if (is_signed_imm12(off)) { 1098 if (sign_extend) 1099 emit_insn(ctx, ldh, dst, src, off); 1100 else 1101 emit_insn(ctx, ldhu, dst, src, off); 1102 } else { 1103 move_imm(ctx, t1, off, is32); 1104 if (sign_extend) 1105 emit_insn(ctx, ldxh, dst, src, t1); 1106 else 1107 emit_insn(ctx, ldxhu, dst, src, t1); 1108 } 1109 break; 1110 case BPF_W: 1111 if (is_signed_imm12(off)) { 1112 if (sign_extend) 1113 emit_insn(ctx, ldw, dst, src, off); 1114 else 1115 emit_insn(ctx, ldwu, dst, src, off); 1116 } else { 1117 move_imm(ctx, t1, off, is32); 1118 if (sign_extend) 1119 emit_insn(ctx, ldxw, dst, src, t1); 1120 else 1121 emit_insn(ctx, ldxwu, dst, src, t1); 1122 } 1123 break; 1124 case BPF_DW: 1125 move_imm(ctx, t1, off, is32); 1126 emit_insn(ctx, ldxd, dst, src, t1); 1127 break; 1128 } 1129 1130 ret = add_exception_handler(insn, ctx, dst); 1131 if (ret) 1132 return ret; 1133 break; 1134 1135 /* *(size *)(dst + off) = imm */ 1136 case BPF_ST | BPF_MEM | BPF_B: 1137 case BPF_ST | BPF_MEM | BPF_H: 1138 case BPF_ST | BPF_MEM | BPF_W: 1139 case BPF_ST | BPF_MEM | BPF_DW: 1140 /* ST | PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = imm */ 1141 case BPF_ST | BPF_PROBE_MEM32 | BPF_B: 1142 case BPF_ST | BPF_PROBE_MEM32 | BPF_H: 1143 case BPF_ST | BPF_PROBE_MEM32 | BPF_W: 1144 case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: 1145 if (BPF_MODE(code) == BPF_PROBE_MEM32) { 1146 emit_insn(ctx, addd, t3, dst, REG_ARENA); 1147 dst = t3; 1148 } 1149 1150 switch (BPF_SIZE(code)) { 1151 case BPF_B: 1152 move_imm(ctx, t1, imm, is32); 1153 if (is_signed_imm12(off)) { 1154 emit_insn(ctx, stb, t1, dst, off); 1155 } else { 1156 move_imm(ctx, t2, off, is32); 1157 emit_insn(ctx, stxb, t1, dst, t2); 1158 } 1159 break; 1160 case BPF_H: 1161 move_imm(ctx, t1, imm, is32); 1162 if (is_signed_imm12(off)) { 1163 emit_insn(ctx, sth, t1, dst, off); 1164 } else { 1165 move_imm(ctx, t2, off, is32); 1166 emit_insn(ctx, stxh, t1, dst, t2); 1167 } 1168 break; 1169 case BPF_W: 1170 move_imm(ctx, t1, imm, is32); 1171 if (is_signed_imm12(off)) { 1172 emit_insn(ctx, stw, t1, dst, off); 1173 } else if (is_signed_imm14(off)) { 1174 emit_insn(ctx, stptrw, t1, dst, off); 1175 } else { 1176 move_imm(ctx, t2, off, is32); 1177 emit_insn(ctx, stxw, t1, dst, t2); 1178 } 1179 break; 1180 case BPF_DW: 1181 move_imm(ctx, t1, imm, is32); 1182 if (is_signed_imm12(off)) { 1183 emit_insn(ctx, std, t1, dst, off); 1184 } else if (is_signed_imm14(off)) { 1185 emit_insn(ctx, stptrd, t1, dst, off); 1186 } else { 1187 move_imm(ctx, t2, off, is32); 1188 emit_insn(ctx, stxd, t1, dst, t2); 1189 } 1190 break; 1191 } 1192 1193 ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER); 1194 if (ret) 1195 return ret; 1196 break; 1197 1198 /* *(size *)(dst + off) = src */ 1199 case BPF_STX | BPF_MEM | BPF_B: 1200 case BPF_STX | BPF_MEM | BPF_H: 1201 case BPF_STX | BPF_MEM | BPF_W: 1202 case BPF_STX | BPF_MEM | BPF_DW: 1203 /* STX | PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = src */ 1204 case BPF_STX | BPF_PROBE_MEM32 | BPF_B: 1205 case BPF_STX | BPF_PROBE_MEM32 | BPF_H: 1206 case BPF_STX | BPF_PROBE_MEM32 | BPF_W: 1207 case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: 1208 if (BPF_MODE(code) == BPF_PROBE_MEM32) { 1209 emit_insn(ctx, addd, t2, dst, REG_ARENA); 1210 dst = t2; 1211 } 1212 1213 switch (BPF_SIZE(code)) { 1214 case BPF_B: 1215 if (is_signed_imm12(off)) { 1216 emit_insn(ctx, stb, src, dst, off); 1217 } else { 1218 move_imm(ctx, t1, off, is32); 1219 emit_insn(ctx, stxb, src, dst, t1); 1220 } 1221 break; 1222 case BPF_H: 1223 if (is_signed_imm12(off)) { 1224 emit_insn(ctx, sth, src, dst, off); 1225 } else { 1226 move_imm(ctx, t1, off, is32); 1227 emit_insn(ctx, stxh, src, dst, t1); 1228 } 1229 break; 1230 case BPF_W: 1231 if (is_signed_imm12(off)) { 1232 emit_insn(ctx, stw, src, dst, off); 1233 } else if (is_signed_imm14(off)) { 1234 emit_insn(ctx, stptrw, src, dst, off); 1235 } else { 1236 move_imm(ctx, t1, off, is32); 1237 emit_insn(ctx, stxw, src, dst, t1); 1238 } 1239 break; 1240 case BPF_DW: 1241 if (is_signed_imm12(off)) { 1242 emit_insn(ctx, std, src, dst, off); 1243 } else if (is_signed_imm14(off)) { 1244 emit_insn(ctx, stptrd, src, dst, off); 1245 } else { 1246 move_imm(ctx, t1, off, is32); 1247 emit_insn(ctx, stxd, src, dst, t1); 1248 } 1249 break; 1250 } 1251 1252 ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER); 1253 if (ret) 1254 return ret; 1255 break; 1256 1257 case BPF_STX | BPF_ATOMIC | BPF_W: 1258 case BPF_STX | BPF_ATOMIC | BPF_DW: 1259 emit_atomic(insn, ctx); 1260 break; 1261 1262 /* Speculation barrier */ 1263 case BPF_ST | BPF_NOSPEC: 1264 break; 1265 1266 default: 1267 pr_err("bpf_jit: unknown opcode %02x\n", code); 1268 return -EINVAL; 1269 } 1270 1271 return 0; 1272 1273 toofar: 1274 pr_info_once("bpf_jit: opcode %02x, jump too far\n", code); 1275 return -E2BIG; 1276 } 1277 1278 static int build_body(struct jit_ctx *ctx, bool extra_pass) 1279 { 1280 int i; 1281 const struct bpf_prog *prog = ctx->prog; 1282 1283 for (i = 0; i < prog->len; i++) { 1284 const struct bpf_insn *insn = &prog->insnsi[i]; 1285 int ret; 1286 1287 if (ctx->image == NULL) 1288 ctx->offset[i] = ctx->idx; 1289 1290 ret = build_insn(insn, ctx, extra_pass); 1291 if (ret > 0) { 1292 i++; 1293 if (ctx->image == NULL) 1294 ctx->offset[i] = ctx->idx; 1295 continue; 1296 } 1297 if (ret) 1298 return ret; 1299 } 1300 1301 if (ctx->image == NULL) 1302 ctx->offset[i] = ctx->idx; 1303 1304 return 0; 1305 } 1306 1307 /* Fill space with break instructions */ 1308 static void jit_fill_hole(void *area, unsigned int size) 1309 { 1310 u32 *ptr; 1311 1312 /* We are guaranteed to have aligned memory */ 1313 for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) 1314 *ptr++ = INSN_BREAK; 1315 } 1316 1317 static int validate_code(struct jit_ctx *ctx) 1318 { 1319 int i; 1320 union loongarch_instruction insn; 1321 1322 for (i = 0; i < ctx->idx; i++) { 1323 insn = ctx->image[i]; 1324 /* Check INSN_BREAK */ 1325 if (insn.word == INSN_BREAK) 1326 return -1; 1327 } 1328 1329 return 0; 1330 } 1331 1332 static int validate_ctx(struct jit_ctx *ctx) 1333 { 1334 if (validate_code(ctx)) 1335 return -1; 1336 1337 if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries)) 1338 return -1; 1339 1340 return 0; 1341 } 1342 1343 static int emit_jump_and_link(struct jit_ctx *ctx, u8 rd, u64 target) 1344 { 1345 if (!target) { 1346 pr_err("bpf_jit: jump target address is error\n"); 1347 return -EFAULT; 1348 } 1349 1350 move_imm(ctx, LOONGARCH_GPR_T1, target, false); 1351 emit_insn(ctx, jirl, rd, LOONGARCH_GPR_T1, 0); 1352 1353 return 0; 1354 } 1355 1356 static int emit_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call) 1357 { 1358 int i; 1359 struct jit_ctx ctx; 1360 1361 ctx.idx = 0; 1362 ctx.image = (union loongarch_instruction *)insns; 1363 1364 if (!target) { 1365 for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++) 1366 emit_insn((&ctx), nop); 1367 return 0; 1368 } 1369 1370 return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_RA : LOONGARCH_GPR_ZERO, (u64)target); 1371 } 1372 1373 static int emit_call(struct jit_ctx *ctx, u64 addr) 1374 { 1375 return emit_jump_and_link(ctx, LOONGARCH_GPR_RA, addr); 1376 } 1377 1378 void *bpf_arch_text_copy(void *dst, void *src, size_t len) 1379 { 1380 int ret; 1381 1382 mutex_lock(&text_mutex); 1383 ret = larch_insn_text_copy(dst, src, len); 1384 mutex_unlock(&text_mutex); 1385 1386 return ret ? ERR_PTR(-EINVAL) : dst; 1387 } 1388 1389 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, 1390 enum bpf_text_poke_type new_t, void *old_addr, 1391 void *new_addr) 1392 { 1393 int ret; 1394 bool is_call; 1395 unsigned long size = 0; 1396 unsigned long offset = 0; 1397 void *image = NULL; 1398 char namebuf[KSYM_NAME_LEN]; 1399 u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP}; 1400 u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP}; 1401 1402 /* Only poking bpf text is supported. Since kernel function entry 1403 * is set up by ftrace, we rely on ftrace to poke kernel functions. 1404 */ 1405 if (!bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) 1406 return -ENOTSUPP; 1407 1408 image = ip - offset; 1409 1410 /* zero offset means we're poking bpf prog entry */ 1411 if (offset == 0) { 1412 /* skip to the nop instruction in bpf prog entry: 1413 * move t0, ra 1414 * nop 1415 */ 1416 ip = image + LOONGARCH_INSN_SIZE; 1417 } 1418 1419 is_call = old_t == BPF_MOD_CALL; 1420 ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call); 1421 if (ret) 1422 return ret; 1423 1424 if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES)) 1425 return -EFAULT; 1426 1427 is_call = new_t == BPF_MOD_CALL; 1428 ret = emit_jump_or_nops(new_addr, ip, new_insns, is_call); 1429 if (ret) 1430 return ret; 1431 1432 mutex_lock(&text_mutex); 1433 if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES)) 1434 ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES); 1435 mutex_unlock(&text_mutex); 1436 1437 return ret; 1438 } 1439 1440 int bpf_arch_text_invalidate(void *dst, size_t len) 1441 { 1442 int i; 1443 int ret = 0; 1444 u32 *inst; 1445 1446 inst = kvmalloc(len, GFP_KERNEL); 1447 if (!inst) 1448 return -ENOMEM; 1449 1450 for (i = 0; i < (len / sizeof(u32)); i++) 1451 inst[i] = INSN_BREAK; 1452 1453 mutex_lock(&text_mutex); 1454 if (larch_insn_text_copy(dst, inst, len)) 1455 ret = -EINVAL; 1456 mutex_unlock(&text_mutex); 1457 1458 kvfree(inst); 1459 1460 return ret; 1461 } 1462 1463 static void store_args(struct jit_ctx *ctx, int nargs, int args_off) 1464 { 1465 int i; 1466 1467 for (i = 0; i < nargs; i++) { 1468 emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off); 1469 args_off -= 8; 1470 } 1471 } 1472 1473 static void restore_args(struct jit_ctx *ctx, int nargs, int args_off) 1474 { 1475 int i; 1476 1477 for (i = 0; i < nargs; i++) { 1478 emit_insn(ctx, ldd, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off); 1479 args_off -= 8; 1480 } 1481 } 1482 1483 static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, 1484 int args_off, int retval_off, int run_ctx_off, bool save_ret) 1485 { 1486 int ret; 1487 u32 *branch; 1488 struct bpf_prog *p = l->link.prog; 1489 int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); 1490 1491 if (l->cookie) { 1492 move_imm(ctx, LOONGARCH_GPR_T1, l->cookie, false); 1493 emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off); 1494 } else { 1495 emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off); 1496 } 1497 1498 /* arg1: prog */ 1499 move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false); 1500 /* arg2: &run_ctx */ 1501 emit_insn(ctx, addid, LOONGARCH_GPR_A1, LOONGARCH_GPR_FP, -run_ctx_off); 1502 ret = emit_call(ctx, (const u64)bpf_trampoline_enter(p)); 1503 if (ret) 1504 return ret; 1505 1506 /* store prog start time */ 1507 move_reg(ctx, LOONGARCH_GPR_S1, LOONGARCH_GPR_A0); 1508 1509 /* 1510 * if (__bpf_prog_enter(prog) == 0) 1511 * goto skip_exec_of_prog; 1512 */ 1513 branch = (u32 *)ctx->image + ctx->idx; 1514 /* nop reserved for conditional jump */ 1515 emit_insn(ctx, nop); 1516 1517 /* arg1: &args_off */ 1518 emit_insn(ctx, addid, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -args_off); 1519 if (!p->jited) 1520 move_imm(ctx, LOONGARCH_GPR_A1, (const s64)p->insnsi, false); 1521 ret = emit_call(ctx, (const u64)p->bpf_func); 1522 if (ret) 1523 return ret; 1524 1525 if (save_ret) { 1526 emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off); 1527 emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8)); 1528 } 1529 1530 /* update branch with beqz */ 1531 if (ctx->image) { 1532 int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branch; 1533 *branch = larch_insn_gen_beq(LOONGARCH_GPR_A0, LOONGARCH_GPR_ZERO, offset); 1534 } 1535 1536 /* arg1: prog */ 1537 move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false); 1538 /* arg2: prog start time */ 1539 move_reg(ctx, LOONGARCH_GPR_A1, LOONGARCH_GPR_S1); 1540 /* arg3: &run_ctx */ 1541 emit_insn(ctx, addid, LOONGARCH_GPR_A2, LOONGARCH_GPR_FP, -run_ctx_off); 1542 ret = emit_call(ctx, (const u64)bpf_trampoline_exit(p)); 1543 1544 return ret; 1545 } 1546 1547 static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, 1548 int args_off, int retval_off, int run_ctx_off, u32 **branches) 1549 { 1550 int i; 1551 1552 emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off); 1553 for (i = 0; i < tl->nr_links; i++) { 1554 invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, true); 1555 emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off); 1556 branches[i] = (u32 *)ctx->image + ctx->idx; 1557 emit_insn(ctx, nop); 1558 } 1559 } 1560 1561 void *arch_alloc_bpf_trampoline(unsigned int size) 1562 { 1563 return bpf_prog_pack_alloc(size, jit_fill_hole); 1564 } 1565 1566 void arch_free_bpf_trampoline(void *image, unsigned int size) 1567 { 1568 bpf_prog_pack_free(image, size); 1569 } 1570 1571 /* 1572 * Sign-extend the register if necessary 1573 */ 1574 static void sign_extend(struct jit_ctx *ctx, int rd, int rj, u8 size, bool sign) 1575 { 1576 /* ABI requires unsigned char/short to be zero-extended */ 1577 if (!sign && (size == 1 || size == 2)) { 1578 if (rd != rj) 1579 move_reg(ctx, rd, rj); 1580 return; 1581 } 1582 1583 switch (size) { 1584 case 1: 1585 emit_insn(ctx, extwb, rd, rj); 1586 break; 1587 case 2: 1588 emit_insn(ctx, extwh, rd, rj); 1589 break; 1590 case 4: 1591 emit_insn(ctx, addiw, rd, rj, 0); 1592 break; 1593 case 8: 1594 if (rd != rj) 1595 move_reg(ctx, rd, rj); 1596 break; 1597 default: 1598 pr_warn("bpf_jit: invalid size %d for sign_extend\n", size); 1599 } 1600 } 1601 1602 static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, 1603 const struct btf_func_model *m, struct bpf_tramp_links *tlinks, 1604 void *func_addr, u32 flags) 1605 { 1606 int i, ret, save_ret; 1607 int stack_size, nargs; 1608 int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off; 1609 bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT; 1610 void *orig_call = func_addr; 1611 struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; 1612 struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; 1613 struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; 1614 u32 **branches = NULL; 1615 1616 /* 1617 * FP + 8 [ RA to parent func ] return address to parent 1618 * function 1619 * FP + 0 [ FP of parent func ] frame pointer of parent 1620 * function 1621 * FP - 8 [ T0 to traced func ] return address of traced 1622 * function 1623 * FP - 16 [ FP of traced func ] frame pointer of traced 1624 * function 1625 * 1626 * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or 1627 * BPF_TRAMP_F_RET_FENTRY_RET 1628 * [ argN ] 1629 * [ ... ] 1630 * FP - args_off [ arg1 ] 1631 * 1632 * FP - nargs_off [ regs count ] 1633 * 1634 * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG 1635 * 1636 * FP - run_ctx_off [ bpf_tramp_run_ctx ] 1637 * 1638 * FP - sreg_off [ callee saved reg ] 1639 * 1640 * FP - tcc_ptr_off [ tail_call_cnt_ptr ] 1641 */ 1642 1643 if (m->nr_args > LOONGARCH_MAX_REG_ARGS) 1644 return -ENOTSUPP; 1645 1646 /* FIXME: No support of struct argument */ 1647 for (i = 0; i < m->nr_args; i++) { 1648 if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) 1649 return -ENOTSUPP; 1650 } 1651 1652 if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) 1653 return -ENOTSUPP; 1654 1655 /* Room of trampoline frame to store return address and frame pointer */ 1656 stack_size = 16; 1657 1658 save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); 1659 if (save_ret) 1660 stack_size += 16; /* Save BPF R0 and A0 */ 1661 1662 retval_off = stack_size; 1663 1664 /* Room of trampoline frame to store args */ 1665 nargs = m->nr_args; 1666 stack_size += nargs * 8; 1667 args_off = stack_size; 1668 1669 /* Room of trampoline frame to store args number */ 1670 stack_size += 8; 1671 nargs_off = stack_size; 1672 1673 /* Room of trampoline frame to store ip address */ 1674 if (flags & BPF_TRAMP_F_IP_ARG) { 1675 stack_size += 8; 1676 ip_off = stack_size; 1677 } 1678 1679 /* Room of trampoline frame to store struct bpf_tramp_run_ctx */ 1680 stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); 1681 run_ctx_off = stack_size; 1682 1683 stack_size += 8; 1684 sreg_off = stack_size; 1685 1686 /* Room of trampoline frame to store tail_call_cnt_ptr */ 1687 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) { 1688 stack_size += 8; 1689 tcc_ptr_off = stack_size; 1690 } 1691 1692 stack_size = round_up(stack_size, 16); 1693 1694 if (is_struct_ops) { 1695 /* 1696 * For the trampoline called directly, just handle 1697 * the frame of trampoline. 1698 */ 1699 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size); 1700 emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8); 1701 emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); 1702 emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size); 1703 } else { 1704 /* 1705 * For the trampoline called from function entry, 1706 * the frame of traced function and the frame of 1707 * trampoline need to be considered. 1708 */ 1709 /* RA and FP for parent function */ 1710 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16); 1711 emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8); 1712 emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0); 1713 emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16); 1714 1715 /* RA and FP for traced function */ 1716 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size); 1717 emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8); 1718 emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); 1719 emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size); 1720 } 1721 1722 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 1723 emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off); 1724 1725 /* callee saved register S1 to pass start time */ 1726 emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); 1727 1728 /* store ip address of the traced function */ 1729 if (flags & BPF_TRAMP_F_IP_ARG) { 1730 move_imm(ctx, LOONGARCH_GPR_T1, (const s64)func_addr, false); 1731 emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -ip_off); 1732 } 1733 1734 /* store nargs number */ 1735 move_imm(ctx, LOONGARCH_GPR_T1, nargs, false); 1736 emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -nargs_off); 1737 1738 store_args(ctx, nargs, args_off); 1739 1740 /* To traced function */ 1741 /* Ftrace jump skips 2 NOP instructions */ 1742 if (is_kernel_text((unsigned long)orig_call) || 1743 is_module_text_address((unsigned long)orig_call)) 1744 orig_call += LOONGARCH_FENTRY_NBYTES; 1745 /* Direct jump skips 5 NOP instructions */ 1746 else if (is_bpf_text_address((unsigned long)orig_call)) 1747 orig_call += LOONGARCH_BPF_FENTRY_NBYTES; 1748 1749 if (flags & BPF_TRAMP_F_CALL_ORIG) { 1750 move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im); 1751 ret = emit_call(ctx, (const u64)__bpf_tramp_enter); 1752 if (ret) 1753 return ret; 1754 } 1755 1756 for (i = 0; i < fentry->nr_links; i++) { 1757 ret = invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off, 1758 run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET); 1759 if (ret) 1760 return ret; 1761 } 1762 if (fmod_ret->nr_links) { 1763 branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL); 1764 if (!branches) 1765 return -ENOMEM; 1766 1767 invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, run_ctx_off, branches); 1768 } 1769 1770 if (flags & BPF_TRAMP_F_CALL_ORIG) { 1771 restore_args(ctx, m->nr_args, args_off); 1772 1773 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 1774 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off); 1775 1776 ret = emit_call(ctx, (const u64)orig_call); 1777 if (ret) 1778 goto out; 1779 emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off); 1780 emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8)); 1781 im->ip_after_call = ctx->ro_image + ctx->idx; 1782 /* Reserve space for the move_imm + jirl instruction */ 1783 for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++) 1784 emit_insn(ctx, nop); 1785 } 1786 1787 for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) { 1788 int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i]; 1789 *branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset); 1790 } 1791 1792 for (i = 0; i < fexit->nr_links; i++) { 1793 ret = invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, run_ctx_off, false); 1794 if (ret) 1795 goto out; 1796 } 1797 1798 if (flags & BPF_TRAMP_F_CALL_ORIG) { 1799 im->ip_epilogue = ctx->ro_image + ctx->idx; 1800 move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im); 1801 ret = emit_call(ctx, (const u64)__bpf_tramp_exit); 1802 if (ret) 1803 goto out; 1804 } 1805 1806 if (flags & BPF_TRAMP_F_RESTORE_REGS) 1807 restore_args(ctx, m->nr_args, args_off); 1808 1809 if (save_ret) { 1810 emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8)); 1811 if (is_struct_ops) 1812 sign_extend(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0], 1813 m->ret_size, m->ret_flags & BTF_FMODEL_SIGNED_ARG); 1814 else 1815 emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off); 1816 } 1817 1818 emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); 1819 1820 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 1821 emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_FP, -tcc_ptr_off); 1822 1823 if (is_struct_ops) { 1824 /* trampoline called directly */ 1825 emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8); 1826 emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); 1827 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size); 1828 1829 emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); 1830 } else { 1831 /* trampoline called from function entry */ 1832 emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8); 1833 emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); 1834 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size); 1835 1836 emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8); 1837 emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0); 1838 emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16); 1839 1840 if (flags & BPF_TRAMP_F_SKIP_FRAME) { 1841 /* return to parent function */ 1842 move_reg(ctx, LOONGARCH_GPR_RA, LOONGARCH_GPR_T0); 1843 emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0); 1844 } else { 1845 /* return to traced function */ 1846 move_reg(ctx, LOONGARCH_GPR_T1, LOONGARCH_GPR_RA); 1847 move_reg(ctx, LOONGARCH_GPR_RA, LOONGARCH_GPR_T0); 1848 emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T1, 0); 1849 } 1850 } 1851 1852 ret = ctx->idx; 1853 out: 1854 kfree(branches); 1855 1856 return ret; 1857 } 1858 1859 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, 1860 void *ro_image_end, const struct btf_func_model *m, 1861 u32 flags, struct bpf_tramp_links *tlinks, void *func_addr) 1862 { 1863 int ret, size; 1864 void *image, *tmp; 1865 struct jit_ctx ctx; 1866 1867 size = ro_image_end - ro_image; 1868 image = kvmalloc(size, GFP_KERNEL); 1869 if (!image) 1870 return -ENOMEM; 1871 1872 ctx.image = (union loongarch_instruction *)image; 1873 ctx.ro_image = (union loongarch_instruction *)ro_image; 1874 ctx.idx = 0; 1875 1876 jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image)); 1877 ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags); 1878 if (ret < 0) 1879 goto out; 1880 1881 if (validate_code(&ctx) < 0) { 1882 ret = -EINVAL; 1883 goto out; 1884 } 1885 1886 tmp = bpf_arch_text_copy(ro_image, image, size); 1887 if (IS_ERR(tmp)) { 1888 ret = PTR_ERR(tmp); 1889 goto out; 1890 } 1891 1892 out: 1893 kvfree(image); 1894 return ret < 0 ? ret : size; 1895 } 1896 1897 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, 1898 struct bpf_tramp_links *tlinks, void *func_addr) 1899 { 1900 int ret; 1901 struct jit_ctx ctx; 1902 struct bpf_tramp_image im; 1903 1904 ctx.image = NULL; 1905 ctx.idx = 0; 1906 1907 ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags); 1908 1909 return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE; 1910 } 1911 1912 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) 1913 { 1914 bool tmp_blinded = false, extra_pass = false; 1915 u8 *image_ptr, *ro_image_ptr; 1916 int image_size, prog_size, extable_size; 1917 struct jit_ctx ctx; 1918 struct jit_data *jit_data; 1919 struct bpf_binary_header *header; 1920 struct bpf_binary_header *ro_header; 1921 struct bpf_prog *tmp, *orig_prog = prog; 1922 1923 /* 1924 * If BPF JIT was not enabled then we must fall back to 1925 * the interpreter. 1926 */ 1927 if (!prog->jit_requested) 1928 return orig_prog; 1929 1930 tmp = bpf_jit_blind_constants(prog); 1931 /* 1932 * If blinding was requested and we failed during blinding, 1933 * we must fall back to the interpreter. Otherwise, we save 1934 * the new JITed code. 1935 */ 1936 if (IS_ERR(tmp)) 1937 return orig_prog; 1938 1939 if (tmp != prog) { 1940 tmp_blinded = true; 1941 prog = tmp; 1942 } 1943 1944 jit_data = prog->aux->jit_data; 1945 if (!jit_data) { 1946 jit_data = kzalloc_obj(*jit_data); 1947 if (!jit_data) { 1948 prog = orig_prog; 1949 goto out; 1950 } 1951 prog->aux->jit_data = jit_data; 1952 } 1953 if (jit_data->ctx.offset) { 1954 ctx = jit_data->ctx; 1955 ro_header = jit_data->ro_header; 1956 ro_image_ptr = (void *)ctx.ro_image; 1957 header = jit_data->header; 1958 image_ptr = (void *)header + ((void *)ro_image_ptr - (void *)ro_header); 1959 extra_pass = true; 1960 prog_size = sizeof(u32) * ctx.idx; 1961 goto skip_init_ctx; 1962 } 1963 1964 memset(&ctx, 0, sizeof(ctx)); 1965 ctx.prog = prog; 1966 ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); 1967 ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); 1968 1969 ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL); 1970 if (ctx.offset == NULL) { 1971 prog = orig_prog; 1972 goto out_offset; 1973 } 1974 1975 /* 1. Initial fake pass to compute ctx->idx and set ctx->flags */ 1976 build_prologue(&ctx); 1977 if (build_body(&ctx, extra_pass)) { 1978 prog = orig_prog; 1979 goto out_offset; 1980 } 1981 ctx.epilogue_offset = ctx.idx; 1982 build_epilogue(&ctx); 1983 1984 extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry); 1985 1986 /* Now we know the actual image size. 1987 * As each LoongArch instruction is of length 32bit, 1988 * we are translating number of JITed intructions into 1989 * the size required to store these JITed code. 1990 */ 1991 prog_size = sizeof(u32) * ctx.idx; 1992 image_size = prog_size + extable_size; 1993 /* Now we know the size of the structure to make */ 1994 ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u32), 1995 &header, &image_ptr, jit_fill_hole); 1996 if (!ro_header) { 1997 prog = orig_prog; 1998 goto out_offset; 1999 } 2000 2001 /* 2. Now, the actual pass to generate final JIT code */ 2002 /* 2003 * Use the image (RW) for writing the JITed instructions. But also save 2004 * the ro_image (RX) for calculating the offsets in the image. The RW 2005 * image will be later copied to the RX image from where the program will 2006 * run. The bpf_jit_binary_pack_finalize() will do this copy in the final 2007 * step. 2008 */ 2009 ctx.image = (union loongarch_instruction *)image_ptr; 2010 ctx.ro_image = (union loongarch_instruction *)ro_image_ptr; 2011 if (extable_size) 2012 prog->aux->extable = (void *)ro_image_ptr + prog_size; 2013 2014 skip_init_ctx: 2015 ctx.idx = 0; 2016 ctx.num_exentries = 0; 2017 2018 build_prologue(&ctx); 2019 if (build_body(&ctx, extra_pass)) { 2020 prog = orig_prog; 2021 goto out_free; 2022 } 2023 build_epilogue(&ctx); 2024 2025 /* 3. Extra pass to validate JITed code */ 2026 if (validate_ctx(&ctx)) { 2027 prog = orig_prog; 2028 goto out_free; 2029 } 2030 2031 /* And we're done */ 2032 if (bpf_jit_enable > 1) 2033 bpf_jit_dump(prog->len, prog_size, 2, ctx.image); 2034 2035 if (!prog->is_func || extra_pass) { 2036 if (extra_pass && ctx.idx != jit_data->ctx.idx) { 2037 pr_err_once("multi-func JIT bug %d != %d\n", 2038 ctx.idx, jit_data->ctx.idx); 2039 goto out_free; 2040 } 2041 if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) { 2042 /* ro_header has been freed */ 2043 ro_header = NULL; 2044 prog = orig_prog; 2045 goto out_free; 2046 } 2047 /* 2048 * The instructions have now been copied to the ROX region from 2049 * where they will execute. Now the data cache has to be cleaned 2050 * to the PoU and the I-cache has to be invalidated for the VAs. 2051 */ 2052 bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx); 2053 } else { 2054 jit_data->ctx = ctx; 2055 jit_data->header = header; 2056 jit_data->ro_header = ro_header; 2057 } 2058 prog->jited = 1; 2059 prog->jited_len = prog_size; 2060 prog->bpf_func = (void *)ctx.ro_image; 2061 2062 if (!prog->is_func || extra_pass) { 2063 int i; 2064 2065 /* offset[prog->len] is the size of program */ 2066 for (i = 0; i <= prog->len; i++) 2067 ctx.offset[i] *= LOONGARCH_INSN_SIZE; 2068 bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); 2069 2070 out_offset: 2071 kvfree(ctx.offset); 2072 kfree(jit_data); 2073 prog->aux->jit_data = NULL; 2074 } 2075 2076 out: 2077 if (tmp_blinded) 2078 bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog); 2079 2080 return prog; 2081 2082 out_free: 2083 if (header) { 2084 bpf_arch_text_copy(&ro_header->size, &header->size, sizeof(header->size)); 2085 bpf_jit_binary_pack_free(ro_header, header); 2086 } 2087 goto out_offset; 2088 } 2089 2090 void bpf_jit_free(struct bpf_prog *prog) 2091 { 2092 if (prog->jited) { 2093 struct jit_data *jit_data = prog->aux->jit_data; 2094 struct bpf_binary_header *hdr; 2095 2096 /* 2097 * If we fail the final pass of JIT (from jit_subprogs), the 2098 * program may not be finalized yet. Call finalize here before 2099 * freeing it. 2100 */ 2101 if (jit_data) { 2102 bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header); 2103 kfree(jit_data); 2104 } 2105 hdr = bpf_jit_binary_pack_hdr(prog); 2106 bpf_jit_binary_pack_free(hdr, NULL); 2107 WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); 2108 } 2109 2110 bpf_prog_unlock_free(prog); 2111 } 2112 2113 bool bpf_jit_bypass_spec_v1(void) 2114 { 2115 return true; 2116 } 2117 2118 bool bpf_jit_bypass_spec_v4(void) 2119 { 2120 return true; 2121 } 2122 2123 bool bpf_jit_supports_arena(void) 2124 { 2125 return true; 2126 } 2127 2128 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ 2129 bool bpf_jit_supports_subprog_tailcalls(void) 2130 { 2131 return true; 2132 } 2133