// SPDX-License-Identifier: GPL-2.0
/* BPF JIT compiler for RV64G
 *
 * Copyright(c) 2019 Björn Töpel <bjorn.topel@gmail.com>
 *
 */

#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <asm/text-patching.h>
#include <asm/cfi.h>
#include <asm/percpu.h>
#include "bpf_jit.h"

#define RV_MAX_REG_ARGS 8
#define RV_FENTRY_NINSNS 2
#define RV_FENTRY_NBYTES (RV_FENTRY_NINSNS * 4)
#define RV_KCFI_NINSNS (IS_ENABLED(CONFIG_CFI) ? 1 : 0)
/* imm that allows emit_imm to emit max count insns */
#define RV_MAX_COUNT_IMM 0x7FFF7FF7FF7FF7FF

#define RV_REG_TCC RV_REG_A6
#define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if the program does calls */
#define RV_REG_ARENA RV_REG_S7 /* For storing arena_vm_start */

static const int regmap[] = {
	[BPF_REG_0] = RV_REG_A5,
	[BPF_REG_1] = RV_REG_A0,
	[BPF_REG_2] = RV_REG_A1,
	[BPF_REG_3] = RV_REG_A2,
	[BPF_REG_4] = RV_REG_A3,
	[BPF_REG_5] = RV_REG_A4,
	[BPF_REG_6] = RV_REG_S1,
	[BPF_REG_7] = RV_REG_S2,
	[BPF_REG_8] = RV_REG_S3,
	[BPF_REG_9] = RV_REG_S4,
	[BPF_REG_FP] = RV_REG_S5,
	[BPF_REG_AX] = RV_REG_T0,
};

static const int pt_regmap[] = {
	[RV_REG_A0] = offsetof(struct pt_regs, a0),
	[RV_REG_A1] = offsetof(struct pt_regs, a1),
	[RV_REG_A2] = offsetof(struct pt_regs, a2),
	[RV_REG_A3] = offsetof(struct pt_regs, a3),
	[RV_REG_A4] = offsetof(struct pt_regs, a4),
	[RV_REG_A5] = offsetof(struct pt_regs, a5),
	[RV_REG_S1] = offsetof(struct pt_regs, s1),
	[RV_REG_S2] = offsetof(struct pt_regs, s2),
	[RV_REG_S3] = offsetof(struct pt_regs, s3),
	[RV_REG_S4] = offsetof(struct pt_regs, s4),
	[RV_REG_S5] = offsetof(struct pt_regs, s5),
	[RV_REG_T0] = offsetof(struct pt_regs, t0),
};

enum {
	RV_CTX_F_SEEN_TAIL_CALL = 0,
	RV_CTX_F_SEEN_CALL = RV_REG_RA,
	RV_CTX_F_SEEN_S1 = RV_REG_S1,
	RV_CTX_F_SEEN_S2 = RV_REG_S2,
	RV_CTX_F_SEEN_S3 = RV_REG_S3,
	RV_CTX_F_SEEN_S4 = RV_REG_S4,
	RV_CTX_F_SEEN_S5 = RV_REG_S5,
	RV_CTX_F_SEEN_S6 = RV_REG_S6,
};

static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
{
	u8 reg = regmap[bpf_reg];

	switch (reg) {
	case RV_CTX_F_SEEN_S1:
	case RV_CTX_F_SEEN_S2:
	case RV_CTX_F_SEEN_S3:
	case RV_CTX_F_SEEN_S4:
	case RV_CTX_F_SEEN_S5:
	case RV_CTX_F_SEEN_S6:
		__set_bit(reg, &ctx->flags);
	}
	return reg;
}

static bool seen_reg(int reg, struct rv_jit_context *ctx)
{
	switch (reg) {
	case RV_CTX_F_SEEN_CALL:
	case RV_CTX_F_SEEN_S1:
	case RV_CTX_F_SEEN_S2:
	case RV_CTX_F_SEEN_S3:
	case RV_CTX_F_SEEN_S4:
	case RV_CTX_F_SEEN_S5:
	case RV_CTX_F_SEEN_S6:
		return test_bit(reg, &ctx->flags);
	}
	return false;
}

static void mark_fp(struct rv_jit_context *ctx)
{
	__set_bit(RV_CTX_F_SEEN_S5, &ctx->flags);
}

static void mark_call(struct rv_jit_context *ctx)
{
	__set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
}

static bool seen_call(struct rv_jit_context *ctx)
{
	return test_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
}

static void mark_tail_call(struct rv_jit_context *ctx)
{
	__set_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
}

static bool seen_tail_call(struct rv_jit_context *ctx)
{
	return test_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
}
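/*
 * Tail-call counter (TCC) handling: the counter lives in A6 while the
 * program runs, but A6 is a caller-saved argument register. When the
 * program also makes calls, the counter is therefore kept in the
 * callee-saved S6 (RV_REG_TCC_SAVED); the prologue copies A6 into S6 once
 * both calls and tail calls have been seen.
 */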
static u8 rv_tail_call_reg(struct rv_jit_context *ctx)
{
	mark_tail_call(ctx);

	if (seen_call(ctx)) {
		__set_bit(RV_CTX_F_SEEN_S6, &ctx->flags);
		return RV_REG_S6;
	}
	return RV_REG_A6;
}

static bool is_32b_int(s64 val)
{
	return -(1L << 31) <= val && val < (1L << 31);
}

static bool in_auipc_jalr_range(s64 val)
{
	/*
	 * auipc+jalr can reach any signed PC-relative offset in the range
	 * [-2^31 - 2^11, 2^31 - 2^11).
	 */
	return (-(1L << 31) - (1L << 11)) <= val &&
	       val < ((1L << 31) - (1L << 11));
}

/* Modify rd pointer to alternate reg to avoid corrupting original reg */
static void emit_sextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx)
{
	emit_sextw(ra, *rd, ctx);
	*rd = ra;
}

static void emit_zextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx)
{
	emit_zextw(ra, *rd, ctx);
	*rd = ra;
}

/* Emit fixed-length instructions for address */
static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx)
{
	/*
	 * Use the ro_insns(RX) to calculate the offset as the BPF program will
	 * finally run from this memory region.
	 */
	u64 ip = (u64)(ctx->ro_insns + ctx->ninsns);
	s64 off = addr - ip;
	s64 upper = (off + (1 << 11)) >> 12;
	s64 lower = off & 0xfff;

	if (extra_pass && !in_auipc_jalr_range(off)) {
		pr_err("bpf-jit: target offset 0x%llx is out of range\n", off);
		return -ERANGE;
	}

	emit(rv_auipc(rd, upper), ctx);
	emit(rv_addi(rd, rd, lower), ctx);
	return 0;
}

/* Emit variable-length instructions for 32-bit and 64-bit imm */
static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
{
	/* Note that the immediate from the add is sign-extended,
	 * which means that we need to compensate this by adding 2^12,
	 * when the 12th bit is set. A simpler way of doing this, and
	 * getting rid of the check, is to just add 2**11 before the
	 * shift. The "Loading a 32-Bit constant" example from the
	 * "Computer Organization and Design, RISC-V edition" book by
	 * Patterson/Hennessy highlights this fact.
	 *
	 * This also means that we need to process LSB to MSB.
	 */
	s64 upper = (val + (1 << 11)) >> 12;
	/* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw,
	 * and addi are signed and RVC checks will perform signed comparisons.
	 */
	s64 lower = ((val & 0xfff) << 52) >> 52;
	int shift;

	if (is_32b_int(val)) {
		if (upper)
			emit_lui(rd, upper, ctx);

		if (!upper) {
			emit_li(rd, lower, ctx);
			return;
		}

		emit_addiw(rd, rd, lower, ctx);
		return;
	}

	shift = __ffs(upper);
	upper >>= shift;
	shift += 12;

	emit_imm(rd, upper, ctx);

	emit_slli(rd, rd, shift, ctx);
	if (lower)
		emit_addi(rd, rd, lower, ctx);
}
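/*
 * Worked example for emit_imm() above (values computed by hand, not taken
 * from the source): emit_imm(rd, 0x12345678) takes the 32-bit path with
 * upper = (0x12345678 + 0x800) >> 12 = 0x12345 and lower = 0x678, emitting
 * "lui rd, 0x12345; addiw rd, rd, 0x678". A 64-bit value instead recurses:
 * the low 12 bits are peeled off, trailing zero bits of the upper part are
 * folded into one larger shift, and the result is built as
 * "emit_imm(upper); slli; addi", working from LSB to MSB as the comment
 * above requires.
 */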
static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
{
	int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;

	if (seen_reg(RV_REG_RA, ctx)) {
		emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx);
	store_offset -= 8;
	if (seen_reg(RV_REG_S1, ctx)) {
		emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S2, ctx)) {
		emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S3, ctx)) {
		emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S4, ctx)) {
		emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S5, ctx)) {
		emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S6, ctx)) {
		emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (ctx->arena_vm_start) {
		emit_ld(RV_REG_ARENA, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}

	emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
	/* Set return value. */
	if (!is_tail_call)
		emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx);
	emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
		  /* kcfi, fentry and TCC init insns will be skipped on tailcall */
		  is_tail_call ? (RV_KCFI_NINSNS + RV_FENTRY_NINSNS + 1) * 4 : 0,
		  ctx);
}
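/*
 * Note on offsets: with the compressed (RVC) extension, instructions are
 * 2-byte aligned, so ctx->ninsns counts 16-bit units and the branch/jump
 * emitters below are handed "rvoff >> 1", i.e. the offset in halfwords,
 * which is what the immediate fields of beq/jal and friends encode.
 */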
static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
		     struct rv_jit_context *ctx)
{
	switch (cond) {
	case BPF_JEQ:
		emit(rv_beq(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JGT:
		emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
		return;
	case BPF_JLT:
		emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JGE:
		emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JLE:
		emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
		return;
	case BPF_JNE:
		emit(rv_bne(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JSGT:
		emit(rv_blt(rs, rd, rvoff >> 1), ctx);
		return;
	case BPF_JSLT:
		emit(rv_blt(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JSGE:
		emit(rv_bge(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JSLE:
		emit(rv_bge(rs, rd, rvoff >> 1), ctx);
	}
}

static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
			struct rv_jit_context *ctx)
{
	s64 upper, lower;

	if (is_13b_int(rvoff)) {
		emit_bcc(cond, rd, rs, rvoff, ctx);
		return;
	}

	/* Adjust for jal */
	rvoff -= 4;

	/* Transform, e.g.:
	 *	bne rd,rs,foo
	 * to
	 *	beq rd,rs,<.L1>
	 *	(auipc foo)
	 *	jal(r) foo
	 * .L1
	 */
	cond = invert_bpf_cond(cond);
	if (is_21b_int(rvoff)) {
		emit_bcc(cond, rd, rs, 8, ctx);
		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
		return;
	}

	/* 32b. No need for an additional rvoff adjustment, since we
	 * get that from the auipc at PC', where PC = PC' + 4.
	 */
	upper = (rvoff + (1 << 11)) >> 12;
	lower = rvoff & 0xfff;

	emit_bcc(cond, rd, rs, 12, ctx);
	emit(rv_auipc(RV_REG_T1, upper), ctx);
	emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
}
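/*
 * Tail call: the indirect jump taken via __build_epilogue(true, ...) lands
 * (RV_KCFI_NINSNS + RV_FENTRY_NINSNS + 1) * 4 bytes past the target
 * program's start, skipping the kcfi hash, the fentry nops and the TCC
 * initialization so that the already-decremented counter survives the jump.
 */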
static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
{
	int tc_ninsn, off, start_insn = ctx->ninsns;
	u8 tcc = rv_tail_call_reg(ctx);

	/* a0: &ctx
	 * a1: &array
	 * a2: index
	 *
	 * if (index >= array->map.max_entries)
	 *	goto out;
	 */
	tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] :
		   ctx->offset[0];
	emit_zextw(RV_REG_A2, RV_REG_A2, ctx);

	off = offsetof(struct bpf_array, map.max_entries);
	if (is_12b_check(off, insn))
		return -1;
	emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
	emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);

	/* if (--TCC < 0)
	 *	goto out;
	 */
	emit_addi(RV_REG_TCC, tcc, -1, ctx);
	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
	emit_branch(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);

	/* prog = array->ptrs[index];
	 * if (!prog)
	 *	goto out;
	 */
	emit_sh3add(RV_REG_T2, RV_REG_A2, RV_REG_A1, ctx);
	off = offsetof(struct bpf_array, ptrs);
	if (is_12b_check(off, insn))
		return -1;
	emit_ld(RV_REG_T2, off, RV_REG_T2, ctx);
	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
	emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);

	/* goto *(prog->bpf_func + 4); */
	off = offsetof(struct bpf_prog, bpf_func);
	if (is_12b_check(off, insn))
		return -1;
	emit_ld(RV_REG_T3, off, RV_REG_T2, ctx);
	__build_epilogue(true, ctx);
	return 0;
}

static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
		      struct rv_jit_context *ctx)
{
	u8 code = insn->code;

	switch (code) {
	case BPF_JMP | BPF_JA:
	case BPF_JMP | BPF_CALL:
	case BPF_JMP | BPF_EXIT:
	case BPF_JMP | BPF_TAIL_CALL:
		break;
	default:
		*rd = bpf_to_rv_reg(insn->dst_reg, ctx);
	}

	if (code & (BPF_ALU | BPF_X) || code & (BPF_ALU64 | BPF_X) ||
	    code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) ||
	    code & BPF_LDX || code & BPF_STX)
		*rs = bpf_to_rv_reg(insn->src_reg, ctx);
}

static int emit_jump_and_link(u8 rd, s64 rvoff, bool fixed_addr,
			      struct rv_jit_context *ctx)
{
	s64 upper, lower;

	if (rvoff && fixed_addr && is_21b_int(rvoff)) {
		emit(rv_jal(rd, rvoff >> 1), ctx);
		return 0;
	} else if (in_auipc_jalr_range(rvoff)) {
		upper = (rvoff + (1 << 11)) >> 12;
		lower = rvoff & 0xfff;
		emit(rv_auipc(RV_REG_T1, upper), ctx);
		emit(rv_jalr(rd, RV_REG_T1, lower), ctx);
		return 0;
	}

	pr_err("bpf-jit: target offset 0x%llx is out of range\n", rvoff);
	return -ERANGE;
}

static bool is_signed_bpf_cond(u8 cond)
{
	return cond == BPF_JSGT || cond == BPF_JSLT ||
	       cond == BPF_JSGE || cond == BPF_JSLE;
}

static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx)
{
	s64 off = 0;
	u64 ip;

	if (addr && ctx->insns && ctx->ro_insns) {
		/*
		 * Use the ro_insns(RX) to calculate the offset as the BPF
		 * program will finally run from this memory region.
		 */
		ip = (u64)(long)(ctx->ro_insns + ctx->ninsns);
		off = addr - ip;
	}

	return emit_jump_and_link(RV_REG_RA, off, fixed_addr, ctx);
}

static inline void emit_kcfi(u32 hash, struct rv_jit_context *ctx)
{
	if (IS_ENABLED(CONFIG_CFI))
		emit(hash, ctx);
}
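/*
 * The emitters below bracket the potentially faulting instruction with
 * ctx->ex_insn_off (the instruction itself) and ctx->ex_jmp_off (the
 * instruction to resume at). add_exception_handler() consumes both markers
 * when it builds the extable entry for BPF_PROBE_* accesses.
 */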
static void emit_ldx_insn(u8 rd, s16 off, u8 rs, u8 size, bool sign_ext,
			  struct rv_jit_context *ctx)
{
	switch (size) {
	case BPF_B:
		emit(sign_ext ? rv_lb(rd, off, rs) : rv_lbu(rd, off, rs), ctx);
		break;
	case BPF_H:
		emit(sign_ext ? rv_lh(rd, off, rs) : rv_lhu(rd, off, rs), ctx);
		break;
	case BPF_W:
		emit(sign_ext ? rv_lw(rd, off, rs) : rv_lwu(rd, off, rs), ctx);
		break;
	case BPF_DW:
		emit_ld(rd, off, rs, ctx);
		break;
	}
}

static void emit_stx_insn(u8 rd, s16 off, u8 rs, u8 size, struct rv_jit_context *ctx)
{
	switch (size) {
	case BPF_B:
		emit(rv_sb(rd, off, rs), ctx);
		break;
	case BPF_H:
		emit(rv_sh(rd, off, rs), ctx);
		break;
	case BPF_W:
		emit_sw(rd, off, rs, ctx);
		break;
	case BPF_DW:
		emit_sd(rd, off, rs, ctx);
		break;
	}
}

static void emit_ldx(u8 rd, s16 off, u8 rs, u8 size, bool sign_ext,
		     struct rv_jit_context *ctx)
{
	if (is_12b_int(off)) {
		ctx->ex_insn_off = ctx->ninsns;
		emit_ldx_insn(rd, off, rs, size, sign_ext, ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		return;
	}

	emit_imm(RV_REG_T1, off, ctx);
	emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
	ctx->ex_insn_off = ctx->ninsns;
	emit_ldx_insn(rd, 0, RV_REG_T1, size, sign_ext, ctx);
	ctx->ex_jmp_off = ctx->ninsns;
}

static void emit_st(u8 rd, s16 off, s32 imm, u8 size, struct rv_jit_context *ctx)
{
	emit_imm(RV_REG_T1, imm, ctx);
	if (is_12b_int(off)) {
		ctx->ex_insn_off = ctx->ninsns;
		emit_stx_insn(rd, off, RV_REG_T1, size, ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		return;
	}

	emit_imm(RV_REG_T2, off, ctx);
	emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
	ctx->ex_insn_off = ctx->ninsns;
	emit_stx_insn(RV_REG_T2, 0, RV_REG_T1, size, ctx);
	ctx->ex_jmp_off = ctx->ninsns;
}

static void emit_stx(u8 rd, s16 off, u8 rs, u8 size, struct rv_jit_context *ctx)
{
	if (is_12b_int(off)) {
		ctx->ex_insn_off = ctx->ninsns;
		emit_stx_insn(rd, off, rs, size, ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		return;
	}

	emit_imm(RV_REG_T1, off, ctx);
	emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
	ctx->ex_insn_off = ctx->ninsns;
	emit_stx_insn(RV_REG_T1, 0, rs, size, ctx);
	ctx->ex_jmp_off = ctx->ninsns;
}
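/*
 * BPF_LOAD_ACQ/BPF_STORE_REL below are built from plain loads/stores plus
 * fences: "fence r,rw" after the load yields acquire semantics, and
 * "fence rw,w" before the store yields release semantics.
 */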
static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn,
			     struct rv_jit_context *ctx)
{
	u8 code = insn->code;
	s32 imm = insn->imm;
	s16 off = insn->off;

	switch (imm) {
	/* dst_reg = load_acquire(src_reg + off16) */
	case BPF_LOAD_ACQ:
		if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
			emit_add(RV_REG_T2, rs, RV_REG_ARENA, ctx);
			rs = RV_REG_T2;
		}

		emit_ldx(rd, off, rs, BPF_SIZE(code), false, ctx);
		emit_fence_r_rw(ctx);

		/* If our next insn is a redundant zext, return 1 to tell
		 * build_body() to skip it.
		 */
		if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1]))
			return 1;
		break;
	/* store_release(dst_reg + off16, src_reg) */
	case BPF_STORE_REL:
		if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
			emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx);
			rd = RV_REG_T2;
		}

		emit_fence_rw_w(ctx);
		emit_stx(rd, off, rs, BPF_SIZE(code), ctx);
		break;
	default:
		pr_err_once("bpf-jit: invalid atomic load/store opcode %02x\n", imm);
		return -EINVAL;
	}

	return 0;
}
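/*
 * In the rv_amo*() calls below, the two trailing arguments are the aq/rl
 * ordering bits of the AMO instruction (an assumption based on how the
 * helpers are used here): the non-fetching forms discard the old value
 * into the zero register and use (0, 0), while the fetch/xchg forms use
 * (1, 1) for fully-ordered semantics.
 */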
static int emit_atomic_rmw(u8 rd, u8 rs, const struct bpf_insn *insn,
			   struct rv_jit_context *ctx)
{
	u8 code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;
	bool is64 = BPF_SIZE(code) == BPF_DW;

	if (BPF_SIZE(code) != BPF_W && BPF_SIZE(code) != BPF_DW) {
		pr_err_once("bpf-jit: 1- and 2-byte RMW atomics are not supported\n");
		return -EINVAL;
	}

	if (off) {
		if (is_12b_int(off)) {
			emit_addi(RV_REG_T1, rd, off, ctx);
		} else {
			emit_imm(RV_REG_T1, off, ctx);
			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
		}
		rd = RV_REG_T1;
	}

	if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
		emit_add(RV_REG_T1, rd, RV_REG_ARENA, ctx);
		rd = RV_REG_T1;
	}

	switch (imm) {
	/* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */
	case BPF_ADD:
		ctx->ex_insn_off = ctx->ninsns;
		emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) :
		     rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		break;
	case BPF_AND:
		ctx->ex_insn_off = ctx->ninsns;
		emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) :
		     rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		break;
	case BPF_OR:
		ctx->ex_insn_off = ctx->ninsns;
		emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) :
		     rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		break;
	case BPF_XOR:
		ctx->ex_insn_off = ctx->ninsns;
		emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) :
		     rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		break;
	/* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */
	case BPF_ADD | BPF_FETCH:
		ctx->ex_insn_off = ctx->ninsns;
		emit(is64 ? rv_amoadd_d(rs, rs, rd, 1, 1) :
		     rv_amoadd_w(rs, rs, rd, 1, 1), ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		if (!is64)
			emit_zextw(rs, rs, ctx);
		break;
	case BPF_AND | BPF_FETCH:
		ctx->ex_insn_off = ctx->ninsns;
		emit(is64 ? rv_amoand_d(rs, rs, rd, 1, 1) :
		     rv_amoand_w(rs, rs, rd, 1, 1), ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		if (!is64)
			emit_zextw(rs, rs, ctx);
		break;
	case BPF_OR | BPF_FETCH:
		ctx->ex_insn_off = ctx->ninsns;
		emit(is64 ? rv_amoor_d(rs, rs, rd, 1, 1) :
		     rv_amoor_w(rs, rs, rd, 1, 1), ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		if (!is64)
			emit_zextw(rs, rs, ctx);
		break;
	case BPF_XOR | BPF_FETCH:
		ctx->ex_insn_off = ctx->ninsns;
		emit(is64 ? rv_amoxor_d(rs, rs, rd, 1, 1) :
		     rv_amoxor_w(rs, rs, rd, 1, 1), ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		if (!is64)
			emit_zextw(rs, rs, ctx);
		break;
	/* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
	case BPF_XCHG:
		ctx->ex_insn_off = ctx->ninsns;
		emit(is64 ? rv_amoswap_d(rs, rs, rd, 1, 1) :
		     rv_amoswap_w(rs, rs, rd, 1, 1), ctx);
		ctx->ex_jmp_off = ctx->ninsns;
		if (!is64)
			emit_zextw(rs, rs, ctx);
		break;
	/* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */
	case BPF_CMPXCHG:
		emit_cmpxchg(rd, rs, regmap[BPF_REG_0], is64, ctx);
		break;
	default:
		pr_err_once("bpf-jit: invalid atomic RMW opcode %02x\n", imm);
		return -EINVAL;
	}

	return 0;
}

/*
 * Sign-extend the register if necessary
 */
static int sign_extend(u8 rd, u8 rs, u8 sz, bool sign, struct rv_jit_context *ctx)
{
	if (!sign && (sz == 1 || sz == 2)) {
		if (rd != rs)
			emit_mv(rd, rs, ctx);
		return 0;
	}

	switch (sz) {
	case 1:
		emit_sextb(rd, rs, ctx);
		break;
	case 2:
		emit_sexth(rd, rs, ctx);
		break;
	case 4:
		emit_sextw(rd, rs, ctx);
		break;
	case 8:
		if (rd != rs)
			emit_mv(rd, rs, ctx);
		break;
	default:
		pr_err("bpf-jit: invalid size %d for sign_extend\n", sz);
		return -EINVAL;
	}

	return 0;
}

#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
#define REG_DONT_CLEAR_MARKER	0 /* RV_REG_ZERO unused in pt_regmap */

bool ex_handler_bpf(const struct exception_table_entry *ex,
		    struct pt_regs *regs)
{
	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
	int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);

	if (regs_offset != REG_DONT_CLEAR_MARKER)
		*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
	regs->epc = (unsigned long)&ex->fixup - offset;

	return true;
}
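/*
 * Layout of ex->fixup as built by add_exception_handler() below:
 *   bits 26:0  (BPF_FIXUP_OFFSET_MASK) - distance from &ex->fixup back to
 *              the instruction following the faulting one
 *   bits 31:27 (BPF_FIXUP_REG_MASK)    - pt_regmap[] index of the
 *              destination register to clear, or REG_DONT_CLEAR_MARKER
 */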
/* For accesses to BTF pointers, add an entry to the exception table */
static int add_exception_handler(const struct bpf_insn *insn, int dst_reg,
				 struct rv_jit_context *ctx)
{
	struct exception_table_entry *ex;
	unsigned long pc;
	off_t ins_offset;
	off_t fixup_offset;

	if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable ||
	    ctx->ex_insn_off <= 0 || ctx->ex_jmp_off <= 0)
		return 0;

	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
	    BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
	    BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
		return 0;

	if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
		return -EINVAL;

	if (WARN_ON_ONCE(ctx->ex_insn_off > ctx->ninsns || ctx->ex_jmp_off > ctx->ninsns))
		return -EINVAL;

	ex = &ctx->prog->aux->extable[ctx->nexentries];
	pc = (unsigned long)&ctx->ro_insns[ctx->ex_insn_off];

	/*
	 * This is the relative offset of the instruction that may fault from
	 * the exception table itself. This will be written to the exception
	 * table and if this instruction faults, the destination register will
	 * be set to '0' and the execution will jump to the next instruction.
	 */
	ins_offset = pc - (long)&ex->insn;
	if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
		return -ERANGE;

	/*
	 * Since the extable follows the program, the fixup offset is always
	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
	 * to keep things simple, and put the destination register in the upper
	 * bits. We don't need to worry about buildtime or runtime sort
	 * modifying the upper bits because the table is already sorted, and
	 * isn't part of the main exception table.
	 *
	 * The fixup_offset is set to the next instruction from the instruction
	 * that may fault. The execution will jump to this after handling the
	 * fault.
	 */
	fixup_offset = (long)&ex->fixup - (long)&ctx->ro_insns[ctx->ex_jmp_off];
	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
		return -ERANGE;

	/*
	 * The offsets above have been calculated using the RO buffer but we
	 * need to use the R/W buffer for writes.
	 * switch ex to rw buffer for writing.
	 */
	ex = (void *)ctx->insns + ((void *)ex - (void *)ctx->ro_insns);

	ex->insn = ins_offset;

	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
	ex->type = EX_TYPE_BPF;

	ctx->ex_insn_off = 0;
	ctx->ex_jmp_off = 0;
	ctx->nexentries++;
	return 0;
}

static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
{
	s64 rvoff;
	struct rv_jit_context ctx;

	ctx.ninsns = 0;
	ctx.insns = (u16 *)insns;

	if (!target) {
		emit(rv_nop(), &ctx);
		emit(rv_nop(), &ctx);
		return 0;
	}

	rvoff = (s64)(target - ip);
	return emit_jump_and_link(is_call ? RV_REG_T0 : RV_REG_ZERO, rvoff, false, &ctx);
}

int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
		       enum bpf_text_poke_type new_t, void *old_addr,
		       void *new_addr)
{
	u32 old_insns[RV_FENTRY_NINSNS], new_insns[RV_FENTRY_NINSNS];
	bool is_call;
	int ret;

	if (!is_kernel_text((unsigned long)ip) &&
	    !is_bpf_text_address((unsigned long)ip))
		return -ENOTSUPP;

	is_call = old_t == BPF_MOD_CALL;
	ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call);
	if (ret)
		return ret;

	if (memcmp(ip, old_insns, RV_FENTRY_NBYTES))
		return -EFAULT;

	is_call = new_t == BPF_MOD_CALL;
	ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call);
	if (ret)
		return ret;

	cpus_read_lock();
	mutex_lock(&text_mutex);
	if (memcmp(ip, new_insns, RV_FENTRY_NBYTES))
		ret = patch_text(ip, new_insns, RV_FENTRY_NBYTES);
	mutex_unlock(&text_mutex);
	cpus_read_unlock();

	return ret;
}
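/*
 * Arguments beyond the first RV_MAX_REG_ARGS register slots arrive on the
 * stack. Seen from the trampoline's final FP they start at FP + 16; the
 * two slots below that (FP + 0 and FP + 8) belong to the outer frame built
 * on entry (see the stack layout comment in __arch_prepare_bpf_trampoline()).
 */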
static void store_args(int nr_arg_slots, int args_off, struct rv_jit_context *ctx)
{
	int i;

	for (i = 0; i < nr_arg_slots; i++) {
		if (i < RV_MAX_REG_ARGS) {
			emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
		} else {
			/* skip slots for T0 and FP of traced function */
			emit_ld(RV_REG_T1, 16 + (i - RV_MAX_REG_ARGS) * 8, RV_REG_FP, ctx);
			emit_sd(RV_REG_FP, -args_off, RV_REG_T1, ctx);
		}
		args_off -= 8;
	}
}

static void restore_args(int nr_reg_args, int args_off, struct rv_jit_context *ctx)
{
	int i;

	for (i = 0; i < nr_reg_args; i++) {
		emit_ld(RV_REG_A0 + i, -args_off, RV_REG_FP, ctx);
		args_off -= 8;
	}
}

static void restore_stack_args(int nr_stack_args, int args_off, int stk_arg_off,
			       struct rv_jit_context *ctx)
{
	int i;

	for (i = 0; i < nr_stack_args; i++) {
		emit_ld(RV_REG_T1, -(args_off - RV_MAX_REG_ARGS * 8), RV_REG_FP, ctx);
		emit_sd(RV_REG_FP, -stk_arg_off, RV_REG_T1, ctx);
		args_off -= 8;
		stk_arg_off -= 8;
	}
}

static void emit_store_stack_imm64(u8 reg, int stack_off, u64 imm64,
				   struct rv_jit_context *ctx)
{
	/* Load imm64 into reg and store it at [FP + stack_off]. */
	emit_imm(reg, (s64)imm64, ctx);
	emit_sd(RV_REG_FP, stack_off, reg, ctx);
}

static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off,
			   int run_ctx_off, bool save_ret, struct rv_jit_context *ctx)
{
	int ret, branch_off;
	struct bpf_prog *p = l->link.prog;
	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);

	if (l->cookie)
		emit_store_stack_imm64(RV_REG_T1, -run_ctx_off + cookie_off, l->cookie, ctx);
	else
		emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_ZERO, ctx);

	/* arg1: prog */
	emit_imm(RV_REG_A0, (const s64)p, ctx);
	/* arg2: &run_ctx */
	emit_addi(RV_REG_A1, RV_REG_FP, -run_ctx_off, ctx);
	ret = emit_call((const u64)bpf_trampoline_enter(p), true, ctx);
	if (ret)
		return ret;

	/* store prog start time */
	emit_mv(RV_REG_S1, RV_REG_A0, ctx);

	/* if (__bpf_prog_enter(prog) == 0)
	 *	goto skip_exec_of_prog;
	 */
	branch_off = ctx->ninsns;
	/* nop reserved for conditional jump */
	emit(rv_nop(), ctx);

	/* arg1: &args_off */
	emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx);
	if (!p->jited)
		/* arg2: progs[i]->insnsi for interpreter */
		emit_imm(RV_REG_A1, (const s64)p->insnsi, ctx);
	ret = emit_call((const u64)p->bpf_func, true, ctx);
	if (ret)
		return ret;

	if (save_ret) {
		emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
		emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx);
	}

	/* update branch with beqz */
	if (ctx->insns) {
		int offset = ninsns_rvoff(ctx->ninsns - branch_off);
		u32 insn = rv_beq(RV_REG_A0, RV_REG_ZERO, offset >> 1);
		*(u32 *)(ctx->insns + branch_off) = insn;
	}

	/* arg1: prog */
	emit_imm(RV_REG_A0, (const s64)p, ctx);
	/* arg2: prog start time */
	emit_mv(RV_REG_A1, RV_REG_S1, ctx);
	/* arg3: &run_ctx */
	emit_addi(RV_REG_A2, RV_REG_FP, -run_ctx_off, ctx);
	ret = emit_call((const u64)bpf_trampoline_exit(p), true, ctx);

	return ret;
}

static int invoke_bpf(struct bpf_tramp_links *tl, int args_off, int retval_off,
		      int run_ctx_off, int func_meta_off, bool save_ret, u64 func_meta,
		      int cookie_off, struct rv_jit_context *ctx)
{
	int i, cur_cookie = (cookie_off - args_off) / 8;

	for (i = 0; i < tl->nr_links; i++) {
		int err;

		if (bpf_prog_calls_session_cookie(tl->links[i])) {
			u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT);

			emit_store_stack_imm64(RV_REG_T1, -func_meta_off, meta, ctx);
			cur_cookie--;
		}
		err = invoke_bpf_prog(tl->links[i], args_off, retval_off, run_ctx_off,
				      save_ret, ctx);
		if (err)
			return err;
	}
	return 0;
}
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
					 const struct btf_func_model *m,
					 struct bpf_tramp_links *tlinks,
					 void *func_addr, u32 flags,
					 struct rv_jit_context *ctx)
{
	int i, ret, offset;
	int *branches_off = NULL;
	int stack_size = 0, nr_arg_slots = 0;
	int retval_off, args_off, func_meta_off, ip_off, run_ctx_off, sreg_off, stk_arg_off;
	int cookie_off, cookie_cnt;
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
	void *orig_call = func_addr;
	bool save_ret;
	u64 func_meta;
	u32 insn;

	/* Two types of generated trampoline stack layout:
	 *
	 * 1. trampoline called from function entry
	 * --------------------------------------
	 * FP + 8             [ RA to parent func ] return address to parent
	 *                                          function
	 * FP + 0             [ FP of parent func ] frame pointer of parent
	 *                                          function
	 * FP - 8             [ T0 to traced func ] return address of traced
	 *                                          function
	 * FP - 16            [ FP of traced func ] frame pointer of traced
	 *                                          function
	 * --------------------------------------
	 *
	 * 2. trampoline called directly
	 * --------------------------------------
	 * FP - 8             [ RA to caller func ] return address to caller
	 *                                          function
	 * FP - 16            [ FP of caller func ] frame pointer of caller
	 *                                          function
	 * --------------------------------------
	 *
	 * FP - retval_off    [ return value      ] BPF_TRAMP_F_CALL_ORIG or
	 *                                          BPF_TRAMP_F_RET_FENTRY_RET
	 *                    [ argN              ]
	 *                    [ ...               ]
	 * FP - args_off      [ arg1              ]
	 *
	 * FP - func_meta_off [ regs count, etc   ]
	 *
	 * FP - ip_off        [ traced func       ] BPF_TRAMP_F_IP_ARG
	 *
	 *                    [ stack cookie N    ]
	 *                    [ ...               ]
	 * FP - cookie_off    [ stack cookie 1    ]
	 *
	 * FP - run_ctx_off   [ bpf_tramp_run_ctx ]
	 *
	 * FP - sreg_off      [ callee saved reg  ]
	 *
	 *                    [ pads              ] padding for 16-byte alignment
	 *
	 *                    [ stack_argN        ]
	 *                    [ ...               ]
	 * FP - stk_arg_off   [ stack_arg1        ] BPF_TRAMP_F_CALL_ORIG
	 */

	if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
		return -ENOTSUPP;

	if (m->nr_args > MAX_BPF_FUNC_ARGS)
		return -ENOTSUPP;

	for (i = 0; i < m->nr_args; i++)
		nr_arg_slots += round_up(m->arg_size[i], 8) / 8;

	/* room in trampoline frame to store return address and frame pointer */
	stack_size += 16;

	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
	if (save_ret)
		stack_size += 16; /* Save both A5 (BPF R0) and A0 */
	retval_off = stack_size;

	stack_size += nr_arg_slots * 8;
	args_off = stack_size;

	/* function metadata, such as regs count */
	stack_size += 8;
	func_meta_off = stack_size;

	if (flags & BPF_TRAMP_F_IP_ARG) {
		stack_size += 8;
		ip_off = stack_size;
	}

	cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
	/* room for session cookies */
	stack_size += cookie_cnt * 8;
	cookie_off = stack_size;

	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
	run_ctx_off = stack_size;

	stack_size += 8;
	sreg_off = stack_size;

	if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nr_arg_slots - RV_MAX_REG_ARGS > 0))
		stack_size += (nr_arg_slots - RV_MAX_REG_ARGS) * 8;

	stack_size = round_up(stack_size, STACK_ALIGN);

	/* room for args on stack must be at the top of stack */
	stk_arg_off = stack_size;
	if (!is_struct_ops) {
		/* For the trampoline called from function entry,
		 * the frame of traced function and the frame of
		 * trampoline need to be considered.
		 */
		emit_addi(RV_REG_SP, RV_REG_SP, -16, ctx);
		emit_sd(RV_REG_SP, 8, RV_REG_RA, ctx);
		emit_sd(RV_REG_SP, 0, RV_REG_FP, ctx);
		emit_addi(RV_REG_FP, RV_REG_SP, 16, ctx);

		emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
		emit_sd(RV_REG_SP, stack_size - 8, RV_REG_T0, ctx);
		emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx);
		emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
	} else {
		/* emit kcfi hash */
		emit_kcfi(cfi_get_func_hash(func_addr), ctx);
		/* For the trampoline called directly, just handle
		 * the frame of trampoline.
		 */
		emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
		emit_sd(RV_REG_SP, stack_size - 8, RV_REG_RA, ctx);
		emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx);
		emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
	}

	/* callee saved register S1 to pass start time */
	emit_sd(RV_REG_FP, -sreg_off, RV_REG_S1, ctx);

	/* store ip address of the traced function */
	if (flags & BPF_TRAMP_F_IP_ARG)
		emit_store_stack_imm64(RV_REG_T1, -ip_off, (u64)func_addr, ctx);

	func_meta = nr_arg_slots;
	emit_store_stack_imm64(RV_REG_T1, -func_meta_off, func_meta, ctx);

	store_args(nr_arg_slots, args_off, ctx);

	if (bpf_fsession_cnt(tlinks)) {
		/* clear all session cookie values */
		for (i = 0; i < cookie_cnt; i++)
			emit_sd(RV_REG_FP, -cookie_off + 8 * i, RV_REG_ZERO, ctx);
		/* clear return value to make sure fentry always gets 0 */
		emit_sd(RV_REG_FP, -retval_off, RV_REG_ZERO, ctx);
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		emit_imm(RV_REG_A0, ctx->insns ? (const s64)im : RV_MAX_COUNT_IMM, ctx);
		ret = emit_call((const u64)__bpf_tramp_enter, true, ctx);
		if (ret)
			return ret;
	}

	if (fentry->nr_links) {
		ret = invoke_bpf(fentry, args_off, retval_off, run_ctx_off, func_meta_off,
				 flags & BPF_TRAMP_F_RET_FENTRY_RET, func_meta, cookie_off, ctx);
		if (ret)
			return ret;
	}

	if (fmod_ret->nr_links) {
		branches_off = kzalloc_objs(int, fmod_ret->nr_links);
		if (!branches_off)
			return -ENOMEM;

		/* cleanup to avoid garbage return value confusion */
		emit_sd(RV_REG_FP, -retval_off, RV_REG_ZERO, ctx);
		for (i = 0; i < fmod_ret->nr_links; i++) {
			ret = invoke_bpf_prog(fmod_ret->links[i], args_off, retval_off,
					      run_ctx_off, true, ctx);
			if (ret)
				goto out;
			emit_ld(RV_REG_T1, -retval_off, RV_REG_FP, ctx);
			branches_off[i] = ctx->ninsns;
			/* nop reserved for conditional jump */
			emit(rv_nop(), ctx);
		}
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/* skip to actual body of traced function */
		orig_call += RV_FENTRY_NINSNS * 4;
		restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx);
		restore_stack_args(nr_arg_slots - RV_MAX_REG_ARGS, args_off, stk_arg_off, ctx);
		ret = emit_call((const u64)orig_call, true, ctx);
		if (ret)
			goto out;
		emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
		emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx);
		im->ip_after_call = ctx->ro_insns + ctx->ninsns;
		/* 2 nops reserved for auipc+jalr pair */
		emit(rv_nop(), ctx);
		emit(rv_nop(), ctx);
	}
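	/*
	 * im->ip_after_call points at the two nops above; the trampoline
	 * core (bpf_tramp_image_put()) can later patch them into a jump to
	 * im->ip_epilogue when the image is being torn down.
	 */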
	/* update branches saved in invoke_bpf_mod_ret with bnez */
	for (i = 0; ctx->insns && i < fmod_ret->nr_links; i++) {
		offset = ninsns_rvoff(ctx->ninsns - branches_off[i]);
		insn = rv_bne(RV_REG_T1, RV_REG_ZERO, offset >> 1);
		*(u32 *)(ctx->insns + branches_off[i]) = insn;
	}

	/* set "is_return" flag for fsession */
	func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
	if (bpf_fsession_cnt(tlinks))
		emit_store_stack_imm64(RV_REG_T1, -func_meta_off, func_meta, ctx);

	if (fexit->nr_links) {
		ret = invoke_bpf(fexit, args_off, retval_off, run_ctx_off, func_meta_off,
				 false, func_meta, cookie_off, ctx);
		if (ret)
			goto out;
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		im->ip_epilogue = ctx->ro_insns + ctx->ninsns;
		emit_imm(RV_REG_A0, ctx->insns ? (const s64)im : RV_MAX_COUNT_IMM, ctx);
		ret = emit_call((const u64)__bpf_tramp_exit, true, ctx);
		if (ret)
			goto out;
	}

	if (flags & BPF_TRAMP_F_RESTORE_REGS)
		restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx);

	if (save_ret) {
		emit_ld(regmap[BPF_REG_0], -(retval_off - 8), RV_REG_FP, ctx);
		if (is_struct_ops) {
			ret = sign_extend(RV_REG_A0, regmap[BPF_REG_0], m->ret_size,
					  m->ret_flags & BTF_FMODEL_SIGNED_ARG, ctx);
			if (ret)
				goto out;
		} else {
			emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
		}
	}

	emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);

	if (!is_struct_ops) {
		/* trampoline called from function entry */
		emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx);
		emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
		emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);

		emit_ld(RV_REG_RA, 8, RV_REG_SP, ctx);
		emit_ld(RV_REG_FP, 0, RV_REG_SP, ctx);
		emit_addi(RV_REG_SP, RV_REG_SP, 16, ctx);

		if (flags & BPF_TRAMP_F_SKIP_FRAME)
			/* return to parent function */
			emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
		else
			/* return to traced function */
			emit_jalr(RV_REG_ZERO, RV_REG_T0, 0, ctx);
	} else {
		/* trampoline called directly */
		emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx);
		emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
		emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);

		emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
	}

	ret = ctx->ninsns;
out:
	kfree(branches_off);
	return ret;
}
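/*
 * The sizing pass below runs __arch_prepare_bpf_trampoline() with
 * ctx.insns == NULL, so emit() only counts instructions. RV_MAX_COUNT_IMM
 * stands in for the yet-unknown image pointer so that emit_imm() reserves
 * the worst-case number of instructions.
 */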
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
			     struct bpf_tramp_links *tlinks, void *func_addr)
{
	struct bpf_tramp_image im;
	struct rv_jit_context ctx;
	int ret;

	ctx.ninsns = 0;
	ctx.insns = NULL;
	ctx.ro_insns = NULL;
	ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx);

	return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns);
}

void *arch_alloc_bpf_trampoline(unsigned int size)
{
	return bpf_prog_pack_alloc(size, bpf_fill_ill_insns);
}

void arch_free_bpf_trampoline(void *image, unsigned int size)
{
	bpf_prog_pack_free(image, size);
}

int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
				void *ro_image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *func_addr)
{
	int ret;
	void *image, *res;
	struct rv_jit_context ctx;
	u32 size = ro_image_end - ro_image;

	image = kvmalloc(size, GFP_KERNEL);
	if (!image)
		return -ENOMEM;

	ctx.ninsns = 0;
	ctx.insns = image;
	ctx.ro_insns = ro_image;
	ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
	if (ret < 0)
		goto out;

	if (WARN_ON(size < ninsns_rvoff(ctx.ninsns))) {
		ret = -E2BIG;
		goto out;
	}

	res = bpf_arch_text_copy(ro_image, image, size);
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto out;
	}

out:
	kvfree(image);
	return ret < 0 ? ret : size;
}
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
		      bool extra_pass)
{
	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
		    BPF_CLASS(insn->code) == BPF_JMP;
	int s, e, rvoff, ret, i = insn - ctx->prog->insnsi;
	struct bpf_prog_aux *aux = ctx->prog->aux;
	u8 rd = -1, rs = -1, code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;

	init_regs(&rd, &rs, insn, ctx);

	switch (code) {
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		if (insn_is_cast_user(insn)) {
			emit_mv(RV_REG_T1, rs, ctx);
			emit_zextw(RV_REG_T1, RV_REG_T1, ctx);
			emit_imm(rd, (ctx->user_vm_start >> 32) << 32, ctx);
			emit(rv_beq(RV_REG_T1, RV_REG_ZERO, 4), ctx);
			emit_or(RV_REG_T1, rd, RV_REG_T1, ctx);
			emit_mv(rd, RV_REG_T1, ctx);
			break;
		} else if (insn_is_mov_percpu_addr(insn)) {
			if (rd != rs)
				emit_mv(rd, rs, ctx);
#ifdef CONFIG_SMP
			/* Load current CPU number in T1 */
			emit_lw(RV_REG_T1, offsetof(struct thread_info, cpu),
				RV_REG_TP, ctx);
			/* Load address of __per_cpu_offset array in T2 */
			emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx);
			/* Get address of __per_cpu_offset[cpu] in T1 */
			emit_sh3add(RV_REG_T1, RV_REG_T1, RV_REG_T2, ctx);
			/* Load __per_cpu_offset[cpu] in T1 */
			emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx);
			/* Add the offset to Rd */
			emit_add(rd, rd, RV_REG_T1, ctx);
#endif
		}
		if (imm == 1) {
			/* Special mov32 for zext */
			emit_zextw(rd, rd, ctx);
			break;
		}
		switch (insn->off) {
		case 0:
			emit_mv(rd, rs, ctx);
			break;
		case 8:
			emit_sextb(rd, rs, ctx);
			break;
		case 16:
			emit_sexth(rd, rs, ctx);
			break;
		case 32:
			emit_sextw(rd, rs, ctx);
			break;
		}
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit_add(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		if (is64)
			emit_sub(rd, rd, rs, ctx);
		else
			emit_subw(rd, rd, rs, ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit_and(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit_or(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit_xor(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_X:
		if (off)
			emit(is64 ? rv_div(rd, rd, rs) : rv_divw(rd, rd, rs), ctx);
		else
			emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		if (off)
			emit(is64 ? rv_rem(rd, rd, rs) : rv_remw(rd, rd, rs), ctx);
		else
			emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;

	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		emit_sub(rd, RV_REG_ZERO, rd, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;

	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16:
			emit_zexth(rd, rd, ctx);
			break;
		case 32:
			if (!aux->verifier_zext)
				emit_zextw(rd, rd, ctx);
			break;
		case 64:
			/* Do nothing */
			break;
		}
		break;
	case BPF_ALU | BPF_END | BPF_FROM_BE:
	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
		emit_bswap(rd, imm, ctx);
		break;
	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		emit_imm(rd, imm, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;

	/* dst = dst OP imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		if (is_12b_int(imm)) {
			emit_addi(rd, rd, imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_add(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		if (is_12b_int(-imm)) {
			emit_addi(rd, rd, -imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_sub(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		if (is_12b_int(imm)) {
			emit_andi(rd, rd, imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_and(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		if (is_12b_int(imm)) {
			emit(rv_ori(rd, rd, imm), ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_or(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		if (is_12b_int(imm)) {
			emit(rv_xori(rd, rd, imm), ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_xor(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		emit_imm(RV_REG_T1, imm, ctx);
		emit(is64 ? rv_mul(rd, rd, RV_REG_T1) :
		     rv_mulw(rd, rd, RV_REG_T1), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		emit_imm(RV_REG_T1, imm, ctx);
		if (off)
			emit(is64 ? rv_div(rd, rd, RV_REG_T1) :
			     rv_divw(rd, rd, RV_REG_T1), ctx);
		else
			emit(is64 ? rv_divu(rd, rd, RV_REG_T1) :
			     rv_divuw(rd, rd, RV_REG_T1), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		emit_imm(RV_REG_T1, imm, ctx);
		if (off)
			emit(is64 ? rv_rem(rd, rd, RV_REG_T1) :
			     rv_remw(rd, rd, RV_REG_T1), ctx);
		else
			emit(is64 ? rv_remu(rd, rd, RV_REG_T1) :
			     rv_remuw(rd, rd, RV_REG_T1), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit_slli(rd, rd, imm, ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
		if (is64)
			emit_srli(rd, rd, imm, ctx);
		else
			emit(rv_srliw(rd, rd, imm), ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		if (is64)
			emit_srai(rd, rd, imm, ctx);
		else
			emit(rv_sraiw(rd, rd, imm), ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zextw(rd, rd, ctx);
		break;
	/* JUMP off */
	case BPF_JMP | BPF_JA:
	case BPF_JMP32 | BPF_JA:
		if (BPF_CLASS(code) == BPF_JMP)
			rvoff = rv_offset(i, off, ctx);
		else
			rvoff = rv_offset(i, imm, ctx);
		ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
		if (ret)
			return ret;
		break;

	/* IF (dst COND src) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
		rvoff = rv_offset(i, off, ctx);
		if (!is64) {
			s = ctx->ninsns;
			if (is_signed_bpf_cond(BPF_OP(code))) {
				emit_sextw_alt(&rs, RV_REG_T1, ctx);
				emit_sextw_alt(&rd, RV_REG_T2, ctx);
			} else {
				emit_zextw_alt(&rs, RV_REG_T1, ctx);
				emit_zextw_alt(&rd, RV_REG_T2, ctx);
			}
			e = ctx->ninsns;

			/* Adjust for extra insns */
			rvoff -= ninsns_rvoff(e - s);
		}

		if (BPF_OP(code) == BPF_JSET) {
			/* Adjust for and */
			rvoff -= 4;
			emit_and(RV_REG_T1, rd, rs, ctx);
			emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
		} else {
			emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
		}
		break;

	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		rvoff = rv_offset(i, off, ctx);
		s = ctx->ninsns;
		if (imm)
			emit_imm(RV_REG_T1, imm, ctx);
		rs = imm ? RV_REG_T1 : RV_REG_ZERO;
		if (!is64) {
			if (is_signed_bpf_cond(BPF_OP(code))) {
				emit_sextw_alt(&rd, RV_REG_T2, ctx);
				/* rs has been sign extended */
			} else {
				emit_zextw_alt(&rd, RV_REG_T2, ctx);
				if (imm)
					emit_zextw(rs, rs, ctx);
			}
		}
		e = ctx->ninsns;

		/* Adjust for extra insns */
		rvoff -= ninsns_rvoff(e - s);
		emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
		break;
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		rvoff = rv_offset(i, off, ctx);
		s = ctx->ninsns;
		if (is_12b_int(imm)) {
			emit_andi(RV_REG_T1, rd, imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_and(RV_REG_T1, rd, RV_REG_T1, ctx);
		}
		/* For jset32, we should clear the upper 32 bits of t1, but
		 * sign-extension is sufficient here and saves one instruction,
		 * as t1 is used only in comparison against zero.
		 */
		if (!is64 && imm < 0)
			emit_sextw(RV_REG_T1, RV_REG_T1, ctx);
		e = ctx->ninsns;
		rvoff -= ninsns_rvoff(e - s);
		emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
		break;

	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		bool fixed_addr;
		u64 addr;

		/* Inline calls to bpf_get_smp_processor_id()
		 *
		 * RV_REG_TP holds the address of the current CPU's task_struct and thread_info is
		 * at offset 0 in task_struct.
		 * Load cpu from thread_info:
		 * Set R0 to ((struct thread_info *)(RV_REG_TP))->cpu
		 *
		 * This replicates the implementation of raw_smp_processor_id() on RISCV
		 */
		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {
			/* Load current CPU number in R0 */
			emit_lw(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu),
				RV_REG_TP, ctx);
			break;
		}

		mark_call(ctx);
		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
					    &addr, &fixed_addr);
		if (ret < 0)
			return ret;

		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
			const struct btf_func_model *fm;
			int idx;

			fm = bpf_jit_find_kfunc_model(ctx->prog, insn);
			if (!fm)
				return -EINVAL;

			for (idx = 0; idx < fm->nr_args; idx++) {
				u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx);

				if (fm->arg_size[idx] == sizeof(int))
					emit_sextw(reg, reg, ctx);
			}
		}

		ret = emit_call(addr, fixed_addr, ctx);
		if (ret)
			return ret;

		if (insn->src_reg != BPF_PSEUDO_CALL)
			emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx);
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		if (emit_bpf_tail_call(i, ctx))
			return -1;
		break;

	/* function return */
	case BPF_JMP | BPF_EXIT:
		if (i == ctx->prog->len - 1)
			break;

		rvoff = epilogue_offset(ctx);
		ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
		if (ret)
			return ret;
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		struct bpf_insn insn1 = insn[1];
		u64 imm64;

		imm64 = (u64)insn1.imm << 32 | (u32)imm;
		if (bpf_pseudo_func(insn)) {
			/* fixed-length insns for extra jit pass */
			ret = emit_addr(rd, imm64, extra_pass, ctx);
			if (ret)
				return ret;
		} else {
			emit_imm(rd, imm64, ctx);
		}

		return 1;
	}
	/* LDX: dst = *(unsigned size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
	/* LDSX: dst = *(signed size *)(src + off) */
	case BPF_LDX | BPF_MEMSX | BPF_B:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_W:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
	/* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + RV_REG_ARENA + off) */
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
	{
		bool sign_ext;

		sign_ext = BPF_MODE(insn->code) == BPF_MEMSX ||
			   BPF_MODE(insn->code) == BPF_PROBE_MEMSX;

		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit_add(RV_REG_T2, rs, RV_REG_ARENA, ctx);
			rs = RV_REG_T2;
		}

		emit_ldx(rd, off, rs, BPF_SIZE(code), sign_ext, ctx);

		ret = add_exception_handler(insn, rd, ctx);
		if (ret)
			return ret;

		if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1]))
			return 1;
		break;
	}

	/* speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;

	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_DW:
	/* ST | PROBE_MEM32: *(size *)(dst + RV_REG_ARENA + off) = imm */
	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit_add(RV_REG_T3, rd, RV_REG_ARENA, ctx);
			rd = RV_REG_T3;
		}

		emit_st(rd, off, imm, BPF_SIZE(code), ctx);

		ret = add_exception_handler(insn, REG_DONT_CLEAR_MARKER, ctx);
		if (ret)
			return ret;
		break;

	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_DW:
	/* STX | PROBE_MEM32: *(size *)(dst + RV_REG_ARENA + off) = src */
	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx);
			rd = RV_REG_T2;
		}

		emit_stx(rd, off, rs, BPF_SIZE(code), ctx);

		ret = add_exception_handler(insn, REG_DONT_CLEAR_MARKER, ctx);
		if (ret)
			return ret;
		break;

	/* Atomics */
	case BPF_STX | BPF_ATOMIC | BPF_B:
	case BPF_STX | BPF_ATOMIC | BPF_H:
	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
		if (bpf_atomic_is_load_store(insn))
			ret = emit_atomic_ld_st(rd, rs, insn, ctx);
		else
			ret = emit_atomic_rmw(rd, rs, insn, ctx);

		ret = ret ?: add_exception_handler(insn, REG_DONT_CLEAR_MARKER, ctx);
		if (ret)
			return ret;
		break;

	default:
		pr_err("bpf-jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}
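/*
 * Prologue stack frame, top to bottom (mirrored by __build_epilogue()):
 * RA (only if the program makes calls), FP, any of S1-S6 actually used,
 * then S7 when an arena is attached, followed by the program's own stack
 * (ctx->prog->aux->stack_depth bytes, addressed via BPF_REG_FP/S5).
 */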
void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog)
{
	int i, stack_adjust = 0, store_offset, bpf_stack_adjust;

	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, STACK_ALIGN);
	if (bpf_stack_adjust)
		mark_fp(ctx);

	if (seen_reg(RV_REG_RA, ctx))
		stack_adjust += 8;
	stack_adjust += 8; /* RV_REG_FP */
	if (seen_reg(RV_REG_S1, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S2, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S3, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S4, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S5, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S6, ctx))
		stack_adjust += 8;
	if (ctx->arena_vm_start)
		stack_adjust += 8;

	stack_adjust = round_up(stack_adjust, STACK_ALIGN);
	stack_adjust += bpf_stack_adjust;

	store_offset = stack_adjust - 8;

	/* emit kcfi type preamble immediately before the first insn */
	emit_kcfi(is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash, ctx);

	/* nops reserved for auipc+jalr pair */
	for (i = 0; i < RV_FENTRY_NINSNS; i++)
		emit(rv_nop(), ctx);

	/* First instruction is always setting the tail-call-counter
	 * (TCC) register. This instruction is skipped for tail calls.
	 * Force using a 4-byte (non-compressed) instruction.
	 */
	emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx);

	emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx);

	if (seen_reg(RV_REG_RA, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx);
		store_offset -= 8;
	}
	emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx);
	store_offset -= 8;
	if (seen_reg(RV_REG_S1, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S2, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S3, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S4, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S5, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S6, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx);
		store_offset -= 8;
	}
	if (ctx->arena_vm_start) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_ARENA, ctx);
		store_offset -= 8;
	}

	emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx);

	if (bpf_stack_adjust)
		emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx);

	/* Program contains calls and tail calls, so RV_REG_TCC needs
	 * to be saved across calls.
	 */
	if (seen_tail_call(ctx) && seen_call(ctx))
		emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx);

	ctx->stack_size = stack_adjust;

	if (ctx->arena_vm_start)
		emit_imm(RV_REG_ARENA, ctx->arena_vm_start, ctx);
}

void bpf_jit_build_epilogue(struct rv_jit_context *ctx)
{
	__build_epilogue(false, ctx);
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}

bool bpf_jit_supports_ptr_xchg(void)
{
	return true;
}

bool bpf_jit_supports_arena(void)
{
	return true;
}

bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
	if (in_arena) {
		switch (insn->code) {
		case BPF_STX | BPF_ATOMIC | BPF_W:
		case BPF_STX | BPF_ATOMIC | BPF_DW:
			if (insn->imm == BPF_CMPXCHG)
				return rv_ext_enabled(ZACAS);
			break;
		case BPF_LDX | BPF_MEMSX | BPF_B:
		case BPF_LDX | BPF_MEMSX | BPF_H:
		case BPF_LDX | BPF_MEMSX | BPF_W:
			return false;
		}
	}

	return true;
}

bool bpf_jit_supports_percpu_insn(void)
{
	return true;
}

bool bpf_jit_inlines_helper_call(s32 imm)
{
	switch (imm) {
	case BPF_FUNC_get_smp_processor_id:
		return true;
	default:
		return false;
	}
}

bool bpf_jit_supports_fsession(void)
{
	return true;
}