1 // SPDX-License-Identifier: GPL-2.0 2 /* BPF JIT compiler for RV64G 3 * 4 * Copyright(c) 2019 Björn Töpel <bjorn.topel@gmail.com> 5 * 6 */ 7 8 #include <linux/bitfield.h> 9 #include <linux/bpf.h> 10 #include <linux/filter.h> 11 #include <linux/memory.h> 12 #include <linux/stop_machine.h> 13 #include <asm/text-patching.h> 14 #include <asm/cfi.h> 15 #include <asm/percpu.h> 16 #include "bpf_jit.h" 17 18 #define RV_MAX_REG_ARGS 8 19 #define RV_FENTRY_NINSNS 2 20 #define RV_FENTRY_NBYTES (RV_FENTRY_NINSNS * 4) 21 #define RV_KCFI_NINSNS (IS_ENABLED(CONFIG_CFI_CLANG) ? 1 : 0) 22 /* imm that allows emit_imm to emit max count insns */ 23 #define RV_MAX_COUNT_IMM 0x7FFF7FF7FF7FF7FF 24 25 #define RV_REG_TCC RV_REG_A6 26 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */ 27 #define RV_REG_ARENA RV_REG_S7 /* For storing arena_vm_start */ 28 29 static const int regmap[] = { 30 [BPF_REG_0] = RV_REG_A5, 31 [BPF_REG_1] = RV_REG_A0, 32 [BPF_REG_2] = RV_REG_A1, 33 [BPF_REG_3] = RV_REG_A2, 34 [BPF_REG_4] = RV_REG_A3, 35 [BPF_REG_5] = RV_REG_A4, 36 [BPF_REG_6] = RV_REG_S1, 37 [BPF_REG_7] = RV_REG_S2, 38 [BPF_REG_8] = RV_REG_S3, 39 [BPF_REG_9] = RV_REG_S4, 40 [BPF_REG_FP] = RV_REG_S5, 41 [BPF_REG_AX] = RV_REG_T0, 42 }; 43 44 static const int pt_regmap[] = { 45 [RV_REG_A0] = offsetof(struct pt_regs, a0), 46 [RV_REG_A1] = offsetof(struct pt_regs, a1), 47 [RV_REG_A2] = offsetof(struct pt_regs, a2), 48 [RV_REG_A3] = offsetof(struct pt_regs, a3), 49 [RV_REG_A4] = offsetof(struct pt_regs, a4), 50 [RV_REG_A5] = offsetof(struct pt_regs, a5), 51 [RV_REG_S1] = offsetof(struct pt_regs, s1), 52 [RV_REG_S2] = offsetof(struct pt_regs, s2), 53 [RV_REG_S3] = offsetof(struct pt_regs, s3), 54 [RV_REG_S4] = offsetof(struct pt_regs, s4), 55 [RV_REG_S5] = offsetof(struct pt_regs, s5), 56 [RV_REG_T0] = offsetof(struct pt_regs, t0), 57 }; 58 59 enum { 60 RV_CTX_F_SEEN_TAIL_CALL = 0, 61 RV_CTX_F_SEEN_CALL = RV_REG_RA, 62 RV_CTX_F_SEEN_S1 = RV_REG_S1, 63 RV_CTX_F_SEEN_S2 = RV_REG_S2, 64 RV_CTX_F_SEEN_S3 = RV_REG_S3, 65 RV_CTX_F_SEEN_S4 = RV_REG_S4, 66 RV_CTX_F_SEEN_S5 = RV_REG_S5, 67 RV_CTX_F_SEEN_S6 = RV_REG_S6, 68 }; 69 70 static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx) 71 { 72 u8 reg = regmap[bpf_reg]; 73 74 switch (reg) { 75 case RV_CTX_F_SEEN_S1: 76 case RV_CTX_F_SEEN_S2: 77 case RV_CTX_F_SEEN_S3: 78 case RV_CTX_F_SEEN_S4: 79 case RV_CTX_F_SEEN_S5: 80 case RV_CTX_F_SEEN_S6: 81 __set_bit(reg, &ctx->flags); 82 } 83 return reg; 84 }; 85 86 static bool seen_reg(int reg, struct rv_jit_context *ctx) 87 { 88 switch (reg) { 89 case RV_CTX_F_SEEN_CALL: 90 case RV_CTX_F_SEEN_S1: 91 case RV_CTX_F_SEEN_S2: 92 case RV_CTX_F_SEEN_S3: 93 case RV_CTX_F_SEEN_S4: 94 case RV_CTX_F_SEEN_S5: 95 case RV_CTX_F_SEEN_S6: 96 return test_bit(reg, &ctx->flags); 97 } 98 return false; 99 } 100 101 static void mark_fp(struct rv_jit_context *ctx) 102 { 103 __set_bit(RV_CTX_F_SEEN_S5, &ctx->flags); 104 } 105 106 static void mark_call(struct rv_jit_context *ctx) 107 { 108 __set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags); 109 } 110 111 static bool seen_call(struct rv_jit_context *ctx) 112 { 113 return test_bit(RV_CTX_F_SEEN_CALL, &ctx->flags); 114 } 115 116 static void mark_tail_call(struct rv_jit_context *ctx) 117 { 118 __set_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags); 119 } 120 121 static bool seen_tail_call(struct rv_jit_context *ctx) 122 { 123 return test_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags); 124 } 125 126 static u8 rv_tail_call_reg(struct rv_jit_context *ctx) 127 { 128 mark_tail_call(ctx); 129 130 if (seen_call(ctx)) { 131 __set_bit(RV_CTX_F_SEEN_S6, &ctx->flags); 132 return RV_REG_S6; 133 } 134 return RV_REG_A6; 135 } 136 137 static bool is_32b_int(s64 val) 138 { 139 return -(1L << 31) <= val && val < (1L << 31); 140 } 141 142 static bool in_auipc_jalr_range(s64 val) 143 { 144 /* 145 * auipc+jalr can reach any signed PC-relative offset in the range 146 * [-2^31 - 2^11, 2^31 - 2^11). 147 */ 148 return (-(1L << 31) - (1L << 11)) <= val && 149 val < ((1L << 31) - (1L << 11)); 150 } 151 152 /* Modify rd pointer to alternate reg to avoid corrupting original reg */ 153 static void emit_sextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx) 154 { 155 emit_sextw(ra, *rd, ctx); 156 *rd = ra; 157 } 158 159 static void emit_zextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx) 160 { 161 emit_zextw(ra, *rd, ctx); 162 *rd = ra; 163 } 164 165 /* Emit fixed-length instructions for address */ 166 static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx) 167 { 168 /* 169 * Use the ro_insns(RX) to calculate the offset as the BPF program will 170 * finally run from this memory region. 171 */ 172 u64 ip = (u64)(ctx->ro_insns + ctx->ninsns); 173 s64 off = addr - ip; 174 s64 upper = (off + (1 << 11)) >> 12; 175 s64 lower = off & 0xfff; 176 177 if (extra_pass && !in_auipc_jalr_range(off)) { 178 pr_err("bpf-jit: target offset 0x%llx is out of range\n", off); 179 return -ERANGE; 180 } 181 182 emit(rv_auipc(rd, upper), ctx); 183 emit(rv_addi(rd, rd, lower), ctx); 184 return 0; 185 } 186 187 /* Emit variable-length instructions for 32-bit and 64-bit imm */ 188 static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) 189 { 190 /* Note that the immediate from the add is sign-extended, 191 * which means that we need to compensate this by adding 2^12, 192 * when the 12th bit is set. A simpler way of doing this, and 193 * getting rid of the check, is to just add 2**11 before the 194 * shift. The "Loading a 32-Bit constant" example from the 195 * "Computer Organization and Design, RISC-V edition" book by 196 * Patterson/Hennessy highlights this fact. 197 * 198 * This also means that we need to process LSB to MSB. 199 */ 200 s64 upper = (val + (1 << 11)) >> 12; 201 /* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw, 202 * and addi are signed and RVC checks will perform signed comparisons. 203 */ 204 s64 lower = ((val & 0xfff) << 52) >> 52; 205 int shift; 206 207 if (is_32b_int(val)) { 208 if (upper) 209 emit_lui(rd, upper, ctx); 210 211 if (!upper) { 212 emit_li(rd, lower, ctx); 213 return; 214 } 215 216 emit_addiw(rd, rd, lower, ctx); 217 return; 218 } 219 220 shift = __ffs(upper); 221 upper >>= shift; 222 shift += 12; 223 224 emit_imm(rd, upper, ctx); 225 226 emit_slli(rd, rd, shift, ctx); 227 if (lower) 228 emit_addi(rd, rd, lower, ctx); 229 } 230 231 static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) 232 { 233 int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8; 234 235 if (seen_reg(RV_REG_RA, ctx)) { 236 emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx); 237 store_offset -= 8; 238 } 239 emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx); 240 store_offset -= 8; 241 if (seen_reg(RV_REG_S1, ctx)) { 242 emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx); 243 store_offset -= 8; 244 } 245 if (seen_reg(RV_REG_S2, ctx)) { 246 emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx); 247 store_offset -= 8; 248 } 249 if (seen_reg(RV_REG_S3, ctx)) { 250 emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx); 251 store_offset -= 8; 252 } 253 if (seen_reg(RV_REG_S4, ctx)) { 254 emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx); 255 store_offset -= 8; 256 } 257 if (seen_reg(RV_REG_S5, ctx)) { 258 emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx); 259 store_offset -= 8; 260 } 261 if (seen_reg(RV_REG_S6, ctx)) { 262 emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx); 263 store_offset -= 8; 264 } 265 if (ctx->arena_vm_start) { 266 emit_ld(RV_REG_ARENA, store_offset, RV_REG_SP, ctx); 267 store_offset -= 8; 268 } 269 270 emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx); 271 /* Set return value. */ 272 if (!is_tail_call) 273 emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx); 274 emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, 275 /* kcfi, fentry and TCC init insns will be skipped on tailcall */ 276 is_tail_call ? (RV_KCFI_NINSNS + RV_FENTRY_NINSNS + 1) * 4 : 0, 277 ctx); 278 } 279 280 static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff, 281 struct rv_jit_context *ctx) 282 { 283 switch (cond) { 284 case BPF_JEQ: 285 emit(rv_beq(rd, rs, rvoff >> 1), ctx); 286 return; 287 case BPF_JGT: 288 emit(rv_bltu(rs, rd, rvoff >> 1), ctx); 289 return; 290 case BPF_JLT: 291 emit(rv_bltu(rd, rs, rvoff >> 1), ctx); 292 return; 293 case BPF_JGE: 294 emit(rv_bgeu(rd, rs, rvoff >> 1), ctx); 295 return; 296 case BPF_JLE: 297 emit(rv_bgeu(rs, rd, rvoff >> 1), ctx); 298 return; 299 case BPF_JNE: 300 emit(rv_bne(rd, rs, rvoff >> 1), ctx); 301 return; 302 case BPF_JSGT: 303 emit(rv_blt(rs, rd, rvoff >> 1), ctx); 304 return; 305 case BPF_JSLT: 306 emit(rv_blt(rd, rs, rvoff >> 1), ctx); 307 return; 308 case BPF_JSGE: 309 emit(rv_bge(rd, rs, rvoff >> 1), ctx); 310 return; 311 case BPF_JSLE: 312 emit(rv_bge(rs, rd, rvoff >> 1), ctx); 313 } 314 } 315 316 static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff, 317 struct rv_jit_context *ctx) 318 { 319 s64 upper, lower; 320 321 if (is_13b_int(rvoff)) { 322 emit_bcc(cond, rd, rs, rvoff, ctx); 323 return; 324 } 325 326 /* Adjust for jal */ 327 rvoff -= 4; 328 329 /* Transform, e.g.: 330 * bne rd,rs,foo 331 * to 332 * beq rd,rs,<.L1> 333 * (auipc foo) 334 * jal(r) foo 335 * .L1 336 */ 337 cond = invert_bpf_cond(cond); 338 if (is_21b_int(rvoff)) { 339 emit_bcc(cond, rd, rs, 8, ctx); 340 emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx); 341 return; 342 } 343 344 /* 32b No need for an additional rvoff adjustment, since we 345 * get that from the auipc at PC', where PC = PC' + 4. 346 */ 347 upper = (rvoff + (1 << 11)) >> 12; 348 lower = rvoff & 0xfff; 349 350 emit_bcc(cond, rd, rs, 12, ctx); 351 emit(rv_auipc(RV_REG_T1, upper), ctx); 352 emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx); 353 } 354 355 static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) 356 { 357 int tc_ninsn, off, start_insn = ctx->ninsns; 358 u8 tcc = rv_tail_call_reg(ctx); 359 360 /* a0: &ctx 361 * a1: &array 362 * a2: index 363 * 364 * if (index >= array->map.max_entries) 365 * goto out; 366 */ 367 tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] : 368 ctx->offset[0]; 369 emit_zextw(RV_REG_A2, RV_REG_A2, ctx); 370 371 off = offsetof(struct bpf_array, map.max_entries); 372 if (is_12b_check(off, insn)) 373 return -1; 374 emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx); 375 off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 376 emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx); 377 378 /* if (--TCC < 0) 379 * goto out; 380 */ 381 emit_addi(RV_REG_TCC, tcc, -1, ctx); 382 off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 383 emit_branch(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx); 384 385 /* prog = array->ptrs[index]; 386 * if (!prog) 387 * goto out; 388 */ 389 emit_sh3add(RV_REG_T2, RV_REG_A2, RV_REG_A1, ctx); 390 off = offsetof(struct bpf_array, ptrs); 391 if (is_12b_check(off, insn)) 392 return -1; 393 emit_ld(RV_REG_T2, off, RV_REG_T2, ctx); 394 off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 395 emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx); 396 397 /* goto *(prog->bpf_func + 4); */ 398 off = offsetof(struct bpf_prog, bpf_func); 399 if (is_12b_check(off, insn)) 400 return -1; 401 emit_ld(RV_REG_T3, off, RV_REG_T2, ctx); 402 __build_epilogue(true, ctx); 403 return 0; 404 } 405 406 static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn, 407 struct rv_jit_context *ctx) 408 { 409 u8 code = insn->code; 410 411 switch (code) { 412 case BPF_JMP | BPF_JA: 413 case BPF_JMP | BPF_CALL: 414 case BPF_JMP | BPF_EXIT: 415 case BPF_JMP | BPF_TAIL_CALL: 416 break; 417 default: 418 *rd = bpf_to_rv_reg(insn->dst_reg, ctx); 419 } 420 421 if (code & (BPF_ALU | BPF_X) || code & (BPF_ALU64 | BPF_X) || 422 code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) || 423 code & BPF_LDX || code & BPF_STX) 424 *rs = bpf_to_rv_reg(insn->src_reg, ctx); 425 } 426 427 static int emit_jump_and_link(u8 rd, s64 rvoff, bool fixed_addr, 428 struct rv_jit_context *ctx) 429 { 430 s64 upper, lower; 431 432 if (rvoff && fixed_addr && is_21b_int(rvoff)) { 433 emit(rv_jal(rd, rvoff >> 1), ctx); 434 return 0; 435 } else if (in_auipc_jalr_range(rvoff)) { 436 upper = (rvoff + (1 << 11)) >> 12; 437 lower = rvoff & 0xfff; 438 emit(rv_auipc(RV_REG_T1, upper), ctx); 439 emit(rv_jalr(rd, RV_REG_T1, lower), ctx); 440 return 0; 441 } 442 443 pr_err("bpf-jit: target offset 0x%llx is out of range\n", rvoff); 444 return -ERANGE; 445 } 446 447 static bool is_signed_bpf_cond(u8 cond) 448 { 449 return cond == BPF_JSGT || cond == BPF_JSLT || 450 cond == BPF_JSGE || cond == BPF_JSLE; 451 } 452 453 static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx) 454 { 455 s64 off = 0; 456 u64 ip; 457 458 if (addr && ctx->insns && ctx->ro_insns) { 459 /* 460 * Use the ro_insns(RX) to calculate the offset as the BPF 461 * program will finally run from this memory region. 462 */ 463 ip = (u64)(long)(ctx->ro_insns + ctx->ninsns); 464 off = addr - ip; 465 } 466 467 return emit_jump_and_link(RV_REG_RA, off, fixed_addr, ctx); 468 } 469 470 static inline void emit_kcfi(u32 hash, struct rv_jit_context *ctx) 471 { 472 if (IS_ENABLED(CONFIG_CFI_CLANG)) 473 emit(hash, ctx); 474 } 475 476 static int emit_load_8(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx) 477 { 478 int insns_start; 479 480 if (is_12b_int(off)) { 481 insns_start = ctx->ninsns; 482 if (sign_ext) 483 emit(rv_lb(rd, off, rs), ctx); 484 else 485 emit(rv_lbu(rd, off, rs), ctx); 486 return ctx->ninsns - insns_start; 487 } 488 489 emit_imm(RV_REG_T1, off, ctx); 490 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 491 insns_start = ctx->ninsns; 492 if (sign_ext) 493 emit(rv_lb(rd, 0, RV_REG_T1), ctx); 494 else 495 emit(rv_lbu(rd, 0, RV_REG_T1), ctx); 496 return ctx->ninsns - insns_start; 497 } 498 499 static int emit_load_16(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx) 500 { 501 int insns_start; 502 503 if (is_12b_int(off)) { 504 insns_start = ctx->ninsns; 505 if (sign_ext) 506 emit(rv_lh(rd, off, rs), ctx); 507 else 508 emit(rv_lhu(rd, off, rs), ctx); 509 return ctx->ninsns - insns_start; 510 } 511 512 emit_imm(RV_REG_T1, off, ctx); 513 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 514 insns_start = ctx->ninsns; 515 if (sign_ext) 516 emit(rv_lh(rd, 0, RV_REG_T1), ctx); 517 else 518 emit(rv_lhu(rd, 0, RV_REG_T1), ctx); 519 return ctx->ninsns - insns_start; 520 } 521 522 static int emit_load_32(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx) 523 { 524 int insns_start; 525 526 if (is_12b_int(off)) { 527 insns_start = ctx->ninsns; 528 if (sign_ext) 529 emit(rv_lw(rd, off, rs), ctx); 530 else 531 emit(rv_lwu(rd, off, rs), ctx); 532 return ctx->ninsns - insns_start; 533 } 534 535 emit_imm(RV_REG_T1, off, ctx); 536 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 537 insns_start = ctx->ninsns; 538 if (sign_ext) 539 emit(rv_lw(rd, 0, RV_REG_T1), ctx); 540 else 541 emit(rv_lwu(rd, 0, RV_REG_T1), ctx); 542 return ctx->ninsns - insns_start; 543 } 544 545 static int emit_load_64(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx) 546 { 547 int insns_start; 548 549 if (is_12b_int(off)) { 550 insns_start = ctx->ninsns; 551 emit_ld(rd, off, rs, ctx); 552 return ctx->ninsns - insns_start; 553 } 554 555 emit_imm(RV_REG_T1, off, ctx); 556 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 557 insns_start = ctx->ninsns; 558 emit_ld(rd, 0, RV_REG_T1, ctx); 559 return ctx->ninsns - insns_start; 560 } 561 562 static void emit_store_8(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx) 563 { 564 if (is_12b_int(off)) { 565 emit(rv_sb(rd, off, rs), ctx); 566 return; 567 } 568 569 emit_imm(RV_REG_T1, off, ctx); 570 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 571 emit(rv_sb(RV_REG_T1, 0, rs), ctx); 572 } 573 574 static void emit_store_16(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx) 575 { 576 if (is_12b_int(off)) { 577 emit(rv_sh(rd, off, rs), ctx); 578 return; 579 } 580 581 emit_imm(RV_REG_T1, off, ctx); 582 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 583 emit(rv_sh(RV_REG_T1, 0, rs), ctx); 584 } 585 586 static void emit_store_32(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx) 587 { 588 if (is_12b_int(off)) { 589 emit_sw(rd, off, rs, ctx); 590 return; 591 } 592 593 emit_imm(RV_REG_T1, off, ctx); 594 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 595 emit_sw(RV_REG_T1, 0, rs, ctx); 596 } 597 598 static void emit_store_64(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx) 599 { 600 if (is_12b_int(off)) { 601 emit_sd(rd, off, rs, ctx); 602 return; 603 } 604 605 emit_imm(RV_REG_T1, off, ctx); 606 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 607 emit_sd(RV_REG_T1, 0, rs, ctx); 608 } 609 610 static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn, 611 struct rv_jit_context *ctx) 612 { 613 u8 code = insn->code; 614 s32 imm = insn->imm; 615 s16 off = insn->off; 616 617 switch (imm) { 618 /* dst_reg = load_acquire(src_reg + off16) */ 619 case BPF_LOAD_ACQ: 620 switch (BPF_SIZE(code)) { 621 case BPF_B: 622 emit_load_8(false, rd, off, rs, ctx); 623 break; 624 case BPF_H: 625 emit_load_16(false, rd, off, rs, ctx); 626 break; 627 case BPF_W: 628 emit_load_32(false, rd, off, rs, ctx); 629 break; 630 case BPF_DW: 631 emit_load_64(false, rd, off, rs, ctx); 632 break; 633 } 634 emit_fence_r_rw(ctx); 635 636 /* If our next insn is a redundant zext, return 1 to tell 637 * build_body() to skip it. 638 */ 639 if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1])) 640 return 1; 641 break; 642 /* store_release(dst_reg + off16, src_reg) */ 643 case BPF_STORE_REL: 644 emit_fence_rw_w(ctx); 645 switch (BPF_SIZE(code)) { 646 case BPF_B: 647 emit_store_8(rd, off, rs, ctx); 648 break; 649 case BPF_H: 650 emit_store_16(rd, off, rs, ctx); 651 break; 652 case BPF_W: 653 emit_store_32(rd, off, rs, ctx); 654 break; 655 case BPF_DW: 656 emit_store_64(rd, off, rs, ctx); 657 break; 658 } 659 break; 660 default: 661 pr_err_once("bpf-jit: invalid atomic load/store opcode %02x\n", imm); 662 return -EINVAL; 663 } 664 665 return 0; 666 } 667 668 static int emit_atomic_rmw(u8 rd, u8 rs, const struct bpf_insn *insn, 669 struct rv_jit_context *ctx) 670 { 671 u8 r0, code = insn->code; 672 s16 off = insn->off; 673 s32 imm = insn->imm; 674 int jmp_offset; 675 bool is64; 676 677 if (BPF_SIZE(code) != BPF_W && BPF_SIZE(code) != BPF_DW) { 678 pr_err_once("bpf-jit: 1- and 2-byte RMW atomics are not supported\n"); 679 return -EINVAL; 680 } 681 is64 = BPF_SIZE(code) == BPF_DW; 682 683 if (off) { 684 if (is_12b_int(off)) { 685 emit_addi(RV_REG_T1, rd, off, ctx); 686 } else { 687 emit_imm(RV_REG_T1, off, ctx); 688 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 689 } 690 rd = RV_REG_T1; 691 } 692 693 switch (imm) { 694 /* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */ 695 case BPF_ADD: 696 emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) : 697 rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 698 break; 699 case BPF_AND: 700 emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) : 701 rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 702 break; 703 case BPF_OR: 704 emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) : 705 rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 706 break; 707 case BPF_XOR: 708 emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) : 709 rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 710 break; 711 /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */ 712 case BPF_ADD | BPF_FETCH: 713 emit(is64 ? rv_amoadd_d(rs, rs, rd, 1, 1) : 714 rv_amoadd_w(rs, rs, rd, 1, 1), ctx); 715 if (!is64) 716 emit_zextw(rs, rs, ctx); 717 break; 718 case BPF_AND | BPF_FETCH: 719 emit(is64 ? rv_amoand_d(rs, rs, rd, 1, 1) : 720 rv_amoand_w(rs, rs, rd, 1, 1), ctx); 721 if (!is64) 722 emit_zextw(rs, rs, ctx); 723 break; 724 case BPF_OR | BPF_FETCH: 725 emit(is64 ? rv_amoor_d(rs, rs, rd, 1, 1) : 726 rv_amoor_w(rs, rs, rd, 1, 1), ctx); 727 if (!is64) 728 emit_zextw(rs, rs, ctx); 729 break; 730 case BPF_XOR | BPF_FETCH: 731 emit(is64 ? rv_amoxor_d(rs, rs, rd, 1, 1) : 732 rv_amoxor_w(rs, rs, rd, 1, 1), ctx); 733 if (!is64) 734 emit_zextw(rs, rs, ctx); 735 break; 736 /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */ 737 case BPF_XCHG: 738 emit(is64 ? rv_amoswap_d(rs, rs, rd, 1, 1) : 739 rv_amoswap_w(rs, rs, rd, 1, 1), ctx); 740 if (!is64) 741 emit_zextw(rs, rs, ctx); 742 break; 743 /* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */ 744 case BPF_CMPXCHG: 745 r0 = bpf_to_rv_reg(BPF_REG_0, ctx); 746 if (is64) 747 emit_mv(RV_REG_T2, r0, ctx); 748 else 749 emit_addiw(RV_REG_T2, r0, 0, ctx); 750 emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) : 751 rv_lr_w(r0, 0, rd, 0, 0), ctx); 752 jmp_offset = ninsns_rvoff(8); 753 emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx); 754 emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 1) : 755 rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx); 756 jmp_offset = ninsns_rvoff(-6); 757 emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx); 758 emit_fence_rw_rw(ctx); 759 break; 760 default: 761 pr_err_once("bpf-jit: invalid atomic RMW opcode %02x\n", imm); 762 return -EINVAL; 763 } 764 765 return 0; 766 } 767 768 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) 769 #define BPF_FIXUP_REG_MASK GENMASK(31, 27) 770 #define REG_DONT_CLEAR_MARKER 0 /* RV_REG_ZERO unused in pt_regmap */ 771 772 bool ex_handler_bpf(const struct exception_table_entry *ex, 773 struct pt_regs *regs) 774 { 775 off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); 776 int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); 777 778 if (regs_offset != REG_DONT_CLEAR_MARKER) 779 *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0; 780 regs->epc = (unsigned long)&ex->fixup - offset; 781 782 return true; 783 } 784 785 /* For accesses to BTF pointers, add an entry to the exception table */ 786 static int add_exception_handler(const struct bpf_insn *insn, 787 struct rv_jit_context *ctx, 788 int dst_reg, int insn_len) 789 { 790 struct exception_table_entry *ex; 791 unsigned long pc; 792 off_t ins_offset; 793 off_t fixup_offset; 794 795 if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable || 796 (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX && 797 BPF_MODE(insn->code) != BPF_PROBE_MEM32)) 798 return 0; 799 800 if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries)) 801 return -EINVAL; 802 803 if (WARN_ON_ONCE(insn_len > ctx->ninsns)) 804 return -EINVAL; 805 806 if (WARN_ON_ONCE(!rvc_enabled() && insn_len == 1)) 807 return -EINVAL; 808 809 ex = &ctx->prog->aux->extable[ctx->nexentries]; 810 pc = (unsigned long)&ctx->ro_insns[ctx->ninsns - insn_len]; 811 812 /* 813 * This is the relative offset of the instruction that may fault from 814 * the exception table itself. This will be written to the exception 815 * table and if this instruction faults, the destination register will 816 * be set to '0' and the execution will jump to the next instruction. 817 */ 818 ins_offset = pc - (long)&ex->insn; 819 if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN)) 820 return -ERANGE; 821 822 /* 823 * Since the extable follows the program, the fixup offset is always 824 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value 825 * to keep things simple, and put the destination register in the upper 826 * bits. We don't need to worry about buildtime or runtime sort 827 * modifying the upper bits because the table is already sorted, and 828 * isn't part of the main exception table. 829 * 830 * The fixup_offset is set to the next instruction from the instruction 831 * that may fault. The execution will jump to this after handling the 832 * fault. 833 */ 834 fixup_offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16)); 835 if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset)) 836 return -ERANGE; 837 838 /* 839 * The offsets above have been calculated using the RO buffer but we 840 * need to use the R/W buffer for writes. 841 * switch ex to rw buffer for writing. 842 */ 843 ex = (void *)ctx->insns + ((void *)ex - (void *)ctx->ro_insns); 844 845 ex->insn = ins_offset; 846 847 ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | 848 FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); 849 ex->type = EX_TYPE_BPF; 850 851 ctx->nexentries++; 852 return 0; 853 } 854 855 static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call) 856 { 857 s64 rvoff; 858 struct rv_jit_context ctx; 859 860 ctx.ninsns = 0; 861 ctx.insns = (u16 *)insns; 862 863 if (!target) { 864 emit(rv_nop(), &ctx); 865 emit(rv_nop(), &ctx); 866 return 0; 867 } 868 869 rvoff = (s64)(target - ip); 870 return emit_jump_and_link(is_call ? RV_REG_T0 : RV_REG_ZERO, rvoff, false, &ctx); 871 } 872 873 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, 874 void *old_addr, void *new_addr) 875 { 876 u32 old_insns[RV_FENTRY_NINSNS], new_insns[RV_FENTRY_NINSNS]; 877 bool is_call = poke_type == BPF_MOD_CALL; 878 int ret; 879 880 if (!is_kernel_text((unsigned long)ip) && 881 !is_bpf_text_address((unsigned long)ip)) 882 return -ENOTSUPP; 883 884 ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call); 885 if (ret) 886 return ret; 887 888 if (memcmp(ip, old_insns, RV_FENTRY_NBYTES)) 889 return -EFAULT; 890 891 ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call); 892 if (ret) 893 return ret; 894 895 cpus_read_lock(); 896 mutex_lock(&text_mutex); 897 if (memcmp(ip, new_insns, RV_FENTRY_NBYTES)) 898 ret = patch_text(ip, new_insns, RV_FENTRY_NBYTES); 899 mutex_unlock(&text_mutex); 900 cpus_read_unlock(); 901 902 return ret; 903 } 904 905 static void store_args(int nr_arg_slots, int args_off, struct rv_jit_context *ctx) 906 { 907 int i; 908 909 for (i = 0; i < nr_arg_slots; i++) { 910 if (i < RV_MAX_REG_ARGS) { 911 emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx); 912 } else { 913 /* skip slots for T0 and FP of traced function */ 914 emit_ld(RV_REG_T1, 16 + (i - RV_MAX_REG_ARGS) * 8, RV_REG_FP, ctx); 915 emit_sd(RV_REG_FP, -args_off, RV_REG_T1, ctx); 916 } 917 args_off -= 8; 918 } 919 } 920 921 static void restore_args(int nr_reg_args, int args_off, struct rv_jit_context *ctx) 922 { 923 int i; 924 925 for (i = 0; i < nr_reg_args; i++) { 926 emit_ld(RV_REG_A0 + i, -args_off, RV_REG_FP, ctx); 927 args_off -= 8; 928 } 929 } 930 931 static void restore_stack_args(int nr_stack_args, int args_off, int stk_arg_off, 932 struct rv_jit_context *ctx) 933 { 934 int i; 935 936 for (i = 0; i < nr_stack_args; i++) { 937 emit_ld(RV_REG_T1, -(args_off - RV_MAX_REG_ARGS * 8), RV_REG_FP, ctx); 938 emit_sd(RV_REG_FP, -stk_arg_off, RV_REG_T1, ctx); 939 args_off -= 8; 940 stk_arg_off -= 8; 941 } 942 } 943 944 static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off, 945 int run_ctx_off, bool save_ret, struct rv_jit_context *ctx) 946 { 947 int ret, branch_off; 948 struct bpf_prog *p = l->link.prog; 949 int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); 950 951 if (l->cookie) { 952 emit_imm(RV_REG_T1, l->cookie, ctx); 953 emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_T1, ctx); 954 } else { 955 emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_ZERO, ctx); 956 } 957 958 /* arg1: prog */ 959 emit_imm(RV_REG_A0, (const s64)p, ctx); 960 /* arg2: &run_ctx */ 961 emit_addi(RV_REG_A1, RV_REG_FP, -run_ctx_off, ctx); 962 ret = emit_call((const u64)bpf_trampoline_enter(p), true, ctx); 963 if (ret) 964 return ret; 965 966 /* store prog start time */ 967 emit_mv(RV_REG_S1, RV_REG_A0, ctx); 968 969 /* if (__bpf_prog_enter(prog) == 0) 970 * goto skip_exec_of_prog; 971 */ 972 branch_off = ctx->ninsns; 973 /* nop reserved for conditional jump */ 974 emit(rv_nop(), ctx); 975 976 /* arg1: &args_off */ 977 emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx); 978 if (!p->jited) 979 /* arg2: progs[i]->insnsi for interpreter */ 980 emit_imm(RV_REG_A1, (const s64)p->insnsi, ctx); 981 ret = emit_call((const u64)p->bpf_func, true, ctx); 982 if (ret) 983 return ret; 984 985 if (save_ret) { 986 emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx); 987 emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx); 988 } 989 990 /* update branch with beqz */ 991 if (ctx->insns) { 992 int offset = ninsns_rvoff(ctx->ninsns - branch_off); 993 u32 insn = rv_beq(RV_REG_A0, RV_REG_ZERO, offset >> 1); 994 *(u32 *)(ctx->insns + branch_off) = insn; 995 } 996 997 /* arg1: prog */ 998 emit_imm(RV_REG_A0, (const s64)p, ctx); 999 /* arg2: prog start time */ 1000 emit_mv(RV_REG_A1, RV_REG_S1, ctx); 1001 /* arg3: &run_ctx */ 1002 emit_addi(RV_REG_A2, RV_REG_FP, -run_ctx_off, ctx); 1003 ret = emit_call((const u64)bpf_trampoline_exit(p), true, ctx); 1004 1005 return ret; 1006 } 1007 1008 static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, 1009 const struct btf_func_model *m, 1010 struct bpf_tramp_links *tlinks, 1011 void *func_addr, u32 flags, 1012 struct rv_jit_context *ctx) 1013 { 1014 int i, ret, offset; 1015 int *branches_off = NULL; 1016 int stack_size = 0, nr_arg_slots = 0; 1017 int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off, stk_arg_off; 1018 struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; 1019 struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; 1020 struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; 1021 bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT; 1022 void *orig_call = func_addr; 1023 bool save_ret; 1024 u32 insn; 1025 1026 /* Two types of generated trampoline stack layout: 1027 * 1028 * 1. trampoline called from function entry 1029 * -------------------------------------- 1030 * FP + 8 [ RA to parent func ] return address to parent 1031 * function 1032 * FP + 0 [ FP of parent func ] frame pointer of parent 1033 * function 1034 * FP - 8 [ T0 to traced func ] return address of traced 1035 * function 1036 * FP - 16 [ FP of traced func ] frame pointer of traced 1037 * function 1038 * -------------------------------------- 1039 * 1040 * 2. trampoline called directly 1041 * -------------------------------------- 1042 * FP - 8 [ RA to caller func ] return address to caller 1043 * function 1044 * FP - 16 [ FP of caller func ] frame pointer of caller 1045 * function 1046 * -------------------------------------- 1047 * 1048 * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or 1049 * BPF_TRAMP_F_RET_FENTRY_RET 1050 * [ argN ] 1051 * [ ... ] 1052 * FP - args_off [ arg1 ] 1053 * 1054 * FP - nregs_off [ regs count ] 1055 * 1056 * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG 1057 * 1058 * FP - run_ctx_off [ bpf_tramp_run_ctx ] 1059 * 1060 * FP - sreg_off [ callee saved reg ] 1061 * 1062 * [ pads ] pads for 16 bytes alignment 1063 * 1064 * [ stack_argN ] 1065 * [ ... ] 1066 * FP - stk_arg_off [ stack_arg1 ] BPF_TRAMP_F_CALL_ORIG 1067 */ 1068 1069 if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) 1070 return -ENOTSUPP; 1071 1072 if (m->nr_args > MAX_BPF_FUNC_ARGS) 1073 return -ENOTSUPP; 1074 1075 for (i = 0; i < m->nr_args; i++) 1076 nr_arg_slots += round_up(m->arg_size[i], 8) / 8; 1077 1078 /* room of trampoline frame to store return address and frame pointer */ 1079 stack_size += 16; 1080 1081 save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); 1082 if (save_ret) { 1083 stack_size += 16; /* Save both A5 (BPF R0) and A0 */ 1084 retval_off = stack_size; 1085 } 1086 1087 stack_size += nr_arg_slots * 8; 1088 args_off = stack_size; 1089 1090 stack_size += 8; 1091 nregs_off = stack_size; 1092 1093 if (flags & BPF_TRAMP_F_IP_ARG) { 1094 stack_size += 8; 1095 ip_off = stack_size; 1096 } 1097 1098 stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); 1099 run_ctx_off = stack_size; 1100 1101 stack_size += 8; 1102 sreg_off = stack_size; 1103 1104 if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nr_arg_slots - RV_MAX_REG_ARGS > 0)) 1105 stack_size += (nr_arg_slots - RV_MAX_REG_ARGS) * 8; 1106 1107 stack_size = round_up(stack_size, STACK_ALIGN); 1108 1109 /* room for args on stack must be at the top of stack */ 1110 stk_arg_off = stack_size; 1111 1112 if (!is_struct_ops) { 1113 /* For the trampoline called from function entry, 1114 * the frame of traced function and the frame of 1115 * trampoline need to be considered. 1116 */ 1117 emit_addi(RV_REG_SP, RV_REG_SP, -16, ctx); 1118 emit_sd(RV_REG_SP, 8, RV_REG_RA, ctx); 1119 emit_sd(RV_REG_SP, 0, RV_REG_FP, ctx); 1120 emit_addi(RV_REG_FP, RV_REG_SP, 16, ctx); 1121 1122 emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx); 1123 emit_sd(RV_REG_SP, stack_size - 8, RV_REG_T0, ctx); 1124 emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx); 1125 emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx); 1126 } else { 1127 /* emit kcfi hash */ 1128 emit_kcfi(cfi_get_func_hash(func_addr), ctx); 1129 /* For the trampoline called directly, just handle 1130 * the frame of trampoline. 1131 */ 1132 emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx); 1133 emit_sd(RV_REG_SP, stack_size - 8, RV_REG_RA, ctx); 1134 emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx); 1135 emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx); 1136 } 1137 1138 /* callee saved register S1 to pass start time */ 1139 emit_sd(RV_REG_FP, -sreg_off, RV_REG_S1, ctx); 1140 1141 /* store ip address of the traced function */ 1142 if (flags & BPF_TRAMP_F_IP_ARG) { 1143 emit_imm(RV_REG_T1, (const s64)func_addr, ctx); 1144 emit_sd(RV_REG_FP, -ip_off, RV_REG_T1, ctx); 1145 } 1146 1147 emit_li(RV_REG_T1, nr_arg_slots, ctx); 1148 emit_sd(RV_REG_FP, -nregs_off, RV_REG_T1, ctx); 1149 1150 store_args(nr_arg_slots, args_off, ctx); 1151 1152 /* skip to actual body of traced function */ 1153 if (flags & BPF_TRAMP_F_SKIP_FRAME) 1154 orig_call += RV_FENTRY_NINSNS * 4; 1155 1156 if (flags & BPF_TRAMP_F_CALL_ORIG) { 1157 emit_imm(RV_REG_A0, ctx->insns ? (const s64)im : RV_MAX_COUNT_IMM, ctx); 1158 ret = emit_call((const u64)__bpf_tramp_enter, true, ctx); 1159 if (ret) 1160 return ret; 1161 } 1162 1163 for (i = 0; i < fentry->nr_links; i++) { 1164 ret = invoke_bpf_prog(fentry->links[i], args_off, retval_off, run_ctx_off, 1165 flags & BPF_TRAMP_F_RET_FENTRY_RET, ctx); 1166 if (ret) 1167 return ret; 1168 } 1169 1170 if (fmod_ret->nr_links) { 1171 branches_off = kcalloc(fmod_ret->nr_links, sizeof(int), GFP_KERNEL); 1172 if (!branches_off) 1173 return -ENOMEM; 1174 1175 /* cleanup to avoid garbage return value confusion */ 1176 emit_sd(RV_REG_FP, -retval_off, RV_REG_ZERO, ctx); 1177 for (i = 0; i < fmod_ret->nr_links; i++) { 1178 ret = invoke_bpf_prog(fmod_ret->links[i], args_off, retval_off, 1179 run_ctx_off, true, ctx); 1180 if (ret) 1181 goto out; 1182 emit_ld(RV_REG_T1, -retval_off, RV_REG_FP, ctx); 1183 branches_off[i] = ctx->ninsns; 1184 /* nop reserved for conditional jump */ 1185 emit(rv_nop(), ctx); 1186 } 1187 } 1188 1189 if (flags & BPF_TRAMP_F_CALL_ORIG) { 1190 restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx); 1191 restore_stack_args(nr_arg_slots - RV_MAX_REG_ARGS, args_off, stk_arg_off, ctx); 1192 ret = emit_call((const u64)orig_call, true, ctx); 1193 if (ret) 1194 goto out; 1195 emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx); 1196 emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx); 1197 im->ip_after_call = ctx->ro_insns + ctx->ninsns; 1198 /* 2 nops reserved for auipc+jalr pair */ 1199 emit(rv_nop(), ctx); 1200 emit(rv_nop(), ctx); 1201 } 1202 1203 /* update branches saved in invoke_bpf_mod_ret with bnez */ 1204 for (i = 0; ctx->insns && i < fmod_ret->nr_links; i++) { 1205 offset = ninsns_rvoff(ctx->ninsns - branches_off[i]); 1206 insn = rv_bne(RV_REG_T1, RV_REG_ZERO, offset >> 1); 1207 *(u32 *)(ctx->insns + branches_off[i]) = insn; 1208 } 1209 1210 for (i = 0; i < fexit->nr_links; i++) { 1211 ret = invoke_bpf_prog(fexit->links[i], args_off, retval_off, 1212 run_ctx_off, false, ctx); 1213 if (ret) 1214 goto out; 1215 } 1216 1217 if (flags & BPF_TRAMP_F_CALL_ORIG) { 1218 im->ip_epilogue = ctx->ro_insns + ctx->ninsns; 1219 emit_imm(RV_REG_A0, ctx->insns ? (const s64)im : RV_MAX_COUNT_IMM, ctx); 1220 ret = emit_call((const u64)__bpf_tramp_exit, true, ctx); 1221 if (ret) 1222 goto out; 1223 } 1224 1225 if (flags & BPF_TRAMP_F_RESTORE_REGS) 1226 restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx); 1227 1228 if (save_ret) { 1229 emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx); 1230 emit_ld(regmap[BPF_REG_0], -(retval_off - 8), RV_REG_FP, ctx); 1231 } 1232 1233 emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx); 1234 1235 if (!is_struct_ops) { 1236 /* trampoline called from function entry */ 1237 emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx); 1238 emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx); 1239 emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx); 1240 1241 emit_ld(RV_REG_RA, 8, RV_REG_SP, ctx); 1242 emit_ld(RV_REG_FP, 0, RV_REG_SP, ctx); 1243 emit_addi(RV_REG_SP, RV_REG_SP, 16, ctx); 1244 1245 if (flags & BPF_TRAMP_F_SKIP_FRAME) 1246 /* return to parent function */ 1247 emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx); 1248 else 1249 /* return to traced function */ 1250 emit_jalr(RV_REG_ZERO, RV_REG_T0, 0, ctx); 1251 } else { 1252 /* trampoline called directly */ 1253 emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx); 1254 emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx); 1255 emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx); 1256 1257 emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx); 1258 } 1259 1260 ret = ctx->ninsns; 1261 out: 1262 kfree(branches_off); 1263 return ret; 1264 } 1265 1266 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, 1267 struct bpf_tramp_links *tlinks, void *func_addr) 1268 { 1269 struct bpf_tramp_image im; 1270 struct rv_jit_context ctx; 1271 int ret; 1272 1273 ctx.ninsns = 0; 1274 ctx.insns = NULL; 1275 ctx.ro_insns = NULL; 1276 ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx); 1277 1278 return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns); 1279 } 1280 1281 void *arch_alloc_bpf_trampoline(unsigned int size) 1282 { 1283 return bpf_prog_pack_alloc(size, bpf_fill_ill_insns); 1284 } 1285 1286 void arch_free_bpf_trampoline(void *image, unsigned int size) 1287 { 1288 bpf_prog_pack_free(image, size); 1289 } 1290 1291 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, 1292 void *ro_image_end, const struct btf_func_model *m, 1293 u32 flags, struct bpf_tramp_links *tlinks, 1294 void *func_addr) 1295 { 1296 int ret; 1297 void *image, *res; 1298 struct rv_jit_context ctx; 1299 u32 size = ro_image_end - ro_image; 1300 1301 image = kvmalloc(size, GFP_KERNEL); 1302 if (!image) 1303 return -ENOMEM; 1304 1305 ctx.ninsns = 0; 1306 ctx.insns = image; 1307 ctx.ro_insns = ro_image; 1308 ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx); 1309 if (ret < 0) 1310 goto out; 1311 1312 if (WARN_ON(size < ninsns_rvoff(ctx.ninsns))) { 1313 ret = -E2BIG; 1314 goto out; 1315 } 1316 1317 res = bpf_arch_text_copy(ro_image, image, size); 1318 if (IS_ERR(res)) { 1319 ret = PTR_ERR(res); 1320 goto out; 1321 } 1322 1323 bpf_flush_icache(ro_image, ro_image_end); 1324 out: 1325 kvfree(image); 1326 return ret < 0 ? ret : size; 1327 } 1328 1329 int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, 1330 bool extra_pass) 1331 { 1332 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || 1333 BPF_CLASS(insn->code) == BPF_JMP; 1334 int s, e, rvoff, ret, i = insn - ctx->prog->insnsi; 1335 struct bpf_prog_aux *aux = ctx->prog->aux; 1336 u8 rd = -1, rs = -1, code = insn->code; 1337 s16 off = insn->off; 1338 s32 imm = insn->imm; 1339 1340 init_regs(&rd, &rs, insn, ctx); 1341 1342 switch (code) { 1343 /* dst = src */ 1344 case BPF_ALU | BPF_MOV | BPF_X: 1345 case BPF_ALU64 | BPF_MOV | BPF_X: 1346 if (insn_is_cast_user(insn)) { 1347 emit_mv(RV_REG_T1, rs, ctx); 1348 emit_zextw(RV_REG_T1, RV_REG_T1, ctx); 1349 emit_imm(rd, (ctx->user_vm_start >> 32) << 32, ctx); 1350 emit(rv_beq(RV_REG_T1, RV_REG_ZERO, 4), ctx); 1351 emit_or(RV_REG_T1, rd, RV_REG_T1, ctx); 1352 emit_mv(rd, RV_REG_T1, ctx); 1353 break; 1354 } else if (insn_is_mov_percpu_addr(insn)) { 1355 if (rd != rs) 1356 emit_mv(rd, rs, ctx); 1357 #ifdef CONFIG_SMP 1358 /* Load current CPU number in T1 */ 1359 emit_ld(RV_REG_T1, offsetof(struct thread_info, cpu), 1360 RV_REG_TP, ctx); 1361 /* Load address of __per_cpu_offset array in T2 */ 1362 emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx); 1363 /* Get address of __per_cpu_offset[cpu] in T1 */ 1364 emit_sh3add(RV_REG_T1, RV_REG_T1, RV_REG_T2, ctx); 1365 /* Load __per_cpu_offset[cpu] in T1 */ 1366 emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx); 1367 /* Add the offset to Rd */ 1368 emit_add(rd, rd, RV_REG_T1, ctx); 1369 #endif 1370 } 1371 if (imm == 1) { 1372 /* Special mov32 for zext */ 1373 emit_zextw(rd, rd, ctx); 1374 break; 1375 } 1376 switch (insn->off) { 1377 case 0: 1378 emit_mv(rd, rs, ctx); 1379 break; 1380 case 8: 1381 emit_sextb(rd, rs, ctx); 1382 break; 1383 case 16: 1384 emit_sexth(rd, rs, ctx); 1385 break; 1386 case 32: 1387 emit_sextw(rd, rs, ctx); 1388 break; 1389 } 1390 if (!is64 && !aux->verifier_zext) 1391 emit_zextw(rd, rd, ctx); 1392 break; 1393 1394 /* dst = dst OP src */ 1395 case BPF_ALU | BPF_ADD | BPF_X: 1396 case BPF_ALU64 | BPF_ADD | BPF_X: 1397 emit_add(rd, rd, rs, ctx); 1398 if (!is64 && !aux->verifier_zext) 1399 emit_zextw(rd, rd, ctx); 1400 break; 1401 case BPF_ALU | BPF_SUB | BPF_X: 1402 case BPF_ALU64 | BPF_SUB | BPF_X: 1403 if (is64) 1404 emit_sub(rd, rd, rs, ctx); 1405 else 1406 emit_subw(rd, rd, rs, ctx); 1407 1408 if (!is64 && !aux->verifier_zext) 1409 emit_zextw(rd, rd, ctx); 1410 break; 1411 case BPF_ALU | BPF_AND | BPF_X: 1412 case BPF_ALU64 | BPF_AND | BPF_X: 1413 emit_and(rd, rd, rs, ctx); 1414 if (!is64 && !aux->verifier_zext) 1415 emit_zextw(rd, rd, ctx); 1416 break; 1417 case BPF_ALU | BPF_OR | BPF_X: 1418 case BPF_ALU64 | BPF_OR | BPF_X: 1419 emit_or(rd, rd, rs, ctx); 1420 if (!is64 && !aux->verifier_zext) 1421 emit_zextw(rd, rd, ctx); 1422 break; 1423 case BPF_ALU | BPF_XOR | BPF_X: 1424 case BPF_ALU64 | BPF_XOR | BPF_X: 1425 emit_xor(rd, rd, rs, ctx); 1426 if (!is64 && !aux->verifier_zext) 1427 emit_zextw(rd, rd, ctx); 1428 break; 1429 case BPF_ALU | BPF_MUL | BPF_X: 1430 case BPF_ALU64 | BPF_MUL | BPF_X: 1431 emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx); 1432 if (!is64 && !aux->verifier_zext) 1433 emit_zextw(rd, rd, ctx); 1434 break; 1435 case BPF_ALU | BPF_DIV | BPF_X: 1436 case BPF_ALU64 | BPF_DIV | BPF_X: 1437 if (off) 1438 emit(is64 ? rv_div(rd, rd, rs) : rv_divw(rd, rd, rs), ctx); 1439 else 1440 emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); 1441 if (!is64 && !aux->verifier_zext) 1442 emit_zextw(rd, rd, ctx); 1443 break; 1444 case BPF_ALU | BPF_MOD | BPF_X: 1445 case BPF_ALU64 | BPF_MOD | BPF_X: 1446 if (off) 1447 emit(is64 ? rv_rem(rd, rd, rs) : rv_remw(rd, rd, rs), ctx); 1448 else 1449 emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); 1450 if (!is64 && !aux->verifier_zext) 1451 emit_zextw(rd, rd, ctx); 1452 break; 1453 case BPF_ALU | BPF_LSH | BPF_X: 1454 case BPF_ALU64 | BPF_LSH | BPF_X: 1455 emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx); 1456 if (!is64 && !aux->verifier_zext) 1457 emit_zextw(rd, rd, ctx); 1458 break; 1459 case BPF_ALU | BPF_RSH | BPF_X: 1460 case BPF_ALU64 | BPF_RSH | BPF_X: 1461 emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx); 1462 if (!is64 && !aux->verifier_zext) 1463 emit_zextw(rd, rd, ctx); 1464 break; 1465 case BPF_ALU | BPF_ARSH | BPF_X: 1466 case BPF_ALU64 | BPF_ARSH | BPF_X: 1467 emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx); 1468 if (!is64 && !aux->verifier_zext) 1469 emit_zextw(rd, rd, ctx); 1470 break; 1471 1472 /* dst = -dst */ 1473 case BPF_ALU | BPF_NEG: 1474 case BPF_ALU64 | BPF_NEG: 1475 emit_sub(rd, RV_REG_ZERO, rd, ctx); 1476 if (!is64 && !aux->verifier_zext) 1477 emit_zextw(rd, rd, ctx); 1478 break; 1479 1480 /* dst = BSWAP##imm(dst) */ 1481 case BPF_ALU | BPF_END | BPF_FROM_LE: 1482 switch (imm) { 1483 case 16: 1484 emit_zexth(rd, rd, ctx); 1485 break; 1486 case 32: 1487 if (!aux->verifier_zext) 1488 emit_zextw(rd, rd, ctx); 1489 break; 1490 case 64: 1491 /* Do nothing */ 1492 break; 1493 } 1494 break; 1495 case BPF_ALU | BPF_END | BPF_FROM_BE: 1496 case BPF_ALU64 | BPF_END | BPF_FROM_LE: 1497 emit_bswap(rd, imm, ctx); 1498 break; 1499 1500 /* dst = imm */ 1501 case BPF_ALU | BPF_MOV | BPF_K: 1502 case BPF_ALU64 | BPF_MOV | BPF_K: 1503 emit_imm(rd, imm, ctx); 1504 if (!is64 && !aux->verifier_zext) 1505 emit_zextw(rd, rd, ctx); 1506 break; 1507 1508 /* dst = dst OP imm */ 1509 case BPF_ALU | BPF_ADD | BPF_K: 1510 case BPF_ALU64 | BPF_ADD | BPF_K: 1511 if (is_12b_int(imm)) { 1512 emit_addi(rd, rd, imm, ctx); 1513 } else { 1514 emit_imm(RV_REG_T1, imm, ctx); 1515 emit_add(rd, rd, RV_REG_T1, ctx); 1516 } 1517 if (!is64 && !aux->verifier_zext) 1518 emit_zextw(rd, rd, ctx); 1519 break; 1520 case BPF_ALU | BPF_SUB | BPF_K: 1521 case BPF_ALU64 | BPF_SUB | BPF_K: 1522 if (is_12b_int(-imm)) { 1523 emit_addi(rd, rd, -imm, ctx); 1524 } else { 1525 emit_imm(RV_REG_T1, imm, ctx); 1526 emit_sub(rd, rd, RV_REG_T1, ctx); 1527 } 1528 if (!is64 && !aux->verifier_zext) 1529 emit_zextw(rd, rd, ctx); 1530 break; 1531 case BPF_ALU | BPF_AND | BPF_K: 1532 case BPF_ALU64 | BPF_AND | BPF_K: 1533 if (is_12b_int(imm)) { 1534 emit_andi(rd, rd, imm, ctx); 1535 } else { 1536 emit_imm(RV_REG_T1, imm, ctx); 1537 emit_and(rd, rd, RV_REG_T1, ctx); 1538 } 1539 if (!is64 && !aux->verifier_zext) 1540 emit_zextw(rd, rd, ctx); 1541 break; 1542 case BPF_ALU | BPF_OR | BPF_K: 1543 case BPF_ALU64 | BPF_OR | BPF_K: 1544 if (is_12b_int(imm)) { 1545 emit(rv_ori(rd, rd, imm), ctx); 1546 } else { 1547 emit_imm(RV_REG_T1, imm, ctx); 1548 emit_or(rd, rd, RV_REG_T1, ctx); 1549 } 1550 if (!is64 && !aux->verifier_zext) 1551 emit_zextw(rd, rd, ctx); 1552 break; 1553 case BPF_ALU | BPF_XOR | BPF_K: 1554 case BPF_ALU64 | BPF_XOR | BPF_K: 1555 if (is_12b_int(imm)) { 1556 emit(rv_xori(rd, rd, imm), ctx); 1557 } else { 1558 emit_imm(RV_REG_T1, imm, ctx); 1559 emit_xor(rd, rd, RV_REG_T1, ctx); 1560 } 1561 if (!is64 && !aux->verifier_zext) 1562 emit_zextw(rd, rd, ctx); 1563 break; 1564 case BPF_ALU | BPF_MUL | BPF_K: 1565 case BPF_ALU64 | BPF_MUL | BPF_K: 1566 emit_imm(RV_REG_T1, imm, ctx); 1567 emit(is64 ? rv_mul(rd, rd, RV_REG_T1) : 1568 rv_mulw(rd, rd, RV_REG_T1), ctx); 1569 if (!is64 && !aux->verifier_zext) 1570 emit_zextw(rd, rd, ctx); 1571 break; 1572 case BPF_ALU | BPF_DIV | BPF_K: 1573 case BPF_ALU64 | BPF_DIV | BPF_K: 1574 emit_imm(RV_REG_T1, imm, ctx); 1575 if (off) 1576 emit(is64 ? rv_div(rd, rd, RV_REG_T1) : 1577 rv_divw(rd, rd, RV_REG_T1), ctx); 1578 else 1579 emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : 1580 rv_divuw(rd, rd, RV_REG_T1), ctx); 1581 if (!is64 && !aux->verifier_zext) 1582 emit_zextw(rd, rd, ctx); 1583 break; 1584 case BPF_ALU | BPF_MOD | BPF_K: 1585 case BPF_ALU64 | BPF_MOD | BPF_K: 1586 emit_imm(RV_REG_T1, imm, ctx); 1587 if (off) 1588 emit(is64 ? rv_rem(rd, rd, RV_REG_T1) : 1589 rv_remw(rd, rd, RV_REG_T1), ctx); 1590 else 1591 emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : 1592 rv_remuw(rd, rd, RV_REG_T1), ctx); 1593 if (!is64 && !aux->verifier_zext) 1594 emit_zextw(rd, rd, ctx); 1595 break; 1596 case BPF_ALU | BPF_LSH | BPF_K: 1597 case BPF_ALU64 | BPF_LSH | BPF_K: 1598 emit_slli(rd, rd, imm, ctx); 1599 1600 if (!is64 && !aux->verifier_zext) 1601 emit_zextw(rd, rd, ctx); 1602 break; 1603 case BPF_ALU | BPF_RSH | BPF_K: 1604 case BPF_ALU64 | BPF_RSH | BPF_K: 1605 if (is64) 1606 emit_srli(rd, rd, imm, ctx); 1607 else 1608 emit(rv_srliw(rd, rd, imm), ctx); 1609 1610 if (!is64 && !aux->verifier_zext) 1611 emit_zextw(rd, rd, ctx); 1612 break; 1613 case BPF_ALU | BPF_ARSH | BPF_K: 1614 case BPF_ALU64 | BPF_ARSH | BPF_K: 1615 if (is64) 1616 emit_srai(rd, rd, imm, ctx); 1617 else 1618 emit(rv_sraiw(rd, rd, imm), ctx); 1619 1620 if (!is64 && !aux->verifier_zext) 1621 emit_zextw(rd, rd, ctx); 1622 break; 1623 1624 /* JUMP off */ 1625 case BPF_JMP | BPF_JA: 1626 case BPF_JMP32 | BPF_JA: 1627 if (BPF_CLASS(code) == BPF_JMP) 1628 rvoff = rv_offset(i, off, ctx); 1629 else 1630 rvoff = rv_offset(i, imm, ctx); 1631 ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); 1632 if (ret) 1633 return ret; 1634 break; 1635 1636 /* IF (dst COND src) JUMP off */ 1637 case BPF_JMP | BPF_JEQ | BPF_X: 1638 case BPF_JMP32 | BPF_JEQ | BPF_X: 1639 case BPF_JMP | BPF_JGT | BPF_X: 1640 case BPF_JMP32 | BPF_JGT | BPF_X: 1641 case BPF_JMP | BPF_JLT | BPF_X: 1642 case BPF_JMP32 | BPF_JLT | BPF_X: 1643 case BPF_JMP | BPF_JGE | BPF_X: 1644 case BPF_JMP32 | BPF_JGE | BPF_X: 1645 case BPF_JMP | BPF_JLE | BPF_X: 1646 case BPF_JMP32 | BPF_JLE | BPF_X: 1647 case BPF_JMP | BPF_JNE | BPF_X: 1648 case BPF_JMP32 | BPF_JNE | BPF_X: 1649 case BPF_JMP | BPF_JSGT | BPF_X: 1650 case BPF_JMP32 | BPF_JSGT | BPF_X: 1651 case BPF_JMP | BPF_JSLT | BPF_X: 1652 case BPF_JMP32 | BPF_JSLT | BPF_X: 1653 case BPF_JMP | BPF_JSGE | BPF_X: 1654 case BPF_JMP32 | BPF_JSGE | BPF_X: 1655 case BPF_JMP | BPF_JSLE | BPF_X: 1656 case BPF_JMP32 | BPF_JSLE | BPF_X: 1657 case BPF_JMP | BPF_JSET | BPF_X: 1658 case BPF_JMP32 | BPF_JSET | BPF_X: 1659 rvoff = rv_offset(i, off, ctx); 1660 if (!is64) { 1661 s = ctx->ninsns; 1662 if (is_signed_bpf_cond(BPF_OP(code))) { 1663 emit_sextw_alt(&rs, RV_REG_T1, ctx); 1664 emit_sextw_alt(&rd, RV_REG_T2, ctx); 1665 } else { 1666 emit_zextw_alt(&rs, RV_REG_T1, ctx); 1667 emit_zextw_alt(&rd, RV_REG_T2, ctx); 1668 } 1669 e = ctx->ninsns; 1670 1671 /* Adjust for extra insns */ 1672 rvoff -= ninsns_rvoff(e - s); 1673 } 1674 1675 if (BPF_OP(code) == BPF_JSET) { 1676 /* Adjust for and */ 1677 rvoff -= 4; 1678 emit_and(RV_REG_T1, rd, rs, ctx); 1679 emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx); 1680 } else { 1681 emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); 1682 } 1683 break; 1684 1685 /* IF (dst COND imm) JUMP off */ 1686 case BPF_JMP | BPF_JEQ | BPF_K: 1687 case BPF_JMP32 | BPF_JEQ | BPF_K: 1688 case BPF_JMP | BPF_JGT | BPF_K: 1689 case BPF_JMP32 | BPF_JGT | BPF_K: 1690 case BPF_JMP | BPF_JLT | BPF_K: 1691 case BPF_JMP32 | BPF_JLT | BPF_K: 1692 case BPF_JMP | BPF_JGE | BPF_K: 1693 case BPF_JMP32 | BPF_JGE | BPF_K: 1694 case BPF_JMP | BPF_JLE | BPF_K: 1695 case BPF_JMP32 | BPF_JLE | BPF_K: 1696 case BPF_JMP | BPF_JNE | BPF_K: 1697 case BPF_JMP32 | BPF_JNE | BPF_K: 1698 case BPF_JMP | BPF_JSGT | BPF_K: 1699 case BPF_JMP32 | BPF_JSGT | BPF_K: 1700 case BPF_JMP | BPF_JSLT | BPF_K: 1701 case BPF_JMP32 | BPF_JSLT | BPF_K: 1702 case BPF_JMP | BPF_JSGE | BPF_K: 1703 case BPF_JMP32 | BPF_JSGE | BPF_K: 1704 case BPF_JMP | BPF_JSLE | BPF_K: 1705 case BPF_JMP32 | BPF_JSLE | BPF_K: 1706 rvoff = rv_offset(i, off, ctx); 1707 s = ctx->ninsns; 1708 if (imm) 1709 emit_imm(RV_REG_T1, imm, ctx); 1710 rs = imm ? RV_REG_T1 : RV_REG_ZERO; 1711 if (!is64) { 1712 if (is_signed_bpf_cond(BPF_OP(code))) { 1713 emit_sextw_alt(&rd, RV_REG_T2, ctx); 1714 /* rs has been sign extended */ 1715 } else { 1716 emit_zextw_alt(&rd, RV_REG_T2, ctx); 1717 if (imm) 1718 emit_zextw(rs, rs, ctx); 1719 } 1720 } 1721 e = ctx->ninsns; 1722 1723 /* Adjust for extra insns */ 1724 rvoff -= ninsns_rvoff(e - s); 1725 emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); 1726 break; 1727 1728 case BPF_JMP | BPF_JSET | BPF_K: 1729 case BPF_JMP32 | BPF_JSET | BPF_K: 1730 rvoff = rv_offset(i, off, ctx); 1731 s = ctx->ninsns; 1732 if (is_12b_int(imm)) { 1733 emit_andi(RV_REG_T1, rd, imm, ctx); 1734 } else { 1735 emit_imm(RV_REG_T1, imm, ctx); 1736 emit_and(RV_REG_T1, rd, RV_REG_T1, ctx); 1737 } 1738 /* For jset32, we should clear the upper 32 bits of t1, but 1739 * sign-extension is sufficient here and saves one instruction, 1740 * as t1 is used only in comparison against zero. 1741 */ 1742 if (!is64 && imm < 0) 1743 emit_sextw(RV_REG_T1, RV_REG_T1, ctx); 1744 e = ctx->ninsns; 1745 rvoff -= ninsns_rvoff(e - s); 1746 emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx); 1747 break; 1748 1749 /* function call */ 1750 case BPF_JMP | BPF_CALL: 1751 { 1752 bool fixed_addr; 1753 u64 addr; 1754 1755 /* Inline calls to bpf_get_smp_processor_id() 1756 * 1757 * RV_REG_TP holds the address of the current CPU's task_struct and thread_info is 1758 * at offset 0 in task_struct. 1759 * Load cpu from thread_info: 1760 * Set R0 to ((struct thread_info *)(RV_REG_TP))->cpu 1761 * 1762 * This replicates the implementation of raw_smp_processor_id() on RISCV 1763 */ 1764 if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { 1765 /* Load current CPU number in R0 */ 1766 emit_ld(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu), 1767 RV_REG_TP, ctx); 1768 break; 1769 } 1770 1771 mark_call(ctx); 1772 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, 1773 &addr, &fixed_addr); 1774 if (ret < 0) 1775 return ret; 1776 1777 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 1778 const struct btf_func_model *fm; 1779 int idx; 1780 1781 fm = bpf_jit_find_kfunc_model(ctx->prog, insn); 1782 if (!fm) 1783 return -EINVAL; 1784 1785 for (idx = 0; idx < fm->nr_args; idx++) { 1786 u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx); 1787 1788 if (fm->arg_size[idx] == sizeof(int)) 1789 emit_sextw(reg, reg, ctx); 1790 } 1791 } 1792 1793 ret = emit_call(addr, fixed_addr, ctx); 1794 if (ret) 1795 return ret; 1796 1797 if (insn->src_reg != BPF_PSEUDO_CALL) 1798 emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx); 1799 break; 1800 } 1801 /* tail call */ 1802 case BPF_JMP | BPF_TAIL_CALL: 1803 if (emit_bpf_tail_call(i, ctx)) 1804 return -1; 1805 break; 1806 1807 /* function return */ 1808 case BPF_JMP | BPF_EXIT: 1809 if (i == ctx->prog->len - 1) 1810 break; 1811 1812 rvoff = epilogue_offset(ctx); 1813 ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); 1814 if (ret) 1815 return ret; 1816 break; 1817 1818 /* dst = imm64 */ 1819 case BPF_LD | BPF_IMM | BPF_DW: 1820 { 1821 struct bpf_insn insn1 = insn[1]; 1822 u64 imm64; 1823 1824 imm64 = (u64)insn1.imm << 32 | (u32)imm; 1825 if (bpf_pseudo_func(insn)) { 1826 /* fixed-length insns for extra jit pass */ 1827 ret = emit_addr(rd, imm64, extra_pass, ctx); 1828 if (ret) 1829 return ret; 1830 } else { 1831 emit_imm(rd, imm64, ctx); 1832 } 1833 1834 return 1; 1835 } 1836 1837 /* LDX: dst = *(unsigned size *)(src + off) */ 1838 case BPF_LDX | BPF_MEM | BPF_B: 1839 case BPF_LDX | BPF_MEM | BPF_H: 1840 case BPF_LDX | BPF_MEM | BPF_W: 1841 case BPF_LDX | BPF_MEM | BPF_DW: 1842 case BPF_LDX | BPF_PROBE_MEM | BPF_B: 1843 case BPF_LDX | BPF_PROBE_MEM | BPF_H: 1844 case BPF_LDX | BPF_PROBE_MEM | BPF_W: 1845 case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 1846 /* LDSX: dst = *(signed size *)(src + off) */ 1847 case BPF_LDX | BPF_MEMSX | BPF_B: 1848 case BPF_LDX | BPF_MEMSX | BPF_H: 1849 case BPF_LDX | BPF_MEMSX | BPF_W: 1850 case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: 1851 case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: 1852 case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: 1853 /* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + RV_REG_ARENA + off) */ 1854 case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: 1855 case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: 1856 case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: 1857 case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: 1858 { 1859 bool sign_ext; 1860 int insn_len; 1861 1862 sign_ext = BPF_MODE(insn->code) == BPF_MEMSX || 1863 BPF_MODE(insn->code) == BPF_PROBE_MEMSX; 1864 1865 if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { 1866 emit_add(RV_REG_T2, rs, RV_REG_ARENA, ctx); 1867 rs = RV_REG_T2; 1868 } 1869 1870 switch (BPF_SIZE(code)) { 1871 case BPF_B: 1872 insn_len = emit_load_8(sign_ext, rd, off, rs, ctx); 1873 break; 1874 case BPF_H: 1875 insn_len = emit_load_16(sign_ext, rd, off, rs, ctx); 1876 break; 1877 case BPF_W: 1878 insn_len = emit_load_32(sign_ext, rd, off, rs, ctx); 1879 break; 1880 case BPF_DW: 1881 insn_len = emit_load_64(sign_ext, rd, off, rs, ctx); 1882 break; 1883 } 1884 1885 ret = add_exception_handler(insn, ctx, rd, insn_len); 1886 if (ret) 1887 return ret; 1888 1889 if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1])) 1890 return 1; 1891 break; 1892 } 1893 /* speculation barrier */ 1894 case BPF_ST | BPF_NOSPEC: 1895 break; 1896 1897 /* ST: *(size *)(dst + off) = imm */ 1898 case BPF_ST | BPF_MEM | BPF_B: 1899 emit_imm(RV_REG_T1, imm, ctx); 1900 if (is_12b_int(off)) { 1901 emit(rv_sb(rd, off, RV_REG_T1), ctx); 1902 break; 1903 } 1904 1905 emit_imm(RV_REG_T2, off, ctx); 1906 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 1907 emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx); 1908 break; 1909 1910 case BPF_ST | BPF_MEM | BPF_H: 1911 emit_imm(RV_REG_T1, imm, ctx); 1912 if (is_12b_int(off)) { 1913 emit(rv_sh(rd, off, RV_REG_T1), ctx); 1914 break; 1915 } 1916 1917 emit_imm(RV_REG_T2, off, ctx); 1918 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 1919 emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx); 1920 break; 1921 case BPF_ST | BPF_MEM | BPF_W: 1922 emit_imm(RV_REG_T1, imm, ctx); 1923 if (is_12b_int(off)) { 1924 emit_sw(rd, off, RV_REG_T1, ctx); 1925 break; 1926 } 1927 1928 emit_imm(RV_REG_T2, off, ctx); 1929 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 1930 emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx); 1931 break; 1932 case BPF_ST | BPF_MEM | BPF_DW: 1933 emit_imm(RV_REG_T1, imm, ctx); 1934 if (is_12b_int(off)) { 1935 emit_sd(rd, off, RV_REG_T1, ctx); 1936 break; 1937 } 1938 1939 emit_imm(RV_REG_T2, off, ctx); 1940 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 1941 emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx); 1942 break; 1943 1944 case BPF_ST | BPF_PROBE_MEM32 | BPF_B: 1945 case BPF_ST | BPF_PROBE_MEM32 | BPF_H: 1946 case BPF_ST | BPF_PROBE_MEM32 | BPF_W: 1947 case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: 1948 { 1949 int insn_len, insns_start; 1950 1951 emit_add(RV_REG_T3, rd, RV_REG_ARENA, ctx); 1952 rd = RV_REG_T3; 1953 1954 /* Load imm to a register then store it */ 1955 emit_imm(RV_REG_T1, imm, ctx); 1956 1957 switch (BPF_SIZE(code)) { 1958 case BPF_B: 1959 if (is_12b_int(off)) { 1960 insns_start = ctx->ninsns; 1961 emit(rv_sb(rd, off, RV_REG_T1), ctx); 1962 insn_len = ctx->ninsns - insns_start; 1963 break; 1964 } 1965 1966 emit_imm(RV_REG_T2, off, ctx); 1967 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 1968 insns_start = ctx->ninsns; 1969 emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx); 1970 insn_len = ctx->ninsns - insns_start; 1971 break; 1972 case BPF_H: 1973 if (is_12b_int(off)) { 1974 insns_start = ctx->ninsns; 1975 emit(rv_sh(rd, off, RV_REG_T1), ctx); 1976 insn_len = ctx->ninsns - insns_start; 1977 break; 1978 } 1979 1980 emit_imm(RV_REG_T2, off, ctx); 1981 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 1982 insns_start = ctx->ninsns; 1983 emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx); 1984 insn_len = ctx->ninsns - insns_start; 1985 break; 1986 case BPF_W: 1987 if (is_12b_int(off)) { 1988 insns_start = ctx->ninsns; 1989 emit_sw(rd, off, RV_REG_T1, ctx); 1990 insn_len = ctx->ninsns - insns_start; 1991 break; 1992 } 1993 1994 emit_imm(RV_REG_T2, off, ctx); 1995 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 1996 insns_start = ctx->ninsns; 1997 emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx); 1998 insn_len = ctx->ninsns - insns_start; 1999 break; 2000 case BPF_DW: 2001 if (is_12b_int(off)) { 2002 insns_start = ctx->ninsns; 2003 emit_sd(rd, off, RV_REG_T1, ctx); 2004 insn_len = ctx->ninsns - insns_start; 2005 break; 2006 } 2007 2008 emit_imm(RV_REG_T2, off, ctx); 2009 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 2010 insns_start = ctx->ninsns; 2011 emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx); 2012 insn_len = ctx->ninsns - insns_start; 2013 break; 2014 } 2015 2016 ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER, 2017 insn_len); 2018 if (ret) 2019 return ret; 2020 2021 break; 2022 } 2023 2024 /* STX: *(size *)(dst + off) = src */ 2025 case BPF_STX | BPF_MEM | BPF_B: 2026 emit_store_8(rd, off, rs, ctx); 2027 break; 2028 case BPF_STX | BPF_MEM | BPF_H: 2029 emit_store_16(rd, off, rs, ctx); 2030 break; 2031 case BPF_STX | BPF_MEM | BPF_W: 2032 emit_store_32(rd, off, rs, ctx); 2033 break; 2034 case BPF_STX | BPF_MEM | BPF_DW: 2035 emit_store_64(rd, off, rs, ctx); 2036 break; 2037 case BPF_STX | BPF_ATOMIC | BPF_B: 2038 case BPF_STX | BPF_ATOMIC | BPF_H: 2039 case BPF_STX | BPF_ATOMIC | BPF_W: 2040 case BPF_STX | BPF_ATOMIC | BPF_DW: 2041 if (bpf_atomic_is_load_store(insn)) 2042 ret = emit_atomic_ld_st(rd, rs, insn, ctx); 2043 else 2044 ret = emit_atomic_rmw(rd, rs, insn, ctx); 2045 if (ret) 2046 return ret; 2047 break; 2048 2049 case BPF_STX | BPF_PROBE_MEM32 | BPF_B: 2050 case BPF_STX | BPF_PROBE_MEM32 | BPF_H: 2051 case BPF_STX | BPF_PROBE_MEM32 | BPF_W: 2052 case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: 2053 { 2054 int insn_len, insns_start; 2055 2056 emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx); 2057 rd = RV_REG_T2; 2058 2059 switch (BPF_SIZE(code)) { 2060 case BPF_B: 2061 if (is_12b_int(off)) { 2062 insns_start = ctx->ninsns; 2063 emit(rv_sb(rd, off, rs), ctx); 2064 insn_len = ctx->ninsns - insns_start; 2065 break; 2066 } 2067 2068 emit_imm(RV_REG_T1, off, ctx); 2069 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 2070 insns_start = ctx->ninsns; 2071 emit(rv_sb(RV_REG_T1, 0, rs), ctx); 2072 insn_len = ctx->ninsns - insns_start; 2073 break; 2074 case BPF_H: 2075 if (is_12b_int(off)) { 2076 insns_start = ctx->ninsns; 2077 emit(rv_sh(rd, off, rs), ctx); 2078 insn_len = ctx->ninsns - insns_start; 2079 break; 2080 } 2081 2082 emit_imm(RV_REG_T1, off, ctx); 2083 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 2084 insns_start = ctx->ninsns; 2085 emit(rv_sh(RV_REG_T1, 0, rs), ctx); 2086 insn_len = ctx->ninsns - insns_start; 2087 break; 2088 case BPF_W: 2089 if (is_12b_int(off)) { 2090 insns_start = ctx->ninsns; 2091 emit_sw(rd, off, rs, ctx); 2092 insn_len = ctx->ninsns - insns_start; 2093 break; 2094 } 2095 2096 emit_imm(RV_REG_T1, off, ctx); 2097 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 2098 insns_start = ctx->ninsns; 2099 emit_sw(RV_REG_T1, 0, rs, ctx); 2100 insn_len = ctx->ninsns - insns_start; 2101 break; 2102 case BPF_DW: 2103 if (is_12b_int(off)) { 2104 insns_start = ctx->ninsns; 2105 emit_sd(rd, off, rs, ctx); 2106 insn_len = ctx->ninsns - insns_start; 2107 break; 2108 } 2109 2110 emit_imm(RV_REG_T1, off, ctx); 2111 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 2112 insns_start = ctx->ninsns; 2113 emit_sd(RV_REG_T1, 0, rs, ctx); 2114 insn_len = ctx->ninsns - insns_start; 2115 break; 2116 } 2117 2118 ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER, 2119 insn_len); 2120 if (ret) 2121 return ret; 2122 2123 break; 2124 } 2125 2126 default: 2127 pr_err("bpf-jit: unknown opcode %02x\n", code); 2128 return -EINVAL; 2129 } 2130 2131 return 0; 2132 } 2133 2134 void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog) 2135 { 2136 int i, stack_adjust = 0, store_offset, bpf_stack_adjust; 2137 2138 bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, STACK_ALIGN); 2139 if (bpf_stack_adjust) 2140 mark_fp(ctx); 2141 2142 if (seen_reg(RV_REG_RA, ctx)) 2143 stack_adjust += 8; 2144 stack_adjust += 8; /* RV_REG_FP */ 2145 if (seen_reg(RV_REG_S1, ctx)) 2146 stack_adjust += 8; 2147 if (seen_reg(RV_REG_S2, ctx)) 2148 stack_adjust += 8; 2149 if (seen_reg(RV_REG_S3, ctx)) 2150 stack_adjust += 8; 2151 if (seen_reg(RV_REG_S4, ctx)) 2152 stack_adjust += 8; 2153 if (seen_reg(RV_REG_S5, ctx)) 2154 stack_adjust += 8; 2155 if (seen_reg(RV_REG_S6, ctx)) 2156 stack_adjust += 8; 2157 if (ctx->arena_vm_start) 2158 stack_adjust += 8; 2159 2160 stack_adjust = round_up(stack_adjust, STACK_ALIGN); 2161 stack_adjust += bpf_stack_adjust; 2162 2163 store_offset = stack_adjust - 8; 2164 2165 /* emit kcfi type preamble immediately before the first insn */ 2166 emit_kcfi(is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash, ctx); 2167 2168 /* nops reserved for auipc+jalr pair */ 2169 for (i = 0; i < RV_FENTRY_NINSNS; i++) 2170 emit(rv_nop(), ctx); 2171 2172 /* First instruction is always setting the tail-call-counter 2173 * (TCC) register. This instruction is skipped for tail calls. 2174 * Force using a 4-byte (non-compressed) instruction. 2175 */ 2176 emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx); 2177 2178 emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx); 2179 2180 if (seen_reg(RV_REG_RA, ctx)) { 2181 emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx); 2182 store_offset -= 8; 2183 } 2184 emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx); 2185 store_offset -= 8; 2186 if (seen_reg(RV_REG_S1, ctx)) { 2187 emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx); 2188 store_offset -= 8; 2189 } 2190 if (seen_reg(RV_REG_S2, ctx)) { 2191 emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx); 2192 store_offset -= 8; 2193 } 2194 if (seen_reg(RV_REG_S3, ctx)) { 2195 emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx); 2196 store_offset -= 8; 2197 } 2198 if (seen_reg(RV_REG_S4, ctx)) { 2199 emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx); 2200 store_offset -= 8; 2201 } 2202 if (seen_reg(RV_REG_S5, ctx)) { 2203 emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx); 2204 store_offset -= 8; 2205 } 2206 if (seen_reg(RV_REG_S6, ctx)) { 2207 emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx); 2208 store_offset -= 8; 2209 } 2210 if (ctx->arena_vm_start) { 2211 emit_sd(RV_REG_SP, store_offset, RV_REG_ARENA, ctx); 2212 store_offset -= 8; 2213 } 2214 2215 emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx); 2216 2217 if (bpf_stack_adjust) 2218 emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx); 2219 2220 /* Program contains calls and tail calls, so RV_REG_TCC need 2221 * to be saved across calls. 2222 */ 2223 if (seen_tail_call(ctx) && seen_call(ctx)) 2224 emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx); 2225 2226 ctx->stack_size = stack_adjust; 2227 2228 if (ctx->arena_vm_start) 2229 emit_imm(RV_REG_ARENA, ctx->arena_vm_start, ctx); 2230 } 2231 2232 void bpf_jit_build_epilogue(struct rv_jit_context *ctx) 2233 { 2234 __build_epilogue(false, ctx); 2235 } 2236 2237 bool bpf_jit_supports_kfunc_call(void) 2238 { 2239 return true; 2240 } 2241 2242 bool bpf_jit_supports_ptr_xchg(void) 2243 { 2244 return true; 2245 } 2246 2247 bool bpf_jit_supports_arena(void) 2248 { 2249 return true; 2250 } 2251 2252 bool bpf_jit_supports_percpu_insn(void) 2253 { 2254 return true; 2255 } 2256 2257 bool bpf_jit_inlines_helper_call(s32 imm) 2258 { 2259 switch (imm) { 2260 case BPF_FUNC_get_smp_processor_id: 2261 return true; 2262 default: 2263 return false; 2264 } 2265 } 2266