// SPDX-License-Identifier: GPL-2.0-only
/*
 * Just-In-Time compiler for eBPF bytecode on MIPS.
 * Implementation of JIT functions common to 32-bit and 64-bit CPUs.
 *
 * Copyright (c) 2021 Anyfi Networks AB.
 * Author: Johan Almbladh <johan.almbladh@gmail.com>
 *
 * Based on code and ideas from
 * Copyright (c) 2017 Cavium, Inc.
 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 */

/*
 * Code overview
 * =============
 *
 * - bpf_jit_comp.h
 *   Common definitions and utilities.
 *
 * - bpf_jit_comp.c
 *   Implementation of JIT top-level logic and exported JIT API functions.
 *   Implementation of internal operations shared by 32-bit and 64-bit code.
 *   JMP and ALU JIT control code, register control code, shared ALU and
 *   JMP/JMP32 JIT operations.
 *
 * - bpf_jit_comp32.c
 *   Implementation of functions to JIT prologue, epilogue and a single eBPF
 *   instruction for 32-bit MIPS CPUs. The functions use shared operations
 *   where possible, and implement the rest for 32-bit MIPS such as ALU64
 *   operations.
 *
 * - bpf_jit_comp64.c
 *   Ditto, for 64-bit MIPS CPUs.
 *
 * Zero and sign extension
 * =======================
 * 32-bit MIPS instructions on 64-bit MIPS registers use sign extension,
 * but the eBPF instruction set mandates zero extension. We let the verifier
 * insert explicit zero-extensions after 32-bit ALU operations, both for
 * 32-bit and 64-bit MIPS JITs. Conditional JMP32 operations on 64-bit MIPS
 * are JITed with sign extensions inserted where expected.
 *
 * ALU operations
 * ==============
 * ALU operations on 32/64-bit MIPS and ALU64 operations on 64-bit MIPS are
 * JITed in the following steps. ALU64 operations on 32-bit MIPS are more
 * complicated and therefore only processed by special implementations in
 * step (3).
 *
 * 1) valid_alu_i:
 *    Determine if an immediate operation can be emitted as such, or if
 *    we must fall back to the register version.
 *
 * 2) rewrite_alu_i:
 *    Convert BPF operation and immediate value to a canonical form for
 *    JITing. In some degenerate cases this form may be a no-op.
 *
 * 3) emit_alu_{i,i64,r,r64}:
 *    Emit instructions for an ALU or ALU64 immediate or register operation.
 *
 * JMP operations
 * ==============
 * JMP and JMP32 operations require a JIT instruction offset table for
 * translating the jump offset. This table is computed by dry-running the
 * JIT without actually emitting anything. However, the computed PC-relative
 * offset may overflow the 18-bit offset field width of the native MIPS
 * branch instruction. In such cases, the long jump is converted into the
 * following sequence.
 *
 *    <branch> !<cond> +2    Inverted PC-relative branch
 *    nop                    Delay slot
 *    j <offset>             Unconditional absolute long jump
 *    nop                    Delay slot
 *
 * Since this converted sequence alters the offset table, all offsets must
 * be re-calculated. This may in turn trigger new branch conversions, so
 * the process is repeated until no further changes are made. Normally it
 * completes in 1-2 iterations. If JIT_MAX_ITERATIONS is reached, we fall
 * back to converting every remaining jump operation. The branch conversion
 * is independent of how the JMP or JMP32 condition is JITed.
 *
 * JMP32 and JMP operations are JITed as follows.
 *
 * 1) setup_jmp_{i,r}:
 *    Convert jump conditional and offset into a form that can be JITed.
 *    This form may be a no-op, a canonical form, or an inverted PC-relative
 *    jump if branch conversion is necessary.
 *
 * 2) valid_jmp_i:
 *    Determine if an immediate operation can be emitted as such, or if
 *    we must fall back to the register version. Applies to JMP32 for 32-bit
 *    MIPS, and both JMP and JMP32 for 64-bit MIPS.
 *
 * 3) emit_jmp_{i,i64,r,r64}:
 *    Emit instructions for a JMP or JMP32 immediate or register operation.
 *
 * 4) finish_jmp_{i,r}:
 *    Emit any instructions needed to finish the jump. This includes a nop
 *    for the delay slot if a branch was emitted, and a long absolute jump
 *    if the branch was converted.
 */
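/*
 * Illustrative example (a sketch based on the helpers below, not part of
 * the original commentary): a BPF_JMP | BPF_JEQ | BPF_X instruction with
 * an in-range target is JITed by emit_jmp_r() and finish_jmp() as
 *
 *    beq  <dst>, <src>, <offset>
 *    nop                            # delay slot
 *
 * while the same instruction with an out-of-range target is inverted by
 * setup_jmp_r() and completed by finish_jmp() with an absolute jump:
 *
 *    bne  <dst>, <src>, <skip>      # inverted condition, skips the jump
 *    nop                            # delay slot
 *    j    <target>
 *    nop                            # delay slot
 */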

#include <linux/limits.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/slab.h>
#include <asm/bitops.h>
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
#include <asm/isa-rev.h>
#include <asm/uasm.h>

#include "bpf_jit_comp.h"

/* Convenience macros for descriptor access */
#define CONVERTED(desc)	((desc) & JIT_DESC_CONVERT)
#define INDEX(desc)	((desc) & ~JIT_DESC_CONVERT)

/*
 * Push registers on the stack, starting at a given depth from the stack
 * pointer and increasing. The next depth to be written is returned.
 */
int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth)
{
	int reg;

	for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++)
		if (mask & BIT(reg)) {
			if ((excl & BIT(reg)) == 0) {
				if (sizeof(long) == 4)
					emit(ctx, sw, reg, depth, MIPS_R_SP);
				else /* sizeof(long) == 8 */
					emit(ctx, sd, reg, depth, MIPS_R_SP);
			}
			depth += sizeof(long);
		}

	ctx->stack_used = max((int)ctx->stack_used, depth);
	return depth;
}

/*
 * Pop registers from the stack, starting at a given depth from the stack
 * pointer and increasing. The next depth to be read is returned.
 */
int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth)
{
	int reg;

	for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++)
		if (mask & BIT(reg)) {
			if ((excl & BIT(reg)) == 0) {
				if (sizeof(long) == 4)
					emit(ctx, lw, reg, depth, MIPS_R_SP);
				else /* sizeof(long) == 8 */
					emit(ctx, ld, reg, depth, MIPS_R_SP);
			}
			depth += sizeof(long);
		}

	return depth;
}

/* Compute the 28-bit jump target address from a BPF program location */
int get_target(struct jit_context *ctx, u32 loc)
{
	u32 index = INDEX(ctx->descriptors[loc]);
	unsigned long pc = (unsigned long)&ctx->target[ctx->jit_index];
	unsigned long addr = (unsigned long)&ctx->target[index];

	if (!ctx->target)
		return 0;

	if ((addr ^ pc) & ~MIPS_JMP_MASK)
		return -1;

	return addr & MIPS_JMP_MASK;
}

/* Compute the PC-relative offset to a relative BPF program offset */
int get_offset(const struct jit_context *ctx, int off)
{
	return (INDEX(ctx->descriptors[ctx->bpf_index + off]) -
		ctx->jit_index - 1) * sizeof(u32);
}

/* dst = imm (register width) */
void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm)
{
	if (imm >= -0x8000 && imm <= 0x7fff) {
		emit(ctx, addiu, dst, MIPS_R_ZERO, imm);
	} else {
		emit(ctx, lui, dst, (s16)((u32)imm >> 16));
		emit(ctx, ori, dst, dst, (u16)(imm & 0xffff));
	}
	clobber_reg(ctx, dst);
}

/* dst = src (register width) */
void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src)
{
	emit(ctx, ori, dst, src, 0);
	clobber_reg(ctx, dst);
}

/* Validate ALU immediate range */
bool valid_alu_i(u8 op, s32 imm)
{
	switch (BPF_OP(op)) {
	case BPF_NEG:
	case BPF_LSH:
	case BPF_RSH:
	case BPF_ARSH:
		/* All legal eBPF values are valid */
		return true;
	case BPF_ADD:
		/* imm must be 16 bits */
		return imm >= -0x8000 && imm <= 0x7fff;
	case BPF_SUB:
		/* -imm must be 16 bits */
		return imm >= -0x7fff && imm <= 0x8000;
	case BPF_AND:
	case BPF_OR:
	case BPF_XOR:
		/* imm must be 16 bits unsigned */
		return imm >= 0 && imm <= 0xffff;
	case BPF_MUL:
		/* imm must be zero or a positive power of two */
		return imm == 0 || (imm > 0 && is_power_of_2(imm));
	case BPF_DIV:
	case BPF_MOD:
		/* imm must be a 17-bit power of two */
		return (u32)imm <= 0x10000 && is_power_of_2((u32)imm);
	}
	return false;
}

/* Rewrite ALU immediate operation */
bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val)
{
	bool act = true;

	switch (BPF_OP(op)) {
	case BPF_LSH:
	case BPF_RSH:
	case BPF_ARSH:
	case BPF_ADD:
	case BPF_SUB:
	case BPF_OR:
	case BPF_XOR:
		/* imm == 0 is a no-op */
		act = imm != 0;
		break;
	case BPF_MUL:
		if (imm == 1) {
			/* dst * 1 is a no-op */
			act = false;
		} else if (imm == 0) {
			/* dst * 0 is dst & 0 */
			op = BPF_AND;
		} else {
			/* dst * (1 << n) is dst << n */
			op = BPF_LSH;
			imm = ilog2(abs(imm));
		}
		break;
	case BPF_DIV:
		if (imm == 1) {
			/* dst / 1 is a no-op */
			act = false;
		} else {
			/* dst / (1 << n) is dst >> n */
			op = BPF_RSH;
			imm = ilog2(imm);
		}
		break;
	case BPF_MOD:
		/* dst % (1 << n) is dst & ((1 << n) - 1) */
		op = BPF_AND;
		imm--;
		break;
	}

	*alu = op;
	*val = imm;
	return act;
}
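/*
 * Illustrative example (a sketch, not part of the original commentary):
 * for BPF_ALU | BPF_DIV | BPF_K with imm == 8, valid_alu_i() accepts the
 * immediate (a power of two no larger than 0x10000), rewrite_alu_i()
 * canonicalizes the operation to BPF_RSH with imm == 3, and emit_alu_i()
 * below then emits a single "srl dst, dst, 3". A BPF_MUL with imm == 1 is
 * reported as a no-op (rewrite_alu_i() returns false), so nothing needs
 * to be emitted for it.
 */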
/* ALU immediate operation (32-bit) */
void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op)
{
	switch (BPF_OP(op)) {
	/* dst = -dst */
	case BPF_NEG:
		emit(ctx, subu, dst, MIPS_R_ZERO, dst);
		break;
	/* dst = dst & imm */
	case BPF_AND:
		emit(ctx, andi, dst, dst, (u16)imm);
		break;
	/* dst = dst | imm */
	case BPF_OR:
		emit(ctx, ori, dst, dst, (u16)imm);
		break;
	/* dst = dst ^ imm */
	case BPF_XOR:
		emit(ctx, xori, dst, dst, (u16)imm);
		break;
	/* dst = dst << imm */
	case BPF_LSH:
		emit(ctx, sll, dst, dst, imm);
		break;
	/* dst = dst >> imm */
	case BPF_RSH:
		emit(ctx, srl, dst, dst, imm);
		break;
	/* dst = dst >> imm (arithmetic) */
	case BPF_ARSH:
		emit(ctx, sra, dst, dst, imm);
		break;
	/* dst = dst + imm */
	case BPF_ADD:
		emit(ctx, addiu, dst, dst, imm);
		break;
	/* dst = dst - imm */
	case BPF_SUB:
		emit(ctx, addiu, dst, dst, -imm);
		break;
	}
	clobber_reg(ctx, dst);
}

/* ALU register operation (32-bit) */
void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op)
{
	switch (BPF_OP(op)) {
	/* dst = dst & src */
	case BPF_AND:
		emit(ctx, and, dst, dst, src);
		break;
	/* dst = dst | src */
	case BPF_OR:
		emit(ctx, or, dst, dst, src);
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		emit(ctx, xor, dst, dst, src);
		break;
	/* dst = dst << src */
	case BPF_LSH:
		emit(ctx, sllv, dst, dst, src);
		break;
	/* dst = dst >> src */
	case BPF_RSH:
		emit(ctx, srlv, dst, dst, src);
		break;
	/* dst = dst >> src (arithmetic) */
	case BPF_ARSH:
		emit(ctx, srav, dst, dst, src);
		break;
	/* dst = dst + src */
	case BPF_ADD:
		emit(ctx, addu, dst, dst, src);
		break;
	/* dst = dst - src */
	case BPF_SUB:
		emit(ctx, subu, dst, dst, src);
		break;
	/* dst = dst * src */
	case BPF_MUL:
		if (cpu_has_mips32r1 || cpu_has_mips32r6) {
			emit(ctx, mul, dst, dst, src);
		} else {
			emit(ctx, multu, dst, src);
			emit(ctx, mflo, dst);
		}
		break;
	/* dst = dst / src */
	case BPF_DIV:
		if (cpu_has_mips32r6) {
			emit(ctx, divu_r6, dst, dst, src);
		} else {
			emit(ctx, divu, dst, src);
			emit(ctx, mflo, dst);
		}
		break;
	/* dst = dst % src */
	case BPF_MOD:
		if (cpu_has_mips32r6) {
			emit(ctx, modu, dst, dst, src);
		} else {
			emit(ctx, divu, dst, src);
			emit(ctx, mfhi, dst);
		}
		break;
	}
	clobber_reg(ctx, dst);
}

/* Atomic read-modify-write (32-bit) */
void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code)
{
	LLSC_sync(ctx);
	emit(ctx, ll, MIPS_R_T9, off, dst);
	switch (code) {
	case BPF_ADD:
	case BPF_ADD | BPF_FETCH:
		emit(ctx, addu, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_AND:
	case BPF_AND | BPF_FETCH:
		emit(ctx, and, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_OR:
	case BPF_OR | BPF_FETCH:
		emit(ctx, or, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_XOR:
	case BPF_XOR | BPF_FETCH:
		emit(ctx, xor, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_XCHG:
		emit(ctx, move, MIPS_R_T8, src);
		break;
	}
	emit(ctx, sc, MIPS_R_T8, off, dst);
	emit(ctx, LLSC_beqz, MIPS_R_T8, -16 - LLSC_offset);
	emit(ctx, nop); /* Delay slot */

	if (code & BPF_FETCH) {
		emit(ctx, move, src, MIPS_R_T9);
		clobber_reg(ctx, src);
	}
}

/* Atomic compare-and-exchange (32-bit) */
void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off)
{
	LLSC_sync(ctx);
	emit(ctx, ll, MIPS_R_T9, off, dst);
	emit(ctx, bne, MIPS_R_T9, res, 12);
	emit(ctx, move, MIPS_R_T8, src);	/* Delay slot */
	emit(ctx, sc, MIPS_R_T8, off, dst);
	emit(ctx, LLSC_beqz, MIPS_R_T8, -20 - LLSC_offset);
	emit(ctx, move, res, MIPS_R_T9);	/* Delay slot */
	clobber_reg(ctx, res);
}

/* Swap bytes and truncate a register word or half word */
void emit_bswap_r(struct jit_context *ctx, u8 dst, u32 width)
{
	u8 tmp = MIPS_R_T8;
	u8 msk = MIPS_R_T9;

	switch (width) {
	/* Swap bytes in a word */
	case 32:
		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
			emit(ctx, wsbh, dst, dst);
			emit(ctx, rotr, dst, dst, 16);
		} else {
			emit(ctx, sll, tmp, dst, 16);    /* tmp = dst << 16 */
			emit(ctx, srl, dst, dst, 16);    /* dst = dst >> 16 */
			emit(ctx, or, dst, dst, tmp);    /* dst = dst | tmp */

			emit(ctx, lui, msk, 0xff);       /* msk = 0x00ff0000 */
			emit(ctx, ori, msk, msk, 0xff);  /* msk = msk | 0xff */

			emit(ctx, and, tmp, dst, msk);   /* tmp = dst & msk */
			emit(ctx, sll, tmp, tmp, 8);     /* tmp = tmp << 8 */
			emit(ctx, srl, dst, dst, 8);     /* dst = dst >> 8 */
			emit(ctx, and, dst, dst, msk);   /* dst = dst & msk */
			emit(ctx, or, dst, dst, tmp);    /* dst = dst | tmp */
		}
		break;
	/* Swap bytes in a half word */
	case 16:
		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
			emit(ctx, wsbh, dst, dst);
			emit(ctx, andi, dst, dst, 0xffff);
		} else {
			emit(ctx, andi, tmp, dst, 0xff00); /* t = d & 0xff00 */
			emit(ctx, srl, tmp, tmp, 8);       /* t = t >> 8 */
			emit(ctx, andi, dst, dst, 0x00ff); /* d = d & 0x00ff */
			emit(ctx, sll, dst, dst, 8);       /* d = d << 8 */
			emit(ctx, or, dst, dst, tmp);      /* d = d | t */
		}
		break;
	}
	clobber_reg(ctx, dst);
}

/* Validate jump immediate range */
bool valid_jmp_i(u8 op, s32 imm)
{
	switch (op) {
	case JIT_JNOP:
		/* Immediate value not used */
		return true;
	case BPF_JEQ:
	case BPF_JNE:
		/* No immediate operation */
		return false;
	case BPF_JSET:
	case JIT_JNSET:
		/* imm must be 16 bits unsigned */
		return imm >= 0 && imm <= 0xffff;
	case BPF_JGE:
	case BPF_JLT:
	case BPF_JSGE:
	case BPF_JSLT:
		/* imm must be 16 bits */
		return imm >= -0x8000 && imm <= 0x7fff;
	case BPF_JGT:
	case BPF_JLE:
	case BPF_JSGT:
	case BPF_JSLE:
		/* imm + 1 must be 16 bits */
		return imm >= -0x8001 && imm <= 0x7ffe;
	}
	return false;
}

/* Invert a conditional jump operation */
static u8 invert_jmp(u8 op)
{
	switch (op) {
	case BPF_JA: return JIT_JNOP;
	case BPF_JEQ: return BPF_JNE;
	case BPF_JNE: return BPF_JEQ;
	case BPF_JSET: return JIT_JNSET;
	case BPF_JGT: return BPF_JLE;
	case BPF_JGE: return BPF_JLT;
	case BPF_JLT: return BPF_JGE;
	case BPF_JLE: return BPF_JGT;
	case BPF_JSGT: return BPF_JSLE;
	case BPF_JSGE: return BPF_JSLT;
	case BPF_JSLT: return BPF_JSGE;
	case BPF_JSLE: return BPF_JSGT;
	}
	return 0;
}

/* Prepare a PC-relative jump operation */
static void setup_jmp(struct jit_context *ctx, u8 bpf_op,
		      s16 bpf_off, u8 *jit_op, s32 *jit_off)
{
	u32 *descp = &ctx->descriptors[ctx->bpf_index];
	int op = bpf_op;
	int offset = 0;

	/* Do not compute offsets on the first pass */
	if (INDEX(*descp) == 0)
		goto done;

	/* Skip jumps never taken */
	if (bpf_op == JIT_JNOP)
		goto done;

	/* Convert jumps always taken */
	if (bpf_op == BPF_JA)
		*descp |= JIT_DESC_CONVERT;

	/*
	 * Current ctx->jit_index points to the start of the branch preamble.
	 * Since the preamble differs among different branch conditionals,
	 * the current index cannot be used to compute the branch offset.
	 * Instead, we use the offset table value for the next instruction,
	 * which gives the index immediately after the branch delay slot.
	 */
	if (!CONVERTED(*descp)) {
		int target = ctx->bpf_index + bpf_off + 1;
		int origin = ctx->bpf_index + 1;

		offset = (INDEX(ctx->descriptors[target]) -
			  INDEX(ctx->descriptors[origin]) + 1) * sizeof(u32);
	}

	/*
	 * The PC-relative branch offset field on MIPS is 18 bits signed,
	 * so if the computed offset is larger than this we generate an
	 * absolute jump that we skip with an inverted conditional branch.
	 */
	if (CONVERTED(*descp) || offset < -0x20000 || offset > 0x1ffff) {
		offset = 3 * sizeof(u32);
		op = invert_jmp(bpf_op);
		ctx->changes += !CONVERTED(*descp);
		*descp |= JIT_DESC_CONVERT;
	}

done:
	*jit_off = offset;
	*jit_op = op;
}

/* Prepare a PC-relative jump operation with immediate conditional */
void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width,
		 u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off)
{
	bool always = false;
	bool never = false;

	switch (bpf_op) {
	case BPF_JEQ:
	case BPF_JNE:
		break;
	case BPF_JSET:
	case BPF_JLT:
		never = imm == 0;
		break;
	case BPF_JGE:
		always = imm == 0;
		break;
	case BPF_JGT:
		never = (u32)imm == U32_MAX;
		break;
	case BPF_JLE:
		always = (u32)imm == U32_MAX;
		break;
	case BPF_JSGT:
		never = imm == S32_MAX && width == 32;
		break;
	case BPF_JSGE:
		always = imm == S32_MIN && width == 32;
		break;
	case BPF_JSLT:
		never = imm == S32_MIN && width == 32;
		break;
	case BPF_JSLE:
		always = imm == S32_MAX && width == 32;
		break;
	}

	if (never)
		bpf_op = JIT_JNOP;
	if (always)
		bpf_op = BPF_JA;
	setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off);
}

/* Prepare a PC-relative jump operation with register conditional */
void setup_jmp_r(struct jit_context *ctx, bool same_reg,
		 u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off)
{
	switch (bpf_op) {
	case BPF_JSET:
		break;
	case BPF_JEQ:
	case BPF_JGE:
	case BPF_JLE:
	case BPF_JSGE:
	case BPF_JSLE:
		if (same_reg)
			bpf_op = BPF_JA;
		break;
	case BPF_JNE:
	case BPF_JLT:
	case BPF_JGT:
	case BPF_JSGT:
	case BPF_JSLT:
		if (same_reg)
			bpf_op = JIT_JNOP;
		break;
	}
	setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off);
}

/* Finish a PC-relative jump operation */
int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off)
{
	/* Emit conditional branch delay slot */
	if (jit_op != JIT_JNOP)
		emit(ctx, nop);
	/*
	 * Emit an absolute long jump with delay slot,
	 * if the PC-relative branch was converted.
	 */
	if (CONVERTED(ctx->descriptors[ctx->bpf_index])) {
		int target = get_target(ctx, ctx->bpf_index + bpf_off + 1);

		if (target < 0)
			return -1;
		emit(ctx, j, target);
		emit(ctx, nop);
	}
	return 0;
}

/* Jump immediate (32-bit) */
void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 off, u8 op)
{
	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst & imm */
	case BPF_JSET:
		emit(ctx, andi, MIPS_R_T9, dst, (u16)imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
	case JIT_JNSET:
		emit(ctx, andi, MIPS_R_T9, dst, (u16)imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > imm */
	case BPF_JGT:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= imm */
	case BPF_JGE:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < imm */
	case BPF_JLT:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= imm */
	case BPF_JLE:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst > imm (signed) */
	case BPF_JSGT:
		emit(ctx, slti, MIPS_R_T9, dst, imm + 1);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= imm (signed) */
	case BPF_JSGE:
		emit(ctx, slti, MIPS_R_T9, dst, imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < imm (signed) */
	case BPF_JSLT:
		emit(ctx, slti, MIPS_R_T9, dst, imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= imm (signed) */
	case BPF_JSLE:
		emit(ctx, slti, MIPS_R_T9, dst, imm + 1);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	}
}

/* Jump register (32-bit) */
void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op)
{
	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst == src */
	case BPF_JEQ:
		emit(ctx, beq, dst, src, off);
		break;
	/* PC += off if dst != src */
	case BPF_JNE:
		emit(ctx, bne, dst, src, off);
		break;
	/* PC += off if dst & src */
	case BPF_JSET:
		emit(ctx, and, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
	case JIT_JNSET:
		emit(ctx, and, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > src */
	case BPF_JGT:
		emit(ctx, sltu, MIPS_R_T9, src, dst);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= src */
	case BPF_JGE:
		emit(ctx, sltu, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < src */
	case BPF_JLT:
		emit(ctx, sltu, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= src */
	case BPF_JLE:
		emit(ctx, sltu, MIPS_R_T9, src, dst);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > src (signed) */
	case BPF_JSGT:
		emit(ctx, slt, MIPS_R_T9, src, dst);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= src (signed) */
	case BPF_JSGE:
		emit(ctx, slt, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < src (signed) */
	case BPF_JSLT:
		emit(ctx, slt, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= src (signed) */
	case BPF_JSLE:
		emit(ctx, slt, MIPS_R_T9, src, dst);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	}
}

/* Jump always */
int emit_ja(struct jit_context *ctx, s16 off)
{
	int target = get_target(ctx, ctx->bpf_index + off + 1);

	if (target < 0)
		return -1;
	emit(ctx, j, target);
	emit(ctx, nop);
	return 0;
}

/* Jump to epilogue */
int emit_exit(struct jit_context *ctx)
{
	int target = get_target(ctx, ctx->program->len);

	if (target < 0)
		return -1;
	emit(ctx, j, target);
	emit(ctx, nop);
	return 0;
}

/* Build the program body from eBPF bytecode */
static int build_body(struct jit_context *ctx)
{
	const struct bpf_prog *prog = ctx->program;
	unsigned int i;

	ctx->stack_used = 0;
	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		u32 *descp = &ctx->descriptors[i];
		int ret;

		access_reg(ctx, insn->src_reg);
		access_reg(ctx, insn->dst_reg);

		ctx->bpf_index = i;
		if (ctx->target == NULL) {
			ctx->changes += INDEX(*descp) != ctx->jit_index;
			*descp &= JIT_DESC_CONVERT;
			*descp |= ctx->jit_index;
		}

		ret = build_insn(insn, ctx);
		if (ret < 0)
			return ret;

		if (ret > 0) {
			i++;
			if (ctx->target == NULL)
				descp[1] = ctx->jit_index;
		}
	}

	/* Store the end offset, where the epilogue begins */
	ctx->descriptors[prog->len] = ctx->jit_index;
	return 0;
}

/* Set the branch conversion flag on all instructions */
static void set_convert_flag(struct jit_context *ctx, bool enable)
{
	const struct bpf_prog *prog = ctx->program;
	u32 flag = enable ? JIT_DESC_CONVERT : 0;
	unsigned int i;

	for (i = 0; i <= prog->len; i++)
		ctx->descriptors[i] = INDEX(ctx->descriptors[i]) | flag;
}

static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *p;

	/* We are guaranteed to have aligned memory. */
	for (p = area; size >= sizeof(u32); size -= sizeof(u32))
		uasm_i_break(&p, BRK_BUG); /* Increments p */
}

bool bpf_jit_needs_zext(void)
{
	return true;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header = NULL;
	struct jit_context ctx;
	bool tmp_blinded = false;
	unsigned int tmp_idx;
	unsigned int image_size;
	u8 *image_ptr;
	int tries;

	/*
	 * If BPF JIT was not enabled then we must fall back to
	 * the interpreter.
	 */
	if (!prog->jit_requested)
		return orig_prog;
	/*
	 * If constant blinding was enabled and we failed during blinding
	 * then we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	tmp = bpf_jit_blind_constants(prog);
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.program = prog;

	/*
	 * If we are not able to allocate memory for descriptors[], then
	 * we must fall back to the interpreter.
	 */
	ctx.descriptors = kcalloc(prog->len + 1, sizeof(*ctx.descriptors),
				  GFP_KERNEL);
	if (ctx.descriptors == NULL)
		goto out_err;

	/* First pass discovers used resources */
	if (build_body(&ctx) < 0)
		goto out_err;
	/*
	 * Second pass computes instruction offsets.
	 * If any PC-relative branches are out of range, a sequence of
	 * a PC-relative branch + a jump is generated, and we have to
	 * try again from the beginning to generate the new offsets.
	 * This is done until no additional conversions are necessary.
	 * The last two iterations are done with all branches being
	 * converted, to guarantee offset table convergence within a
	 * fixed number of iterations.
	 */
	ctx.jit_index = 0;
	build_prologue(&ctx);
	tmp_idx = ctx.jit_index;

	tries = JIT_MAX_ITERATIONS;
	do {
		ctx.jit_index = tmp_idx;
		ctx.changes = 0;
		if (tries == 2)
			set_convert_flag(&ctx, true);
		if (build_body(&ctx) < 0)
			goto out_err;
	} while (ctx.changes > 0 && --tries > 0);

	if (WARN_ONCE(ctx.changes > 0, "JIT offsets failed to converge"))
		goto out_err;

	build_epilogue(&ctx, MIPS_R_RA);

	/* Now we know the size of the structure to make */
	image_size = sizeof(u32) * ctx.jit_index;
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	/*
	 * If we are not able to allocate memory for the structure, then
	 * we must fall back to the interpreter.
	 */
	if (header == NULL)
		goto out_err;

	/* Actual pass to generate final JIT code */
	ctx.target = (u32 *)image_ptr;
	ctx.jit_index = 0;

	/*
	 * If building the JITed code fails somehow,
	 * we fall back to the interpreter.
	 */
	build_prologue(&ctx);
	if (build_body(&ctx) < 0)
		goto out_err;
	build_epilogue(&ctx, MIPS_R_RA);

	/* Populate line info meta data */
	set_convert_flag(&ctx, false);
	bpf_prog_fill_jited_linfo(prog, &ctx.descriptors[1]);

	/* Set as read-only exec and flush instruction cache */
	bpf_jit_binary_lock_ro(header);
	flush_icache_range((unsigned long)header,
			   (unsigned long)&ctx.target[ctx.jit_index]);

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, image_size, 2, ctx.target);

	prog->bpf_func = (void *)ctx.target;
	prog->jited = 1;
	prog->jited_len = image_size;

out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	kfree(ctx.descriptors);
	return prog;

out_err:
	prog = orig_prog;
	if (header)
		bpf_jit_binary_free(header);
	goto out;
}