// SPDX-License-Identifier: GPL-2.0-only
/*
 * Just-In-Time compiler for eBPF bytecode on MIPS.
 * Implementation of JIT functions common to 32-bit and 64-bit CPUs.
 *
 * Copyright (c) 2021 Anyfi Networks AB.
 * Author: Johan Almbladh <johan.almbladh@gmail.com>
 *
 * Based on code and ideas from
 * Copyright (c) 2017 Cavium, Inc.
 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 */

/*
 * Code overview
 * =============
 *
 * - bpf_jit_comp.h
 *   Common definitions and utilities.
 *
 * - bpf_jit_comp.c
 *   Implementation of JIT top-level logic and exported JIT API functions.
 *   Implementation of internal operations shared by 32-bit and 64-bit code.
 *   JMP and ALU JIT control code, register control code, shared ALU and
 *   JMP/JMP32 JIT operations.
 *
 * - bpf_jit_comp32.c
 *   Implementation of functions to JIT prologue, epilogue and a single eBPF
 *   instruction for 32-bit MIPS CPUs. The functions use shared operations
 *   where possible, and implement the rest for 32-bit MIPS such as ALU64
 *   operations.
 *
 * - bpf_jit_comp64.c
 *   Ditto, for 64-bit MIPS CPUs.
 *
 * Zero and sign extension
 * =======================
 * 32-bit MIPS instructions on 64-bit MIPS registers use sign extension,
 * but the eBPF instruction set mandates zero extension. We let the verifier
 * insert explicit zero-extensions after 32-bit ALU operations, both for
 * 32-bit and 64-bit MIPS JITs. Conditional JMP32 operations on 64-bit MIPS
 * are JITed with sign extensions inserted where required.
 *
 * ALU operations
 * ==============
 * ALU operations on 32/64-bit MIPS and ALU64 operations on 64-bit MIPS are
 * JITed in the following steps. ALU64 operations on 32-bit MIPS are more
 * complicated and therefore only processed by special implementations in
 * step (3).
 *
 * 1) valid_alu_i:
 *    Determine if an immediate operation can be emitted as such, or if
 *    we must fall back to the register version.
 *
 * 2) rewrite_alu_i:
 *    Convert BPF operation and immediate value to a canonical form for
 *    JITing. In some degenerate cases this form may be a no-op.
 *
 * 3) emit_alu_{i,i64,r,r64}:
 *    Emit instructions for an ALU or ALU64 immediate or register operation.
 *
 * JMP operations
 * ==============
 * JMP and JMP32 operations require a JIT instruction offset table for
 * translating the jump offset. This table is computed by dry-running the
 * JIT without actually emitting anything. However, the computed PC-relative
 * offset may overflow the 18-bit offset field width of the native MIPS
 * branch instruction. In such cases, the long jump is converted into the
 * following sequence.
 *
 *    <branch> !<cond> +2    Inverted PC-relative branch
 *    nop                    Delay slot
 *    j <offset>             Unconditional absolute long jump
 *    nop                    Delay slot
 *
 * Since this converted sequence alters the offset table, all offsets must
 * be re-calculated. This may in turn trigger new branch conversions, so
 * the process is repeated until no further changes are made. Normally it
 * completes in 1-2 iterations. If JIT_MAX_ITERATIONS is reached, we
 * fall back to converting every remaining jump operation. The branch
 * conversion is independent of how the JMP or JMP32 condition is JITed.
 *
 * JMP32 and JMP operations are JITed as follows.
 *
 * 1) setup_jmp_{i,r}:
 *    Convert jump conditional and offset into a form that can be JITed.
 *    This form may be a no-op, a canonical form, or an inverted PC-relative
 *    jump if branch conversion is necessary.
 *
 * 2) valid_jmp_i:
 *    Determine if an immediate operation can be emitted as such, or if
 *    we must fall back to the register version. Applies to JMP32 for 32-bit
 *    MIPS, and both JMP and JMP32 for 64-bit MIPS.
 *
 * 3) emit_jmp_{i,i64,r,r64}:
 *    Emit instructions for a JMP or JMP32 immediate or register operation.
 *
 * 4) finish_jmp_{i,r}:
 *    Emit any instructions needed to finish the jump. This includes a nop
 *    for the delay slot if a branch was emitted, and a long absolute jump
 *    if the branch was converted.
 */
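
/*
 * Illustrative example of the JMP steps above (not taken from the original
 * sources): for a conditional jump such as BPF_JMP | BPF_JEQ | BPF_X whose
 * translated offset fits in the 18-bit branch field, setup_jmp_r() leaves
 * the condition as-is, emit_jmp_r() emits "beq dst, src, off", and
 * finish_jmp() adds the nop delay slot. If the offset is out of range,
 * setup_jmp() instead marks the descriptor as converted and returns the
 * inverted condition BPF_JNE with a fixed offset of 3 * sizeof(u32), so
 * the emitted "bne" skips over the unconditional "j <target>" and its
 * delay slot that finish_jmp() appends.
 */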

#include <linux/limits.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/slab.h>
#include <asm/bitops.h>
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
#include <asm/isa-rev.h>
#include <asm/uasm.h>

#include "bpf_jit_comp.h"

/* Convenience macros for descriptor access */
#define CONVERTED(desc)	((desc) & JIT_DESC_CONVERT)
#define INDEX(desc)	((desc) & ~JIT_DESC_CONVERT)

/*
 * Push registers on the stack, starting at a given depth from the stack
 * pointer and increasing. The next depth to be written is returned.
 */
int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth)
{
	int reg;

	for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++)
		if (mask & BIT(reg)) {
			if ((excl & BIT(reg)) == 0) {
				if (sizeof(long) == 4)
					emit(ctx, sw, reg, depth, MIPS_R_SP);
				else /* sizeof(long) == 8 */
					emit(ctx, sd, reg, depth, MIPS_R_SP);
			}
			depth += sizeof(long);
		}

	ctx->stack_used = max((int)ctx->stack_used, depth);
	return depth;
}

/*
 * Pop registers from the stack, starting at a given depth from the stack
 * pointer and increasing. The next depth to be read is returned.
 */
int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth)
{
	int reg;

	for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++)
		if (mask & BIT(reg)) {
			if ((excl & BIT(reg)) == 0) {
				if (sizeof(long) == 4)
					emit(ctx, lw, reg, depth, MIPS_R_SP);
				else /* sizeof(long) == 8 */
					emit(ctx, ld, reg, depth, MIPS_R_SP);
			}
			depth += sizeof(long);
		}

	return depth;
}
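
/*
 * Illustrative usage of the helpers above (hypothetical call, assuming the
 * callee-saved register numbers MIPS_R_S0 and MIPS_R_S1 are defined along
 * with the other MIPS_R_* constants in bpf_jit_comp.h):
 *
 *	depth = push_regs(ctx, BIT(MIPS_R_S0) | BIT(MIPS_R_S1), 0, 0);
 *
 * stores s0 at SP + 0 and s1 at SP + sizeof(long) and returns
 * 2 * sizeof(long); the matching pop_regs() call with the same mask and
 * depth restores them. Registers set in excl are skipped but still advance
 * the depth, so the stack layout is fixed by the mask alone.
 */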

/* Compute the 28-bit jump target address from a BPF program location */
int get_target(struct jit_context *ctx, u32 loc)
{
	u32 index = INDEX(ctx->descriptors[loc]);
	unsigned long pc = (unsigned long)&ctx->target[ctx->jit_index];
	unsigned long addr = (unsigned long)&ctx->target[index];

	if (!ctx->target)
		return 0;

	if ((addr ^ pc) & ~MIPS_JMP_MASK)
		return -1;

	return addr & MIPS_JMP_MASK;
}

/* Compute the PC-relative offset to a relative BPF program offset */
int get_offset(const struct jit_context *ctx, int off)
{
	return (INDEX(ctx->descriptors[ctx->bpf_index + off]) -
		ctx->jit_index - 1) * sizeof(u32);
}

/* dst = imm (register width) */
void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm)
{
	if (imm >= -0x8000 && imm <= 0x7fff) {
		emit(ctx, addiu, dst, MIPS_R_ZERO, imm);
	} else {
		emit(ctx, lui, dst, (s16)((u32)imm >> 16));
		emit(ctx, ori, dst, dst, (u16)(imm & 0xffff));
	}
	clobber_reg(ctx, dst);
}

/* dst = src (register width) */
void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src)
{
	emit(ctx, ori, dst, src, 0);
	clobber_reg(ctx, dst);
}

/* Validate ALU immediate range */
bool valid_alu_i(u8 op, s32 imm)
{
	switch (BPF_OP(op)) {
	case BPF_NEG:
	case BPF_LSH:
	case BPF_RSH:
	case BPF_ARSH:
		/* All legal eBPF values are valid */
		return true;
	case BPF_ADD:
		if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS))
			return false;
		/* imm must be 16 bits */
		return imm >= -0x8000 && imm <= 0x7fff;
	case BPF_SUB:
		if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS))
			return false;
		/* -imm must be 16 bits */
		return imm >= -0x7fff && imm <= 0x8000;
	case BPF_AND:
	case BPF_OR:
	case BPF_XOR:
		/* imm must be 16 bits unsigned */
		return imm >= 0 && imm <= 0xffff;
	case BPF_MUL:
		/* imm must be zero or a positive power of two */
		return imm == 0 || (imm > 0 && is_power_of_2(imm));
	case BPF_DIV:
	case BPF_MOD:
		/* imm must be a 17-bit power of two */
		return (u32)imm <= 0x10000 && is_power_of_2((u32)imm);
	}
	return false;
}

/* Rewrite ALU immediate operation */
bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val)
{
	bool act = true;

	switch (BPF_OP(op)) {
	case BPF_LSH:
	case BPF_RSH:
	case BPF_ARSH:
	case BPF_ADD:
	case BPF_SUB:
	case BPF_OR:
	case BPF_XOR:
		/* imm == 0 is a no-op */
		act = imm != 0;
		break;
	case BPF_MUL:
		if (imm == 1) {
			/* dst * 1 is a no-op */
			act = false;
		} else if (imm == 0) {
			/* dst * 0 is dst & 0 */
			op = BPF_AND;
		} else {
			/* dst * (1 << n) is dst << n */
			op = BPF_LSH;
			imm = ilog2(abs(imm));
		}
		break;
	case BPF_DIV:
		if (imm == 1) {
			/* dst / 1 is a no-op */
			act = false;
		} else {
			/* dst / (1 << n) is dst >> n */
			op = BPF_RSH;
			imm = ilog2(imm);
		}
		break;
	case BPF_MOD:
		/* dst % (1 << n) is dst & ((1 << n) - 1) */
		op = BPF_AND;
		imm--;
		break;
	}

	*alu = op;
	*val = imm;
	return act;
}
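
/*
 * Illustrative examples of the rewriting above (not taken from the original
 * sources): BPF_MUL with imm = 4 passes valid_alu_i() and is rewritten to
 * BPF_LSH with imm = 2; BPF_DIV with imm = 8 becomes BPF_RSH with imm = 3;
 * BPF_MOD with imm = 16 becomes BPF_AND with imm = 15; BPF_ADD with
 * imm = 0 returns false, so the operation is dropped as a no-op.
 */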

/* ALU immediate operation (32-bit) */
void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op)
{
	switch (BPF_OP(op)) {
	/* dst = -dst */
	case BPF_NEG:
		emit(ctx, subu, dst, MIPS_R_ZERO, dst);
		break;
	/* dst = dst & imm */
	case BPF_AND:
		emit(ctx, andi, dst, dst, (u16)imm);
		break;
	/* dst = dst | imm */
	case BPF_OR:
		emit(ctx, ori, dst, dst, (u16)imm);
		break;
	/* dst = dst ^ imm */
	case BPF_XOR:
		emit(ctx, xori, dst, dst, (u16)imm);
		break;
	/* dst = dst << imm */
	case BPF_LSH:
		emit(ctx, sll, dst, dst, imm);
		break;
	/* dst = dst >> imm */
	case BPF_RSH:
		emit(ctx, srl, dst, dst, imm);
		break;
	/* dst = dst >> imm (arithmetic) */
	case BPF_ARSH:
		emit(ctx, sra, dst, dst, imm);
		break;
	/* dst = dst + imm */
	case BPF_ADD:
		emit(ctx, addiu, dst, dst, imm);
		break;
	/* dst = dst - imm */
	case BPF_SUB:
		emit(ctx, addiu, dst, dst, -imm);
		break;
	}
	clobber_reg(ctx, dst);
}

/* ALU register operation (32-bit) */
void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op)
{
	switch (BPF_OP(op)) {
	/* dst = dst & src */
	case BPF_AND:
		emit(ctx, and, dst, dst, src);
		break;
	/* dst = dst | src */
	case BPF_OR:
		emit(ctx, or, dst, dst, src);
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		emit(ctx, xor, dst, dst, src);
		break;
	/* dst = dst << src */
	case BPF_LSH:
		emit(ctx, sllv, dst, dst, src);
		break;
	/* dst = dst >> src */
	case BPF_RSH:
		emit(ctx, srlv, dst, dst, src);
		break;
	/* dst = dst >> src (arithmetic) */
	case BPF_ARSH:
		emit(ctx, srav, dst, dst, src);
		break;
	/* dst = dst + src */
	case BPF_ADD:
		emit(ctx, addu, dst, dst, src);
		break;
	/* dst = dst - src */
	case BPF_SUB:
		emit(ctx, subu, dst, dst, src);
		break;
	/* dst = dst * src */
	case BPF_MUL:
		if (cpu_has_mips32r1 || cpu_has_mips32r6) {
			emit(ctx, mul, dst, dst, src);
		} else {
			emit(ctx, multu, dst, src);
			emit(ctx, mflo, dst);
		}
		break;
	/* dst = dst / src */
	case BPF_DIV:
		if (cpu_has_mips32r6) {
			emit(ctx, divu_r6, dst, dst, src);
		} else {
			emit(ctx, divu, dst, src);
			emit(ctx, mflo, dst);
		}
		break;
	/* dst = dst % src */
	case BPF_MOD:
		if (cpu_has_mips32r6) {
			emit(ctx, modu, dst, dst, src);
		} else {
			emit(ctx, divu, dst, src);
			emit(ctx, mfhi, dst);
		}
		break;
	}
	clobber_reg(ctx, dst);
}

/* Atomic read-modify-write (32-bit) */
void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code)
{
	LLSC_sync(ctx);
	emit(ctx, ll, MIPS_R_T9, off, dst);
	switch (code) {
	case BPF_ADD:
	case BPF_ADD | BPF_FETCH:
		emit(ctx, addu, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_AND:
	case BPF_AND | BPF_FETCH:
		emit(ctx, and, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_OR:
	case BPF_OR | BPF_FETCH:
		emit(ctx, or, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_XOR:
	case BPF_XOR | BPF_FETCH:
		emit(ctx, xor, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_XCHG:
		emit(ctx, move, MIPS_R_T8, src);
		break;
	}
	emit(ctx, sc, MIPS_R_T8, off, dst);
	emit(ctx, LLSC_beqz, MIPS_R_T8, -16 - LLSC_offset);
	emit(ctx, nop); /* Delay slot */

	if (code & BPF_FETCH) {
		emit(ctx, move, src, MIPS_R_T9);
		clobber_reg(ctx, src);
	}
}
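
/*
 * Note on the sequence emitted above: if the store-conditional fails, the
 * beqz branches back over the sc, the update and the ll to retry the whole
 * read-modify-write. The extra -LLSC_offset term in the branch offset
 * accounts for an optional barrier emitted by LLSC_sync() before the ll,
 * when the configuration requires one.
 */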

/* Atomic compare-and-exchange (32-bit) */
void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off)
{
	LLSC_sync(ctx);
	emit(ctx, ll, MIPS_R_T9, off, dst);
	emit(ctx, bne, MIPS_R_T9, res, 12);
	emit(ctx, move, MIPS_R_T8, src);	/* Delay slot */
	emit(ctx, sc, MIPS_R_T8, off, dst);
	emit(ctx, LLSC_beqz, MIPS_R_T8, -20 - LLSC_offset);
	emit(ctx, move, res, MIPS_R_T9);	/* Delay slot */
	clobber_reg(ctx, res);
}

/* Swap bytes and truncate a register word or half word */
void emit_bswap_r(struct jit_context *ctx, u8 dst, u32 width)
{
	u8 tmp = MIPS_R_T8;
	u8 msk = MIPS_R_T9;

	switch (width) {
	/* Swap bytes in a word */
	case 32:
		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
			emit(ctx, wsbh, dst, dst);
			emit(ctx, rotr, dst, dst, 16);
		} else {
			emit(ctx, sll, tmp, dst, 16);    /* tmp = dst << 16 */
			emit(ctx, srl, dst, dst, 16);    /* dst = dst >> 16 */
			emit(ctx, or, dst, dst, tmp);    /* dst = dst | tmp */

			emit(ctx, lui, msk, 0xff);       /* msk = 0x00ff0000 */
			emit(ctx, ori, msk, msk, 0xff);  /* msk = msk | 0xff */

			emit(ctx, and, tmp, dst, msk);   /* tmp = dst & msk */
			emit(ctx, sll, tmp, tmp, 8);     /* tmp = tmp << 8 */
			emit(ctx, srl, dst, dst, 8);     /* dst = dst >> 8 */
			emit(ctx, and, dst, dst, msk);   /* dst = dst & msk */
			emit(ctx, or, dst, dst, tmp);    /* dst = dst | tmp */
		}
		break;
	/* Swap bytes in a half word */
	case 16:
		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
			emit(ctx, wsbh, dst, dst);
			emit(ctx, andi, dst, dst, 0xffff);
		} else {
			emit(ctx, andi, tmp, dst, 0xff00); /* t = d & 0xff00 */
			emit(ctx, srl, tmp, tmp, 8);       /* t = t >> 8 */
			emit(ctx, andi, dst, dst, 0x00ff); /* d = d & 0x00ff */
			emit(ctx, sll, dst, dst, 8);       /* d = d << 8 */
			emit(ctx, or, dst, dst, tmp);      /* d = d | t */
		}
		break;
	}
	clobber_reg(ctx, dst);
}
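
/*
 * Illustrative example for the pre-R2 fallback above (not taken from the
 * original sources): with width 16 and dst = 0x1234, tmp becomes 0x1200,
 * then 0x12 after the shift; dst is masked to 0x34 and shifted to 0x3400;
 * the final or yields 0x3412, i.e. the two bytes swapped with the upper
 * half word cleared.
 */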

/* Validate jump immediate range */
bool valid_jmp_i(u8 op, s32 imm)
{
	switch (op) {
	case JIT_JNOP:
		/* Immediate value not used */
		return true;
	case BPF_JEQ:
	case BPF_JNE:
		/* No immediate operation */
		return false;
	case BPF_JSET:
	case JIT_JNSET:
		/* imm must be 16 bits unsigned */
		return imm >= 0 && imm <= 0xffff;
	case BPF_JGE:
	case BPF_JLT:
	case BPF_JSGE:
	case BPF_JSLT:
		/* imm must be 16 bits */
		return imm >= -0x8000 && imm <= 0x7fff;
	case BPF_JGT:
	case BPF_JLE:
	case BPF_JSGT:
	case BPF_JSLE:
		/* imm + 1 must be 16 bits */
		return imm >= -0x8001 && imm <= 0x7ffe;
	}
	return false;
}

/* Invert a conditional jump operation */
static u8 invert_jmp(u8 op)
{
	switch (op) {
	case BPF_JA: return JIT_JNOP;
	case BPF_JEQ: return BPF_JNE;
	case BPF_JNE: return BPF_JEQ;
	case BPF_JSET: return JIT_JNSET;
	case BPF_JGT: return BPF_JLE;
	case BPF_JGE: return BPF_JLT;
	case BPF_JLT: return BPF_JGE;
	case BPF_JLE: return BPF_JGT;
	case BPF_JSGT: return BPF_JSLE;
	case BPF_JSGE: return BPF_JSLT;
	case BPF_JSLT: return BPF_JSGE;
	case BPF_JSLE: return BPF_JSGT;
	}
	return 0;
}

/* Prepare a PC-relative jump operation */
static void setup_jmp(struct jit_context *ctx, u8 bpf_op,
		      s16 bpf_off, u8 *jit_op, s32 *jit_off)
{
	u32 *descp = &ctx->descriptors[ctx->bpf_index];
	int op = bpf_op;
	int offset = 0;

	/* Do not compute offsets on the first pass */
	if (INDEX(*descp) == 0)
		goto done;

	/* Skip jumps never taken */
	if (bpf_op == JIT_JNOP)
		goto done;

	/* Convert jumps always taken */
	if (bpf_op == BPF_JA)
		*descp |= JIT_DESC_CONVERT;

	/*
	 * Current ctx->jit_index points to the start of the branch preamble.
	 * Since the preamble differs among different branch conditionals,
	 * the current index cannot be used to compute the branch offset.
	 * Instead, we use the offset table value for the next instruction,
	 * which gives the index immediately after the branch delay slot.
	 */
	if (!CONVERTED(*descp)) {
		int target = ctx->bpf_index + bpf_off + 1;
		int origin = ctx->bpf_index + 1;

		offset = (INDEX(ctx->descriptors[target]) -
			  INDEX(ctx->descriptors[origin]) + 1) * sizeof(u32);
	}

	/*
	 * The PC-relative branch offset field on MIPS is 18 bits signed,
	 * so if the computed offset is larger than this we generate an
	 * absolute jump that we skip with an inverted conditional branch.
	 */
	if (CONVERTED(*descp) || offset < -0x20000 || offset > 0x1ffff) {
		offset = 3 * sizeof(u32);
		op = invert_jmp(bpf_op);
		ctx->changes += !CONVERTED(*descp);
		*descp |= JIT_DESC_CONVERT;
	}

done:
	*jit_off = offset;
	*jit_op = op;
}

/* Prepare a PC-relative jump operation with immediate conditional */
void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width,
		 u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off)
{
	bool always = false;
	bool never = false;

	switch (bpf_op) {
	case BPF_JEQ:
	case BPF_JNE:
		break;
	case BPF_JSET:
	case BPF_JLT:
		never = imm == 0;
		break;
	case BPF_JGE:
		always = imm == 0;
		break;
	case BPF_JGT:
		never = (u32)imm == U32_MAX;
		break;
	case BPF_JLE:
		always = (u32)imm == U32_MAX;
		break;
	case BPF_JSGT:
		never = imm == S32_MAX && width == 32;
		break;
	case BPF_JSGE:
		always = imm == S32_MIN && width == 32;
		break;
	case BPF_JSLT:
		never = imm == S32_MIN && width == 32;
		break;
	case BPF_JSLE:
		always = imm == S32_MAX && width == 32;
		break;
	}

	if (never)
		bpf_op = JIT_JNOP;
	if (always)
		bpf_op = BPF_JA;
	setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off);
}

/* Prepare a PC-relative jump operation with register conditional */
void setup_jmp_r(struct jit_context *ctx, bool same_reg,
		 u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off)
{
	switch (bpf_op) {
	case BPF_JSET:
		break;
	case BPF_JEQ:
	case BPF_JGE:
	case BPF_JLE:
	case BPF_JSGE:
	case BPF_JSLE:
		if (same_reg)
			bpf_op = BPF_JA;
		break;
	case BPF_JNE:
	case BPF_JLT:
	case BPF_JGT:
	case BPF_JSGT:
	case BPF_JSLT:
		if (same_reg)
			bpf_op = JIT_JNOP;
		break;
	}
	setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off);
}
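
/*
 * Illustrative examples of the conditional pre-analysis above (not taken
 * from the original sources): an immediate BPF_JLE against U32_MAX is
 * always true and is turned into BPF_JA, so only an unconditional jump
 * (plus its delay slot) is emitted; an immediate BPF_JLT against 0 can
 * never be true and is turned into JIT_JNOP, so no branch is emitted at
 * all. The same reasoning applies to register conditionals when both
 * operands are the same register, e.g. BPF_JEQ becomes BPF_JA and BPF_JNE
 * becomes JIT_JNOP.
 */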

/* Finish a PC-relative jump operation */
int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off)
{
	/* Emit conditional branch delay slot */
	if (jit_op != JIT_JNOP)
		emit(ctx, nop);
	/*
	 * Emit an absolute long jump with delay slot,
	 * if the PC-relative branch was converted.
	 */
	if (CONVERTED(ctx->descriptors[ctx->bpf_index])) {
		int target = get_target(ctx, ctx->bpf_index + bpf_off + 1);

		if (target < 0)
			return -1;
		emit(ctx, j, target);
		emit(ctx, nop);
	}
	return 0;
}

/* Jump immediate (32-bit) */
void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 off, u8 op)
{
	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst & imm */
	case BPF_JSET:
		emit(ctx, andi, MIPS_R_T9, dst, (u16)imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
	case JIT_JNSET:
		emit(ctx, andi, MIPS_R_T9, dst, (u16)imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > imm */
	case BPF_JGT:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= imm */
	case BPF_JGE:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < imm */
	case BPF_JLT:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= imm */
	case BPF_JLE:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst > imm (signed) */
	case BPF_JSGT:
		emit(ctx, slti, MIPS_R_T9, dst, imm + 1);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= imm (signed) */
	case BPF_JSGE:
		emit(ctx, slti, MIPS_R_T9, dst, imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < imm (signed) */
	case BPF_JSLT:
		emit(ctx, slti, MIPS_R_T9, dst, imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= imm (signed) */
	case BPF_JSLE:
		emit(ctx, slti, MIPS_R_T9, dst, imm + 1);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	}
}
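
/*
 * Note on the immediate comparisons above: MIPS has no "greater than"
 * compare-with-immediate, so dst > imm is tested as !(dst < imm + 1) using
 * sltiu/slti followed by beqz, and dst <= imm as dst < imm + 1 followed by
 * bnez. This is why valid_jmp_i() requires imm + 1, rather than imm, to
 * fit in 16 bits for BPF_JGT, BPF_JLE, BPF_JSGT and BPF_JSLE.
 */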

/* Jump register (32-bit) */
void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op)
{
	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst == src */
	case BPF_JEQ:
		emit(ctx, beq, dst, src, off);
		break;
	/* PC += off if dst != src */
	case BPF_JNE:
		emit(ctx, bne, dst, src, off);
		break;
	/* PC += off if dst & src */
	case BPF_JSET:
		emit(ctx, and, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
	case JIT_JNSET:
		emit(ctx, and, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > src */
	case BPF_JGT:
		emit(ctx, sltu, MIPS_R_T9, src, dst);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= src */
	case BPF_JGE:
		emit(ctx, sltu, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < src */
	case BPF_JLT:
		emit(ctx, sltu, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= src */
	case BPF_JLE:
		emit(ctx, sltu, MIPS_R_T9, src, dst);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > src (signed) */
	case BPF_JSGT:
		emit(ctx, slt, MIPS_R_T9, src, dst);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= src (signed) */
	case BPF_JSGE:
		emit(ctx, slt, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < src (signed) */
	case BPF_JSLT:
		emit(ctx, slt, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= src (signed) */
	case BPF_JSLE:
		emit(ctx, slt, MIPS_R_T9, src, dst);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	}
}

/* Jump always */
int emit_ja(struct jit_context *ctx, s16 off)
{
	int target = get_target(ctx, ctx->bpf_index + off + 1);

	if (target < 0)
		return -1;
	emit(ctx, j, target);
	emit(ctx, nop);
	return 0;
}

/* Jump to epilogue */
int emit_exit(struct jit_context *ctx)
{
	int target = get_target(ctx, ctx->program->len);

	if (target < 0)
		return -1;
	emit(ctx, j, target);
	emit(ctx, nop);
	return 0;
}

/* Build the program body from eBPF bytecode */
static int build_body(struct jit_context *ctx)
{
	const struct bpf_prog *prog = ctx->program;
	unsigned int i;

	ctx->stack_used = 0;
	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		u32 *descp = &ctx->descriptors[i];
		int ret;

		access_reg(ctx, insn->src_reg);
		access_reg(ctx, insn->dst_reg);

		ctx->bpf_index = i;
		if (ctx->target == NULL) {
			ctx->changes += INDEX(*descp) != ctx->jit_index;
			*descp &= JIT_DESC_CONVERT;
			*descp |= ctx->jit_index;
		}

		ret = build_insn(insn, ctx);
		if (ret < 0)
			return ret;

		if (ret > 0) {
			i++;
			if (ctx->target == NULL)
				descp[1] = ctx->jit_index;
		}
	}

	/* Store the end offset, where the epilogue begins */
	ctx->descriptors[prog->len] = ctx->jit_index;
	return 0;
}

/* Set the branch conversion flag on all instructions */
static void set_convert_flag(struct jit_context *ctx, bool enable)
{
	const struct bpf_prog *prog = ctx->program;
	u32 flag = enable ? JIT_DESC_CONVERT : 0;
	unsigned int i;

	for (i = 0; i <= prog->len; i++)
		ctx->descriptors[i] = INDEX(ctx->descriptors[i]) | flag;
}

static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *p;

	/* We are guaranteed to have aligned memory. */
	for (p = area; size >= sizeof(u32); size -= sizeof(u32))
		uasm_i_break(&p, BRK_BUG); /* Increments p */
}

bool bpf_jit_needs_zext(void)
{
	return true;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header = NULL;
	struct jit_context ctx;
	bool tmp_blinded = false;
	unsigned int tmp_idx;
	unsigned int image_size;
	u8 *image_ptr;
	int tries;

	/*
	 * If BPF JIT was not enabled, then we must fall back to
	 * the interpreter.
	 */
	if (!prog->jit_requested)
		return orig_prog;
	/*
	 * If constant blinding was enabled and we failed during blinding,
	 * then we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	tmp = bpf_jit_blind_constants(prog);
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.program = prog;

	/*
	 * If we are not able to allocate memory for the descriptors[]
	 * table, we must fall back to the interpreter.
	 */
	ctx.descriptors = kcalloc(prog->len + 1, sizeof(*ctx.descriptors),
				  GFP_KERNEL);
	if (ctx.descriptors == NULL)
		goto out_err;

	/* First pass discovers used resources */
	if (build_body(&ctx) < 0)
		goto out_err;
	/*
	 * Second pass computes instruction offsets.
	 * If any PC-relative branches are out of range, a sequence of
	 * a PC-relative branch + a jump is generated, and we have to
	 * try again from the beginning to generate the new offsets.
	 * This is done until no additional conversions are necessary.
	 * The last two iterations are done with all branches being
	 * converted, to guarantee offset table convergence within a
	 * fixed number of iterations.
	 */
	ctx.jit_index = 0;
	build_prologue(&ctx);
	tmp_idx = ctx.jit_index;

	tries = JIT_MAX_ITERATIONS;
	do {
		ctx.jit_index = tmp_idx;
		ctx.changes = 0;
		if (tries == 2)
			set_convert_flag(&ctx, true);
		if (build_body(&ctx) < 0)
			goto out_err;
	} while (ctx.changes > 0 && --tries > 0);

	if (WARN_ONCE(ctx.changes > 0, "JIT offsets failed to converge"))
		goto out_err;

	build_epilogue(&ctx, MIPS_R_RA);

	/* Now we know the size of the structure to make */
	image_size = sizeof(u32) * ctx.jit_index;
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	/*
	 * If we are not able to allocate memory for the JIT image,
	 * we must fall back to the interpreter.
	 */
	if (header == NULL)
		goto out_err;

	/* Actual pass to generate final JIT code */
	ctx.target = (u32 *)image_ptr;
	ctx.jit_index = 0;

	/*
	 * If building the JITed code fails somehow,
	 * we fall back to the interpreter.
	 */
	build_prologue(&ctx);
	if (build_body(&ctx) < 0)
		goto out_err;
	build_epilogue(&ctx, MIPS_R_RA);

	/* Populate line info meta data */
	set_convert_flag(&ctx, false);
	bpf_prog_fill_jited_linfo(prog, &ctx.descriptors[1]);

	/* Set as read-only exec and flush instruction cache */
	bpf_jit_binary_lock_ro(header);
	flush_icache_range((unsigned long)header,
			   (unsigned long)&ctx.target[ctx.jit_index]);

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, image_size, 2, ctx.target);

	prog->bpf_func = (void *)ctx.target;
	prog->jited = 1;
	prog->jited_len = image_size;

out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	kfree(ctx.descriptors);
	return prog;

out_err:
	prog = orig_prog;
	if (header)
		bpf_jit_binary_free(header);
	goto out;
}