1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Just-In-Time compiler for eBPF filters on 32bit ARM 4 * 5 * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com> 6 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> 7 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> 8 */ 9 10 #include <linux/bpf.h> 11 #include <linux/bitops.h> 12 #include <linux/compiler.h> 13 #include <linux/errno.h> 14 #include <linux/filter.h> 15 #include <linux/netdevice.h> 16 #include <linux/string.h> 17 #include <linux/slab.h> 18 #include <linux/if_vlan.h> 19 #include <linux/math64.h> 20 21 #include <asm/cacheflush.h> 22 #include <asm/hwcap.h> 23 #include <asm/opcodes.h> 24 #include <asm/system_info.h> 25 26 #include "bpf_jit_32.h" 27 28 /* 29 * eBPF prog stack layout: 30 * 31 * high 32 * original ARM_SP => +-----+ 33 * | | callee saved registers 34 * +-----+ <= (BPF_FP + SCRATCH_SIZE) 35 * | ... | eBPF JIT scratch space 36 * eBPF fp register => +-----+ 37 * (BPF_FP) | ... | eBPF prog stack 38 * +-----+ 39 * |RSVD | JIT scratchpad 40 * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) 41 * | ... | caller-saved registers 42 * +-----+ 43 * | ... | arguments passed on stack 44 * ARM_SP during call => +-----| 45 * | | 46 * | ... | Function call stack 47 * | | 48 * +-----+ 49 * low 50 * 51 * The callee saved registers depends on whether frame pointers are enabled. 52 * With frame pointers (to be compliant with the ABI): 53 * 54 * high 55 * original ARM_SP => +--------------+ \ 56 * | pc | | 57 * current ARM_FP => +--------------+ } callee saved registers 58 * |r4-r9,fp,ip,lr| | 59 * +--------------+ / 60 * low 61 * 62 * Without frame pointers: 63 * 64 * high 65 * original ARM_SP => +--------------+ 66 * | r4-r9,fp,lr | callee saved registers 67 * current ARM_FP => +--------------+ 68 * low 69 * 70 * When popping registers off the stack at the end of a BPF function, we 71 * reference them via the current ARM_FP register. 72 * 73 * Some eBPF operations are implemented via a call to a helper function. 74 * Such calls are "invisible" in the eBPF code, so it is up to the calling 75 * program to preserve any caller-saved ARM registers during the call. The 76 * JIT emits code to push and pop those registers onto the stack, immediately 77 * above the callee stack frame. 78 */ 79 #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ 80 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ 81 1 << ARM_FP) 82 #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) 83 #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) 84 85 #define CALLER_MASK (1 << ARM_R0 | 1 << ARM_R1 | 1 << ARM_R2 | 1 << ARM_R3) 86 87 enum { 88 /* Stack layout - these are offsets from (top of stack - 4) */ 89 BPF_R2_HI, 90 BPF_R2_LO, 91 BPF_R3_HI, 92 BPF_R3_LO, 93 BPF_R4_HI, 94 BPF_R4_LO, 95 BPF_R5_HI, 96 BPF_R5_LO, 97 BPF_R7_HI, 98 BPF_R7_LO, 99 BPF_R8_HI, 100 BPF_R8_LO, 101 BPF_R9_HI, 102 BPF_R9_LO, 103 BPF_FP_HI, 104 BPF_FP_LO, 105 BPF_TC_HI, 106 BPF_TC_LO, 107 BPF_AX_HI, 108 BPF_AX_LO, 109 /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, 110 * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, 111 * BPF_REG_FP and Tail call counts. 112 */ 113 BPF_JIT_SCRATCH_REGS, 114 }; 115 116 /* 117 * Negative "register" values indicate the register is stored on the stack 118 * and are the offset from the top of the eBPF JIT scratch space. 119 */ 120 #define STACK_OFFSET(k) (-4 - (k) * 4) 121 #define SCRATCH_SIZE (BPF_JIT_SCRATCH_REGS * 4) 122 123 #ifdef CONFIG_FRAME_POINTER 124 #define EBPF_SCRATCH_TO_ARM_FP(x) ((x) - 4 * hweight16(CALLEE_PUSH_MASK) - 4) 125 #else 126 #define EBPF_SCRATCH_TO_ARM_FP(x) (x) 127 #endif 128 129 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ 130 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ 131 #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ 132 133 #define FLAG_IMM_OVERFLOW (1 << 0) 134 135 /* 136 * Map eBPF registers to ARM 32bit registers or stack scratch space. 137 * 138 * 1. First argument is passed using the arm 32bit registers and rest of the 139 * arguments are passed on stack scratch space. 140 * 2. First callee-saved argument is mapped to arm 32 bit registers and rest 141 * arguments are mapped to scratch space on stack. 142 * 3. We need two 64 bit temp registers to do complex operations on eBPF 143 * registers. 144 * 145 * As the eBPF registers are all 64 bit registers and arm has only 32 bit 146 * registers, we have to map each eBPF registers with two arm 32 bit regs or 147 * scratch memory space and we have to build eBPF 64 bit register from those. 148 * 149 */ 150 static const s8 bpf2a32[][2] = { 151 /* return value from in-kernel function, and exit value from eBPF */ 152 [BPF_REG_0] = {ARM_R1, ARM_R0}, 153 /* arguments from eBPF program to in-kernel function */ 154 [BPF_REG_1] = {ARM_R3, ARM_R2}, 155 /* Stored on stack scratch space */ 156 [BPF_REG_2] = {STACK_OFFSET(BPF_R2_HI), STACK_OFFSET(BPF_R2_LO)}, 157 [BPF_REG_3] = {STACK_OFFSET(BPF_R3_HI), STACK_OFFSET(BPF_R3_LO)}, 158 [BPF_REG_4] = {STACK_OFFSET(BPF_R4_HI), STACK_OFFSET(BPF_R4_LO)}, 159 [BPF_REG_5] = {STACK_OFFSET(BPF_R5_HI), STACK_OFFSET(BPF_R5_LO)}, 160 /* callee saved registers that in-kernel function will preserve */ 161 [BPF_REG_6] = {ARM_R5, ARM_R4}, 162 /* Stored on stack scratch space */ 163 [BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)}, 164 [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)}, 165 [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, 166 /* Read only Frame Pointer to access Stack */ 167 [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)}, 168 /* Temporary Register for BPF JIT, can be used 169 * for constant blindings and others. 170 */ 171 [TMP_REG_1] = {ARM_R7, ARM_R6}, 172 [TMP_REG_2] = {ARM_R9, ARM_R8}, 173 /* Tail call count. Stored on stack scratch space. */ 174 [TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)}, 175 /* temporary register for blinding constants. 176 * Stored on stack scratch space. 177 */ 178 [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)}, 179 }; 180 181 #define dst_lo dst[1] 182 #define dst_hi dst[0] 183 #define src_lo src[1] 184 #define src_hi src[0] 185 186 /* 187 * JIT Context: 188 * 189 * prog : bpf_prog 190 * idx : index of current last JITed instruction. 191 * prologue_bytes : bytes used in prologue. 192 * epilogue_offset : offset of epilogue starting. 193 * offsets : array of eBPF instruction offsets in 194 * JITed code. 195 * target : final JITed code. 196 * epilogue_bytes : no of bytes used in epilogue. 197 * imm_count : no of immediate counts used for global 198 * variables. 199 * imms : array of global variable addresses. 200 */ 201 202 struct jit_ctx { 203 const struct bpf_prog *prog; 204 unsigned int idx; 205 unsigned int prologue_bytes; 206 unsigned int epilogue_offset; 207 unsigned int cpu_architecture; 208 u32 flags; 209 u32 *offsets; 210 u32 *target; 211 u32 stack_size; 212 #if __LINUX_ARM_ARCH__ < 7 213 u16 epilogue_bytes; 214 u16 imm_count; 215 u32 *imms; 216 #endif 217 }; 218 219 /* 220 * Wrappers which handle both OABI and EABI and assures Thumb2 interworking 221 * (where the assembly routines like __aeabi_uidiv could cause problems). 222 */ 223 static u32 jit_udiv32(u32 dividend, u32 divisor) 224 { 225 return dividend / divisor; 226 } 227 228 static u32 jit_mod32(u32 dividend, u32 divisor) 229 { 230 return dividend % divisor; 231 } 232 233 static s32 jit_sdiv32(s32 dividend, s32 divisor) 234 { 235 return dividend / divisor; 236 } 237 238 static s32 jit_smod32(s32 dividend, s32 divisor) 239 { 240 return dividend % divisor; 241 } 242 243 /* Wrappers for 64-bit div/mod */ 244 static u64 jit_udiv64(u64 dividend, u64 divisor) 245 { 246 return div64_u64(dividend, divisor); 247 } 248 249 static u64 jit_mod64(u64 dividend, u64 divisor) 250 { 251 u64 rem; 252 253 div64_u64_rem(dividend, divisor, &rem); 254 return rem; 255 } 256 257 static s64 jit_sdiv64(s64 dividend, s64 divisor) 258 { 259 return div64_s64(dividend, divisor); 260 } 261 262 static s64 jit_smod64(s64 dividend, s64 divisor) 263 { 264 u64 q; 265 266 q = div64_s64(dividend, divisor); 267 268 return dividend - q * divisor; 269 } 270 271 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) 272 { 273 inst |= (cond << 28); 274 inst = __opcode_to_mem_arm(inst); 275 276 if (ctx->target != NULL) 277 ctx->target[ctx->idx] = inst; 278 279 ctx->idx++; 280 } 281 282 /* 283 * Emit an instruction that will be executed unconditionally. 284 */ 285 static inline void emit(u32 inst, struct jit_ctx *ctx) 286 { 287 _emit(ARM_COND_AL, inst, ctx); 288 } 289 290 /* 291 * This is rather horrid, but necessary to convert an integer constant 292 * to an immediate operand for the opcodes, and be able to detect at 293 * build time whether the constant can't be converted (iow, usable in 294 * BUILD_BUG_ON()). 295 */ 296 #define imm12val(v, s) (rol32(v, (s)) | (s) << 7) 297 #define const_imm8m(x) \ 298 ({ int r; \ 299 u32 v = (x); \ 300 if (!(v & ~0x000000ff)) \ 301 r = imm12val(v, 0); \ 302 else if (!(v & ~0xc000003f)) \ 303 r = imm12val(v, 2); \ 304 else if (!(v & ~0xf000000f)) \ 305 r = imm12val(v, 4); \ 306 else if (!(v & ~0xfc000003)) \ 307 r = imm12val(v, 6); \ 308 else if (!(v & ~0xff000000)) \ 309 r = imm12val(v, 8); \ 310 else if (!(v & ~0x3fc00000)) \ 311 r = imm12val(v, 10); \ 312 else if (!(v & ~0x0ff00000)) \ 313 r = imm12val(v, 12); \ 314 else if (!(v & ~0x03fc0000)) \ 315 r = imm12val(v, 14); \ 316 else if (!(v & ~0x00ff0000)) \ 317 r = imm12val(v, 16); \ 318 else if (!(v & ~0x003fc000)) \ 319 r = imm12val(v, 18); \ 320 else if (!(v & ~0x000ff000)) \ 321 r = imm12val(v, 20); \ 322 else if (!(v & ~0x0003fc00)) \ 323 r = imm12val(v, 22); \ 324 else if (!(v & ~0x0000ff00)) \ 325 r = imm12val(v, 24); \ 326 else if (!(v & ~0x00003fc0)) \ 327 r = imm12val(v, 26); \ 328 else if (!(v & ~0x00000ff0)) \ 329 r = imm12val(v, 28); \ 330 else if (!(v & ~0x000003fc)) \ 331 r = imm12val(v, 30); \ 332 else \ 333 r = -1; \ 334 r; }) 335 336 /* 337 * Checks if immediate value can be converted to imm12(12 bits) value. 338 */ 339 static int imm8m(u32 x) 340 { 341 u32 rot; 342 343 for (rot = 0; rot < 16; rot++) 344 if ((x & ~ror32(0xff, 2 * rot)) == 0) 345 return rol32(x, 2 * rot) | (rot << 8); 346 return -1; 347 } 348 349 #define imm8m(x) (__builtin_constant_p(x) ? const_imm8m(x) : imm8m(x)) 350 351 static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12) 352 { 353 op |= rt << 12 | rn << 16; 354 if (imm12 >= 0) 355 op |= ARM_INST_LDST__U; 356 else 357 imm12 = -imm12; 358 return op | (imm12 & ARM_INST_LDST__IMM12); 359 } 360 361 static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) 362 { 363 op |= rt << 12 | rn << 16; 364 if (imm8 >= 0) 365 op |= ARM_INST_LDST__U; 366 else 367 imm8 = -imm8; 368 return op | (imm8 & 0xf0) << 4 | (imm8 & 0x0f); 369 } 370 371 #define ARM_LDR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off) 372 #define ARM_LDRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off) 373 #define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) 374 #define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) 375 376 #define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off) 377 #define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off) 378 379 #define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) 380 #define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) 381 #define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) 382 #define ARM_STRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off) 383 384 /* 385 * Initializes the JIT space with undefined instructions. 386 */ 387 static void jit_fill_hole(void *area, unsigned int size) 388 { 389 u32 *ptr; 390 /* We are guaranteed to have aligned memory. */ 391 for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) 392 *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); 393 } 394 395 #if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) 396 /* EABI requires the stack to be aligned to 64-bit boundaries */ 397 #define STACK_ALIGNMENT 8 398 #else 399 /* Stack must be aligned to 32-bit boundaries */ 400 #define STACK_ALIGNMENT 4 401 #endif 402 403 /* total stack size used in JITed code */ 404 #define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE) 405 #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) 406 407 #if __LINUX_ARM_ARCH__ < 7 408 409 static u16 imm_offset(u32 k, struct jit_ctx *ctx) 410 { 411 unsigned int i = 0, offset; 412 u16 imm; 413 414 /* on the "fake" run we just count them (duplicates included) */ 415 if (ctx->target == NULL) { 416 ctx->imm_count++; 417 return 0; 418 } 419 420 while ((i < ctx->imm_count) && ctx->imms[i]) { 421 if (ctx->imms[i] == k) 422 break; 423 i++; 424 } 425 426 if (ctx->imms[i] == 0) 427 ctx->imms[i] = k; 428 429 /* constants go just after the epilogue */ 430 offset = ctx->offsets[ctx->prog->len - 1] * 4; 431 offset += ctx->prologue_bytes; 432 offset += ctx->epilogue_bytes; 433 offset += i * 4; 434 435 ctx->target[offset / 4] = k; 436 437 /* PC in ARM mode == address of the instruction + 8 */ 438 imm = offset - (8 + ctx->idx * 4); 439 440 if (imm & ~0xfff) { 441 /* 442 * literal pool is too far, signal it into flags. we 443 * can only detect it on the second pass unfortunately. 444 */ 445 ctx->flags |= FLAG_IMM_OVERFLOW; 446 return 0; 447 } 448 449 return imm; 450 } 451 452 #endif /* __LINUX_ARM_ARCH__ */ 453 454 static inline int bpf2a32_offset(int bpf_to, int bpf_from, 455 const struct jit_ctx *ctx) { 456 int to, from; 457 458 if (ctx->target == NULL) 459 return 0; 460 to = ctx->offsets[bpf_to]; 461 from = ctx->offsets[bpf_from]; 462 463 return to - from - 1; 464 } 465 466 /* 467 * Move an immediate that's not an imm8m to a core register. 468 */ 469 static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx) 470 { 471 #if __LINUX_ARM_ARCH__ < 7 472 emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx); 473 #else 474 emit(ARM_MOVW(rd, val & 0xffff), ctx); 475 if (val > 0xffff) 476 emit(ARM_MOVT(rd, val >> 16), ctx); 477 #endif 478 } 479 480 static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx) 481 { 482 int imm12 = imm8m(val); 483 484 if (imm12 >= 0) 485 emit(ARM_MOV_I(rd, imm12), ctx); 486 else 487 emit_mov_i_no8m(rd, val, ctx); 488 } 489 490 static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx) 491 { 492 if (elf_hwcap & HWCAP_THUMB) 493 emit(ARM_BX(tgt_reg), ctx); 494 else 495 emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); 496 } 497 498 static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) 499 { 500 #if __LINUX_ARM_ARCH__ < 5 501 emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); 502 emit_bx_r(tgt_reg, ctx); 503 #else 504 emit(ARM_BLX_R(tgt_reg), ctx); 505 #endif 506 } 507 508 static inline int epilogue_offset(const struct jit_ctx *ctx) 509 { 510 int to, from; 511 /* No need for 1st dummy run */ 512 if (ctx->target == NULL) 513 return 0; 514 to = ctx->epilogue_offset; 515 from = ctx->idx; 516 517 return to - from - 2; 518 } 519 520 static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign) 521 { 522 const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1); 523 const s8 *tmp = bpf2a32[TMP_REG_1]; 524 u32 dst; 525 526 #if __LINUX_ARM_ARCH__ == 7 527 if (elf_hwcap & HWCAP_IDIVA) { 528 if (op == BPF_DIV) { 529 emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx); 530 } else { 531 emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx); 532 emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx); 533 } 534 return; 535 } 536 #endif 537 538 /* 539 * For BPF_ALU | BPF_DIV | BPF_K instructions 540 * As ARM_R1 and ARM_R0 contains 1st argument of bpf 541 * function, we need to save it on caller side to save 542 * it from getting destroyed within callee. 543 * After the return from the callee, we restore ARM_R0 544 * ARM_R1. 545 */ 546 if (rn != ARM_R1) { 547 emit(ARM_MOV_R(tmp[0], ARM_R1), ctx); 548 emit(ARM_MOV_R(ARM_R1, rn), ctx); 549 } 550 if (rm != ARM_R0) { 551 emit(ARM_MOV_R(tmp[1], ARM_R0), ctx); 552 emit(ARM_MOV_R(ARM_R0, rm), ctx); 553 } 554 555 /* Push caller-saved registers on stack */ 556 emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx); 557 558 /* Call appropriate function */ 559 if (sign) { 560 if (op == BPF_DIV) 561 dst = (u32)jit_sdiv32; 562 else 563 dst = (u32)jit_smod32; 564 } else { 565 if (op == BPF_DIV) 566 dst = (u32)jit_udiv32; 567 else 568 dst = (u32)jit_mod32; 569 } 570 571 emit_mov_i(ARM_IP, dst, ctx); 572 emit_blx_r(ARM_IP, ctx); 573 574 /* Restore caller-saved registers from stack */ 575 emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx); 576 577 /* Save return value */ 578 if (rd != ARM_R0) 579 emit(ARM_MOV_R(rd, ARM_R0), ctx); 580 581 /* Restore ARM_R0 and ARM_R1 */ 582 if (rn != ARM_R1) 583 emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx); 584 if (rm != ARM_R0) 585 emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); 586 } 587 588 static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx, 589 u8 op, u8 sign) 590 { 591 u32 dst; 592 593 /* Push caller-saved registers on stack */ 594 emit(ARM_PUSH(CALLER_MASK), ctx); 595 596 /* 597 * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in 598 * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack, 599 * we can recover them later after returning from the function call. 600 */ 601 if (rm[1] != ARM_R0 || rn[1] != ARM_R2) { 602 /* 603 * Move Rm to {R1, R0} if it is not already there. 604 */ 605 if (rm[1] != ARM_R0) { 606 if (rn[1] == ARM_R0) 607 emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx); 608 emit(ARM_MOV_R(ARM_R1, rm[0]), ctx); 609 emit(ARM_MOV_R(ARM_R0, rm[1]), ctx); 610 if (rn[1] == ARM_R0) { 611 emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx); 612 goto cont; 613 } 614 } 615 /* 616 * Move Rn to {R3, R2} if it is not already there. 617 */ 618 if (rn[1] != ARM_R2) { 619 emit(ARM_MOV_R(ARM_R3, rn[0]), ctx); 620 emit(ARM_MOV_R(ARM_R2, rn[1]), ctx); 621 } 622 } 623 624 cont: 625 626 /* Call appropriate function */ 627 if (sign) { 628 if (op == BPF_DIV) 629 dst = (u32)jit_sdiv64; 630 else 631 dst = (u32)jit_smod64; 632 } else { 633 if (op == BPF_DIV) 634 dst = (u32)jit_udiv64; 635 else 636 dst = (u32)jit_mod64; 637 } 638 639 emit_mov_i(ARM_IP, dst, ctx); 640 emit_blx_r(ARM_IP, ctx); 641 642 /* Save return value */ 643 if (rd[1] != ARM_R0) { 644 emit(ARM_MOV_R(rd[0], ARM_R1), ctx); 645 emit(ARM_MOV_R(rd[1], ARM_R0), ctx); 646 } 647 648 /* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */ 649 if (rd[1] != ARM_R0 && rd[1] != ARM_R2) { 650 emit(ARM_POP(CALLER_MASK), ctx); 651 } else if (rd[1] != ARM_R0) { 652 emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx); 653 emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx); 654 } else { 655 emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx); 656 emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx); 657 } 658 } 659 660 /* Is the translated BPF register on stack? */ 661 static bool is_stacked(s8 reg) 662 { 663 return reg < 0; 664 } 665 666 /* If a BPF register is on the stack (stk is true), load it to the 667 * supplied temporary register and return the temporary register 668 * for subsequent operations, otherwise just use the CPU register. 669 */ 670 static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx) 671 { 672 if (is_stacked(reg)) { 673 emit(ARM_LDR_I(tmp, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); 674 reg = tmp; 675 } 676 return reg; 677 } 678 679 static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, 680 struct jit_ctx *ctx) 681 { 682 if (is_stacked(reg[1])) { 683 if (__LINUX_ARM_ARCH__ >= 6 || 684 ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { 685 emit(ARM_LDRD_I(tmp[1], ARM_FP, 686 EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); 687 } else { 688 emit(ARM_LDR_I(tmp[1], ARM_FP, 689 EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); 690 emit(ARM_LDR_I(tmp[0], ARM_FP, 691 EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); 692 } 693 reg = tmp; 694 } 695 return reg; 696 } 697 698 /* If a BPF register is on the stack (stk is true), save the register 699 * back to the stack. If the source register is not the same, then 700 * move it into the correct register. 701 */ 702 static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx) 703 { 704 if (is_stacked(reg)) 705 emit(ARM_STR_I(src, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); 706 else if (reg != src) 707 emit(ARM_MOV_R(reg, src), ctx); 708 } 709 710 static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, 711 struct jit_ctx *ctx) 712 { 713 if (is_stacked(reg[1])) { 714 if (__LINUX_ARM_ARCH__ >= 6 || 715 ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { 716 emit(ARM_STRD_I(src[1], ARM_FP, 717 EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); 718 } else { 719 emit(ARM_STR_I(src[1], ARM_FP, 720 EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); 721 emit(ARM_STR_I(src[0], ARM_FP, 722 EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); 723 } 724 } else { 725 if (reg[1] != src[1]) 726 emit(ARM_MOV_R(reg[1], src[1]), ctx); 727 if (reg[0] != src[0]) 728 emit(ARM_MOV_R(reg[0], src[0]), ctx); 729 } 730 } 731 732 static inline void emit_a32_mov_i(const s8 dst, const u32 val, 733 struct jit_ctx *ctx) 734 { 735 const s8 *tmp = bpf2a32[TMP_REG_1]; 736 737 if (is_stacked(dst)) { 738 emit_mov_i(tmp[1], val, ctx); 739 arm_bpf_put_reg32(dst, tmp[1], ctx); 740 } else { 741 emit_mov_i(dst, val, ctx); 742 } 743 } 744 745 static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx) 746 { 747 const s8 *tmp = bpf2a32[TMP_REG_1]; 748 const s8 *rd = is_stacked(dst_lo) ? tmp : dst; 749 750 emit_mov_i(rd[1], (u32)val, ctx); 751 emit_mov_i(rd[0], val >> 32, ctx); 752 753 arm_bpf_put_reg64(dst, rd, ctx); 754 } 755 756 /* Sign extended move */ 757 static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[], 758 const u32 val, struct jit_ctx *ctx) { 759 u64 val64 = val; 760 761 if (is64 && (val & (1<<31))) 762 val64 |= 0xffffffff00000000ULL; 763 emit_a32_mov_i64(dst, val64, ctx); 764 } 765 766 static inline void emit_a32_add_r(const u8 dst, const u8 src, 767 const bool is64, const bool hi, 768 struct jit_ctx *ctx) { 769 /* 64 bit : 770 * adds dst_lo, dst_lo, src_lo 771 * adc dst_hi, dst_hi, src_hi 772 * 32 bit : 773 * add dst_lo, dst_lo, src_lo 774 */ 775 if (!hi && is64) 776 emit(ARM_ADDS_R(dst, dst, src), ctx); 777 else if (hi && is64) 778 emit(ARM_ADC_R(dst, dst, src), ctx); 779 else 780 emit(ARM_ADD_R(dst, dst, src), ctx); 781 } 782 783 static inline void emit_a32_sub_r(const u8 dst, const u8 src, 784 const bool is64, const bool hi, 785 struct jit_ctx *ctx) { 786 /* 64 bit : 787 * subs dst_lo, dst_lo, src_lo 788 * sbc dst_hi, dst_hi, src_hi 789 * 32 bit : 790 * sub dst_lo, dst_lo, src_lo 791 */ 792 if (!hi && is64) 793 emit(ARM_SUBS_R(dst, dst, src), ctx); 794 else if (hi && is64) 795 emit(ARM_SBC_R(dst, dst, src), ctx); 796 else 797 emit(ARM_SUB_R(dst, dst, src), ctx); 798 } 799 800 static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, 801 const bool hi, const u8 op, struct jit_ctx *ctx){ 802 switch (BPF_OP(op)) { 803 /* dst = dst + src */ 804 case BPF_ADD: 805 emit_a32_add_r(dst, src, is64, hi, ctx); 806 break; 807 /* dst = dst - src */ 808 case BPF_SUB: 809 emit_a32_sub_r(dst, src, is64, hi, ctx); 810 break; 811 /* dst = dst | src */ 812 case BPF_OR: 813 emit(ARM_ORR_R(dst, dst, src), ctx); 814 break; 815 /* dst = dst & src */ 816 case BPF_AND: 817 emit(ARM_AND_R(dst, dst, src), ctx); 818 break; 819 /* dst = dst ^ src */ 820 case BPF_XOR: 821 emit(ARM_EOR_R(dst, dst, src), ctx); 822 break; 823 /* dst = dst * src */ 824 case BPF_MUL: 825 emit(ARM_MUL(dst, dst, src), ctx); 826 break; 827 /* dst = dst << src */ 828 case BPF_LSH: 829 emit(ARM_LSL_R(dst, dst, src), ctx); 830 break; 831 /* dst = dst >> src */ 832 case BPF_RSH: 833 emit(ARM_LSR_R(dst, dst, src), ctx); 834 break; 835 /* dst = dst >> src (signed)*/ 836 case BPF_ARSH: 837 emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx); 838 break; 839 } 840 } 841 842 /* ALU operation (64 bit) */ 843 static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], 844 const s8 src[], struct jit_ctx *ctx, 845 const u8 op) { 846 const s8 *tmp = bpf2a32[TMP_REG_1]; 847 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 848 const s8 *rd; 849 850 rd = arm_bpf_get_reg64(dst, tmp, ctx); 851 if (is64) { 852 const s8 *rs; 853 854 rs = arm_bpf_get_reg64(src, tmp2, ctx); 855 856 /* ALU operation */ 857 emit_alu_r(rd[1], rs[1], true, false, op, ctx); 858 emit_alu_r(rd[0], rs[0], true, true, op, ctx); 859 } else { 860 s8 rs; 861 862 rs = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); 863 864 /* ALU operation */ 865 emit_alu_r(rd[1], rs, true, false, op, ctx); 866 if (!ctx->prog->aux->verifier_zext) 867 emit_a32_mov_i(rd[0], 0, ctx); 868 } 869 870 arm_bpf_put_reg64(dst, rd, ctx); 871 } 872 873 /* dst = src (4 bytes)*/ 874 static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off, 875 struct jit_ctx *ctx) { 876 const s8 *tmp = bpf2a32[TMP_REG_1]; 877 s8 rt; 878 879 rt = arm_bpf_get_reg32(src, tmp[0], ctx); 880 if (off && off != 32) { 881 emit(ARM_LSL_I(rt, rt, 32 - off), ctx); 882 emit(ARM_ASR_I(rt, rt, 32 - off), ctx); 883 } 884 arm_bpf_put_reg32(dst, rt, ctx); 885 } 886 887 /* dst = src */ 888 static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], 889 const s8 src[], 890 struct jit_ctx *ctx) { 891 if (!is64) { 892 emit_a32_mov_r(dst_lo, src_lo, 0, ctx); 893 if (!ctx->prog->aux->verifier_zext) 894 /* Zero out high 4 bytes */ 895 emit_a32_mov_i(dst_hi, 0, ctx); 896 } else if (__LINUX_ARM_ARCH__ < 6 && 897 ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { 898 /* complete 8 byte move */ 899 emit_a32_mov_r(dst_lo, src_lo, 0, ctx); 900 emit_a32_mov_r(dst_hi, src_hi, 0, ctx); 901 } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { 902 const u8 *tmp = bpf2a32[TMP_REG_1]; 903 904 emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); 905 emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); 906 } else if (is_stacked(src_lo)) { 907 emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); 908 } else if (is_stacked(dst_lo)) { 909 emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); 910 } else { 911 emit(ARM_MOV_R(dst[0], src[0]), ctx); 912 emit(ARM_MOV_R(dst[1], src[1]), ctx); 913 } 914 } 915 916 /* dst = (signed)src */ 917 static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[], 918 struct jit_ctx *ctx) { 919 const s8 *tmp = bpf2a32[TMP_REG_1]; 920 const s8 *rt; 921 922 rt = arm_bpf_get_reg64(dst, tmp, ctx); 923 924 emit_a32_mov_r(dst_lo, src_lo, off, ctx); 925 if (!is64) { 926 if (!ctx->prog->aux->verifier_zext) 927 /* Zero out high 4 bytes */ 928 emit_a32_mov_i(dst_hi, 0, ctx); 929 } else { 930 emit(ARM_ASR_I(rt[0], rt[1], 31), ctx); 931 } 932 } 933 934 /* Shift operations */ 935 static inline void emit_a32_alu_i(const s8 dst, const u32 val, 936 struct jit_ctx *ctx, const u8 op) { 937 const s8 *tmp = bpf2a32[TMP_REG_1]; 938 s8 rd; 939 940 rd = arm_bpf_get_reg32(dst, tmp[0], ctx); 941 942 /* Do shift operation */ 943 switch (op) { 944 case BPF_LSH: 945 emit(ARM_LSL_I(rd, rd, val), ctx); 946 break; 947 case BPF_RSH: 948 emit(ARM_LSR_I(rd, rd, val), ctx); 949 break; 950 case BPF_ARSH: 951 emit(ARM_ASR_I(rd, rd, val), ctx); 952 break; 953 case BPF_NEG: 954 emit(ARM_RSB_I(rd, rd, val), ctx); 955 break; 956 } 957 958 arm_bpf_put_reg32(dst, rd, ctx); 959 } 960 961 /* dst = ~dst (64 bit) */ 962 static inline void emit_a32_neg64(const s8 dst[], 963 struct jit_ctx *ctx){ 964 const s8 *tmp = bpf2a32[TMP_REG_1]; 965 const s8 *rd; 966 967 /* Setup Operand */ 968 rd = arm_bpf_get_reg64(dst, tmp, ctx); 969 970 /* Do Negate Operation */ 971 emit(ARM_RSBS_I(rd[1], rd[1], 0), ctx); 972 emit(ARM_RSC_I(rd[0], rd[0], 0), ctx); 973 974 arm_bpf_put_reg64(dst, rd, ctx); 975 } 976 977 /* dst = dst << src */ 978 static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], 979 struct jit_ctx *ctx) { 980 const s8 *tmp = bpf2a32[TMP_REG_1]; 981 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 982 const s8 *rd; 983 s8 rt; 984 985 /* Setup Operands */ 986 rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); 987 rd = arm_bpf_get_reg64(dst, tmp, ctx); 988 989 /* Do LSH operation */ 990 emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); 991 emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); 992 emit(ARM_MOV_SR(ARM_LR, rd[0], SRTYPE_ASL, rt), ctx); 993 emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[1], SRTYPE_ASL, ARM_IP), ctx); 994 emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd[1], SRTYPE_LSR, tmp2[0]), ctx); 995 emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_ASL, rt), ctx); 996 997 arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); 998 arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); 999 } 1000 1001 /* dst = dst >> src (signed)*/ 1002 static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], 1003 struct jit_ctx *ctx) { 1004 const s8 *tmp = bpf2a32[TMP_REG_1]; 1005 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 1006 const s8 *rd; 1007 s8 rt; 1008 1009 /* Setup Operands */ 1010 rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); 1011 rd = arm_bpf_get_reg64(dst, tmp, ctx); 1012 1013 /* Do the ARSH operation */ 1014 emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); 1015 emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); 1016 emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); 1017 emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); 1018 _emit(ARM_COND_PL, 1019 ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx); 1020 emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx); 1021 1022 arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); 1023 arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); 1024 } 1025 1026 /* dst = dst >> src */ 1027 static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], 1028 struct jit_ctx *ctx) { 1029 const s8 *tmp = bpf2a32[TMP_REG_1]; 1030 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 1031 const s8 *rd; 1032 s8 rt; 1033 1034 /* Setup Operands */ 1035 rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); 1036 rd = arm_bpf_get_reg64(dst, tmp, ctx); 1037 1038 /* Do RSH operation */ 1039 emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); 1040 emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); 1041 emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); 1042 emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); 1043 emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_LSR, tmp2[0]), ctx); 1044 emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_LSR, rt), ctx); 1045 1046 arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); 1047 arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); 1048 } 1049 1050 /* dst = dst << val */ 1051 static inline void emit_a32_lsh_i64(const s8 dst[], 1052 const u32 val, struct jit_ctx *ctx){ 1053 const s8 *tmp = bpf2a32[TMP_REG_1]; 1054 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 1055 const s8 *rd; 1056 1057 /* Setup operands */ 1058 rd = arm_bpf_get_reg64(dst, tmp, ctx); 1059 1060 /* Do LSH operation */ 1061 if (val < 32) { 1062 emit(ARM_MOV_SI(tmp2[0], rd[0], SRTYPE_ASL, val), ctx); 1063 emit(ARM_ORR_SI(rd[0], tmp2[0], rd[1], SRTYPE_LSR, 32 - val), ctx); 1064 emit(ARM_MOV_SI(rd[1], rd[1], SRTYPE_ASL, val), ctx); 1065 } else { 1066 if (val == 32) 1067 emit(ARM_MOV_R(rd[0], rd[1]), ctx); 1068 else 1069 emit(ARM_MOV_SI(rd[0], rd[1], SRTYPE_ASL, val - 32), ctx); 1070 emit(ARM_EOR_R(rd[1], rd[1], rd[1]), ctx); 1071 } 1072 1073 arm_bpf_put_reg64(dst, rd, ctx); 1074 } 1075 1076 /* dst = dst >> val */ 1077 static inline void emit_a32_rsh_i64(const s8 dst[], 1078 const u32 val, struct jit_ctx *ctx) { 1079 const s8 *tmp = bpf2a32[TMP_REG_1]; 1080 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 1081 const s8 *rd; 1082 1083 /* Setup operands */ 1084 rd = arm_bpf_get_reg64(dst, tmp, ctx); 1085 1086 /* Do LSR operation */ 1087 if (val == 0) { 1088 /* An immediate value of 0 encodes a shift amount of 32 1089 * for LSR. To shift by 0, don't do anything. 1090 */ 1091 } else if (val < 32) { 1092 emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); 1093 emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); 1094 emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); 1095 } else if (val == 32) { 1096 emit(ARM_MOV_R(rd[1], rd[0]), ctx); 1097 emit(ARM_MOV_I(rd[0], 0), ctx); 1098 } else { 1099 emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_LSR, val - 32), ctx); 1100 emit(ARM_MOV_I(rd[0], 0), ctx); 1101 } 1102 1103 arm_bpf_put_reg64(dst, rd, ctx); 1104 } 1105 1106 /* dst = dst >> val (signed) */ 1107 static inline void emit_a32_arsh_i64(const s8 dst[], 1108 const u32 val, struct jit_ctx *ctx){ 1109 const s8 *tmp = bpf2a32[TMP_REG_1]; 1110 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 1111 const s8 *rd; 1112 1113 /* Setup operands */ 1114 rd = arm_bpf_get_reg64(dst, tmp, ctx); 1115 1116 /* Do ARSH operation */ 1117 if (val == 0) { 1118 /* An immediate value of 0 encodes a shift amount of 32 1119 * for ASR. To shift by 0, don't do anything. 1120 */ 1121 } else if (val < 32) { 1122 emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); 1123 emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); 1124 emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); 1125 } else if (val == 32) { 1126 emit(ARM_MOV_R(rd[1], rd[0]), ctx); 1127 emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); 1128 } else { 1129 emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_ASR, val - 32), ctx); 1130 emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); 1131 } 1132 1133 arm_bpf_put_reg64(dst, rd, ctx); 1134 } 1135 1136 static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], 1137 struct jit_ctx *ctx) { 1138 const s8 *tmp = bpf2a32[TMP_REG_1]; 1139 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 1140 const s8 *rd, *rt; 1141 1142 /* Setup operands for multiplication */ 1143 rd = arm_bpf_get_reg64(dst, tmp, ctx); 1144 rt = arm_bpf_get_reg64(src, tmp2, ctx); 1145 1146 /* Do Multiplication */ 1147 emit(ARM_MUL(ARM_IP, rd[1], rt[0]), ctx); 1148 emit(ARM_MUL(ARM_LR, rd[0], rt[1]), ctx); 1149 emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); 1150 1151 emit(ARM_UMULL(ARM_IP, rd[0], rd[1], rt[1]), ctx); 1152 emit(ARM_ADD_R(rd[0], ARM_LR, rd[0]), ctx); 1153 1154 arm_bpf_put_reg32(dst_lo, ARM_IP, ctx); 1155 arm_bpf_put_reg32(dst_hi, rd[0], ctx); 1156 } 1157 1158 static bool is_ldst_imm(s16 off, const u8 size) 1159 { 1160 s16 off_max = 0; 1161 1162 switch (size) { 1163 case BPF_B: 1164 case BPF_W: 1165 off_max = 0xfff; 1166 break; 1167 case BPF_H: 1168 off_max = 0xff; 1169 break; 1170 case BPF_DW: 1171 /* Need to make sure off+4 does not overflow. */ 1172 off_max = 0xfff - 4; 1173 break; 1174 } 1175 return -off_max <= off && off <= off_max; 1176 } 1177 1178 static bool is_ldst_imm8(s16 off, const u8 size) 1179 { 1180 s16 off_max = 0; 1181 1182 switch (size) { 1183 case BPF_B: 1184 off_max = 0xff; 1185 break; 1186 case BPF_W: 1187 off_max = 0xfff; 1188 break; 1189 case BPF_H: 1190 off_max = 0xff; 1191 break; 1192 } 1193 return -off_max <= off && off <= off_max; 1194 } 1195 1196 /* *(size *)(dst + off) = src */ 1197 static inline void emit_str_r(const s8 dst, const s8 src[], 1198 s16 off, struct jit_ctx *ctx, const u8 sz){ 1199 const s8 *tmp = bpf2a32[TMP_REG_1]; 1200 s8 rd; 1201 1202 rd = arm_bpf_get_reg32(dst, tmp[1], ctx); 1203 1204 if (!is_ldst_imm(off, sz)) { 1205 emit_a32_mov_i(tmp[0], off, ctx); 1206 emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); 1207 rd = tmp[0]; 1208 off = 0; 1209 } 1210 switch (sz) { 1211 case BPF_B: 1212 /* Store a Byte */ 1213 emit(ARM_STRB_I(src_lo, rd, off), ctx); 1214 break; 1215 case BPF_H: 1216 /* Store a HalfWord */ 1217 emit(ARM_STRH_I(src_lo, rd, off), ctx); 1218 break; 1219 case BPF_W: 1220 /* Store a Word */ 1221 emit(ARM_STR_I(src_lo, rd, off), ctx); 1222 break; 1223 case BPF_DW: 1224 /* Store a Double Word */ 1225 emit(ARM_STR_I(src_lo, rd, off), ctx); 1226 emit(ARM_STR_I(src_hi, rd, off + 4), ctx); 1227 break; 1228 } 1229 } 1230 1231 /* dst = *(size*)(src + off) */ 1232 static inline void emit_ldx_r(const s8 dst[], const s8 src, 1233 s16 off, struct jit_ctx *ctx, const u8 sz){ 1234 const s8 *tmp = bpf2a32[TMP_REG_1]; 1235 const s8 *rd = is_stacked(dst_lo) ? tmp : dst; 1236 s8 rm = src; 1237 1238 if (!is_ldst_imm(off, sz)) { 1239 emit_a32_mov_i(tmp[0], off, ctx); 1240 emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); 1241 rm = tmp[0]; 1242 off = 0; 1243 } else if (rd[1] == rm) { 1244 emit(ARM_MOV_R(tmp[0], rm), ctx); 1245 rm = tmp[0]; 1246 } 1247 switch (sz) { 1248 case BPF_B: 1249 /* Load a Byte */ 1250 emit(ARM_LDRB_I(rd[1], rm, off), ctx); 1251 if (!ctx->prog->aux->verifier_zext) 1252 emit_a32_mov_i(rd[0], 0, ctx); 1253 break; 1254 case BPF_H: 1255 /* Load a HalfWord */ 1256 emit(ARM_LDRH_I(rd[1], rm, off), ctx); 1257 if (!ctx->prog->aux->verifier_zext) 1258 emit_a32_mov_i(rd[0], 0, ctx); 1259 break; 1260 case BPF_W: 1261 /* Load a Word */ 1262 emit(ARM_LDR_I(rd[1], rm, off), ctx); 1263 if (!ctx->prog->aux->verifier_zext) 1264 emit_a32_mov_i(rd[0], 0, ctx); 1265 break; 1266 case BPF_DW: 1267 /* Load a Double Word */ 1268 emit(ARM_LDR_I(rd[1], rm, off), ctx); 1269 emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); 1270 break; 1271 } 1272 arm_bpf_put_reg64(dst, rd, ctx); 1273 } 1274 1275 /* dst = *(signed size*)(src + off) */ 1276 static inline void emit_ldsx_r(const s8 dst[], const s8 src, 1277 s16 off, struct jit_ctx *ctx, const u8 sz){ 1278 const s8 *tmp = bpf2a32[TMP_REG_1]; 1279 const s8 *rd = is_stacked(dst_lo) ? tmp : dst; 1280 s8 rm = src; 1281 int add_off; 1282 1283 if (!is_ldst_imm8(off, sz)) { 1284 /* 1285 * offset does not fit in the load/store immediate, 1286 * construct an ADD instruction to apply the offset. 1287 */ 1288 add_off = imm8m(off); 1289 if (add_off > 0) { 1290 emit(ARM_ADD_I(tmp[0], src, add_off), ctx); 1291 rm = tmp[0]; 1292 } else { 1293 emit_a32_mov_i(tmp[0], off, ctx); 1294 emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); 1295 rm = tmp[0]; 1296 } 1297 off = 0; 1298 } 1299 1300 switch (sz) { 1301 case BPF_B: 1302 /* Load a Byte with sign extension*/ 1303 emit(ARM_LDRSB_I(rd[1], rm, off), ctx); 1304 break; 1305 case BPF_H: 1306 /* Load a HalfWord with sign extension*/ 1307 emit(ARM_LDRSH_I(rd[1], rm, off), ctx); 1308 break; 1309 case BPF_W: 1310 /* Load a Word*/ 1311 emit(ARM_LDR_I(rd[1], rm, off), ctx); 1312 break; 1313 } 1314 /* Carry the sign extension to upper 32 bits */ 1315 emit(ARM_ASR_I(rd[0], rd[1], 31), ctx); 1316 arm_bpf_put_reg64(dst, rd, ctx); 1317 } 1318 1319 /* Arithmatic Operation */ 1320 static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, 1321 const u8 rn, struct jit_ctx *ctx, u8 op, 1322 bool is_jmp64) { 1323 switch (op) { 1324 case BPF_JSET: 1325 if (is_jmp64) { 1326 emit(ARM_AND_R(ARM_IP, rt, rn), ctx); 1327 emit(ARM_AND_R(ARM_LR, rd, rm), ctx); 1328 emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); 1329 } else { 1330 emit(ARM_ANDS_R(ARM_IP, rt, rn), ctx); 1331 } 1332 break; 1333 case BPF_JEQ: 1334 case BPF_JNE: 1335 case BPF_JGT: 1336 case BPF_JGE: 1337 case BPF_JLE: 1338 case BPF_JLT: 1339 if (is_jmp64) { 1340 emit(ARM_CMP_R(rd, rm), ctx); 1341 /* Only compare low halve if high halve are equal. */ 1342 _emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx); 1343 } else { 1344 emit(ARM_CMP_R(rt, rn), ctx); 1345 } 1346 break; 1347 case BPF_JSLE: 1348 case BPF_JSGT: 1349 emit(ARM_CMP_R(rn, rt), ctx); 1350 if (is_jmp64) 1351 emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx); 1352 break; 1353 case BPF_JSLT: 1354 case BPF_JSGE: 1355 emit(ARM_CMP_R(rt, rn), ctx); 1356 if (is_jmp64) 1357 emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx); 1358 break; 1359 } 1360 } 1361 1362 static int out_offset = -1; /* initialized on the first pass of build_body() */ 1363 static int emit_bpf_tail_call(struct jit_ctx *ctx) 1364 { 1365 1366 /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ 1367 const s8 *r2 = bpf2a32[BPF_REG_2]; 1368 const s8 *r3 = bpf2a32[BPF_REG_3]; 1369 const s8 *tmp = bpf2a32[TMP_REG_1]; 1370 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 1371 const s8 *tcc = bpf2a32[TCALL_CNT]; 1372 const s8 *tc; 1373 const int idx0 = ctx->idx; 1374 #define cur_offset (ctx->idx - idx0) 1375 #define jmp_offset (out_offset - (cur_offset) - 2) 1376 u32 lo, hi; 1377 s8 r_array, r_index; 1378 int off; 1379 1380 /* if (index >= array->map.max_entries) 1381 * goto out; 1382 */ 1383 BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) > 1384 ARM_INST_LDST__IMM12); 1385 off = offsetof(struct bpf_array, map.max_entries); 1386 r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx); 1387 /* index is 32-bit for arrays */ 1388 r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); 1389 /* array->map.max_entries */ 1390 emit(ARM_LDR_I(tmp[1], r_array, off), ctx); 1391 /* index >= array->map.max_entries */ 1392 emit(ARM_CMP_R(r_index, tmp[1]), ctx); 1393 _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); 1394 1395 /* tmp2[0] = array, tmp2[1] = index */ 1396 1397 /* 1398 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) 1399 * goto out; 1400 * tail_call_cnt++; 1401 */ 1402 lo = (u32)MAX_TAIL_CALL_CNT; 1403 hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); 1404 tc = arm_bpf_get_reg64(tcc, tmp, ctx); 1405 emit(ARM_CMP_I(tc[0], hi), ctx); 1406 _emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx); 1407 _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); 1408 emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx); 1409 emit(ARM_ADC_I(tc[0], tc[0], 0), ctx); 1410 arm_bpf_put_reg64(tcc, tmp, ctx); 1411 1412 /* prog = array->ptrs[index] 1413 * if (prog == NULL) 1414 * goto out; 1415 */ 1416 BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0); 1417 off = imm8m(offsetof(struct bpf_array, ptrs)); 1418 emit(ARM_ADD_I(tmp[1], r_array, off), ctx); 1419 emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx); 1420 emit(ARM_CMP_I(tmp[1], 0), ctx); 1421 _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); 1422 1423 /* goto *(prog->bpf_func + prologue_size); */ 1424 BUILD_BUG_ON(offsetof(struct bpf_prog, bpf_func) > 1425 ARM_INST_LDST__IMM12); 1426 off = offsetof(struct bpf_prog, bpf_func); 1427 emit(ARM_LDR_I(tmp[1], tmp[1], off), ctx); 1428 emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); 1429 emit_bx_r(tmp[1], ctx); 1430 1431 /* out: */ 1432 if (out_offset == -1) 1433 out_offset = cur_offset; 1434 if (cur_offset != out_offset) { 1435 pr_err_once("tail_call out_offset = %d, expected %d!\n", 1436 cur_offset, out_offset); 1437 return -1; 1438 } 1439 return 0; 1440 #undef cur_offset 1441 #undef jmp_offset 1442 } 1443 1444 /* 0xabcd => 0xcdab */ 1445 static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx) 1446 { 1447 #if __LINUX_ARM_ARCH__ < 6 1448 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 1449 1450 emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); 1451 emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx); 1452 emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx); 1453 emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx); 1454 #else /* ARMv6+ */ 1455 emit(ARM_REV16(rd, rn), ctx); 1456 #endif 1457 } 1458 1459 /* 0xabcdefgh => 0xghefcdab */ 1460 static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) 1461 { 1462 #if __LINUX_ARM_ARCH__ < 6 1463 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 1464 1465 emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); 1466 emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx); 1467 emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx); 1468 1469 emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx); 1470 emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx); 1471 emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx); 1472 emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx); 1473 emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx); 1474 emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx); 1475 emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx); 1476 1477 #else /* ARMv6+ */ 1478 emit(ARM_REV(rd, rn), ctx); 1479 #endif 1480 } 1481 1482 // push the scratch stack register on top of the stack 1483 static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx) 1484 { 1485 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 1486 const s8 *rt; 1487 u16 reg_set = 0; 1488 1489 rt = arm_bpf_get_reg64(src, tmp2, ctx); 1490 1491 reg_set = (1 << rt[1]) | (1 << rt[0]); 1492 emit(ARM_PUSH(reg_set), ctx); 1493 } 1494 1495 static void build_prologue(struct jit_ctx *ctx) 1496 { 1497 const s8 arm_r0 = bpf2a32[BPF_REG_0][1]; 1498 const s8 *bpf_r1 = bpf2a32[BPF_REG_1]; 1499 const s8 *bpf_fp = bpf2a32[BPF_REG_FP]; 1500 const s8 *tcc = bpf2a32[TCALL_CNT]; 1501 1502 /* Save callee saved registers. */ 1503 #ifdef CONFIG_FRAME_POINTER 1504 u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC; 1505 emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); 1506 emit(ARM_PUSH(reg_set), ctx); 1507 emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); 1508 #else 1509 emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx); 1510 emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx); 1511 #endif 1512 /* mov r3, #0 */ 1513 /* sub r2, sp, #SCRATCH_SIZE */ 1514 emit(ARM_MOV_I(bpf_r1[0], 0), ctx); 1515 emit(ARM_SUB_I(bpf_r1[1], ARM_SP, SCRATCH_SIZE), ctx); 1516 1517 ctx->stack_size = imm8m(STACK_SIZE); 1518 1519 /* Set up function call stack */ 1520 emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); 1521 1522 /* Set up BPF prog stack base register */ 1523 emit_a32_mov_r64(true, bpf_fp, bpf_r1, ctx); 1524 1525 /* Initialize Tail Count */ 1526 emit(ARM_MOV_I(bpf_r1[1], 0), ctx); 1527 emit_a32_mov_r64(true, tcc, bpf_r1, ctx); 1528 1529 /* Move BPF_CTX to BPF_R1 */ 1530 emit(ARM_MOV_R(bpf_r1[1], arm_r0), ctx); 1531 1532 /* end of prologue */ 1533 } 1534 1535 /* restore callee saved registers. */ 1536 static void build_epilogue(struct jit_ctx *ctx) 1537 { 1538 #ifdef CONFIG_FRAME_POINTER 1539 /* When using frame pointers, some additional registers need to 1540 * be loaded. */ 1541 u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP; 1542 emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx); 1543 emit(ARM_LDM(ARM_SP, reg_set), ctx); 1544 #else 1545 /* Restore callee saved registers. */ 1546 emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx); 1547 emit(ARM_POP(CALLEE_POP_MASK), ctx); 1548 #endif 1549 } 1550 1551 /* 1552 * Convert an eBPF instruction to native instruction, i.e 1553 * JITs an eBPF instruction. 1554 * Returns : 1555 * 0 - Successfully JITed an 8-byte eBPF instruction 1556 * >0 - Successfully JITed a 16-byte eBPF instruction 1557 * <0 - Failed to JIT. 1558 */ 1559 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) 1560 { 1561 const u8 code = insn->code; 1562 const s8 *dst = bpf2a32[insn->dst_reg]; 1563 const s8 *src = bpf2a32[insn->src_reg]; 1564 const s8 *tmp = bpf2a32[TMP_REG_1]; 1565 const s8 *tmp2 = bpf2a32[TMP_REG_2]; 1566 const s16 off = insn->off; 1567 const s32 imm = insn->imm; 1568 const int i = insn - ctx->prog->insnsi; 1569 const bool is64 = BPF_CLASS(code) == BPF_ALU64; 1570 const s8 *rd, *rs; 1571 s8 rd_lo, rt, rm, rn; 1572 s32 jmp_offset; 1573 1574 #define check_imm(bits, imm) do { \ 1575 if ((imm) >= (1 << ((bits) - 1)) || \ 1576 (imm) < -(1 << ((bits) - 1))) { \ 1577 pr_info("[%2d] imm=%d(0x%x) out of range\n", \ 1578 i, imm, imm); \ 1579 return -EINVAL; \ 1580 } \ 1581 } while (0) 1582 #define check_imm24(imm) check_imm(24, imm) 1583 1584 switch (code) { 1585 /* ALU operations */ 1586 1587 /* dst = src */ 1588 case BPF_ALU | BPF_MOV | BPF_K: 1589 case BPF_ALU | BPF_MOV | BPF_X: 1590 case BPF_ALU64 | BPF_MOV | BPF_K: 1591 case BPF_ALU64 | BPF_MOV | BPF_X: 1592 switch (BPF_SRC(code)) { 1593 case BPF_X: 1594 if (imm == 1) { 1595 /* Special mov32 for zext */ 1596 emit_a32_mov_i(dst_hi, 0, ctx); 1597 break; 1598 } 1599 if (insn->off) 1600 emit_a32_movsx_r64(is64, insn->off, dst, src, ctx); 1601 else 1602 emit_a32_mov_r64(is64, dst, src, ctx); 1603 break; 1604 case BPF_K: 1605 /* Sign-extend immediate value to destination reg */ 1606 emit_a32_mov_se_i64(is64, dst, imm, ctx); 1607 break; 1608 } 1609 break; 1610 /* dst = dst + src/imm */ 1611 /* dst = dst - src/imm */ 1612 /* dst = dst | src/imm */ 1613 /* dst = dst & src/imm */ 1614 /* dst = dst ^ src/imm */ 1615 /* dst = dst * src/imm */ 1616 /* dst = dst << src */ 1617 /* dst = dst >> src */ 1618 case BPF_ALU | BPF_ADD | BPF_K: 1619 case BPF_ALU | BPF_ADD | BPF_X: 1620 case BPF_ALU | BPF_SUB | BPF_K: 1621 case BPF_ALU | BPF_SUB | BPF_X: 1622 case BPF_ALU | BPF_OR | BPF_K: 1623 case BPF_ALU | BPF_OR | BPF_X: 1624 case BPF_ALU | BPF_AND | BPF_K: 1625 case BPF_ALU | BPF_AND | BPF_X: 1626 case BPF_ALU | BPF_XOR | BPF_K: 1627 case BPF_ALU | BPF_XOR | BPF_X: 1628 case BPF_ALU | BPF_MUL | BPF_K: 1629 case BPF_ALU | BPF_MUL | BPF_X: 1630 case BPF_ALU | BPF_LSH | BPF_X: 1631 case BPF_ALU | BPF_RSH | BPF_X: 1632 case BPF_ALU | BPF_ARSH | BPF_X: 1633 case BPF_ALU64 | BPF_ADD | BPF_K: 1634 case BPF_ALU64 | BPF_ADD | BPF_X: 1635 case BPF_ALU64 | BPF_SUB | BPF_K: 1636 case BPF_ALU64 | BPF_SUB | BPF_X: 1637 case BPF_ALU64 | BPF_OR | BPF_K: 1638 case BPF_ALU64 | BPF_OR | BPF_X: 1639 case BPF_ALU64 | BPF_AND | BPF_K: 1640 case BPF_ALU64 | BPF_AND | BPF_X: 1641 case BPF_ALU64 | BPF_XOR | BPF_K: 1642 case BPF_ALU64 | BPF_XOR | BPF_X: 1643 switch (BPF_SRC(code)) { 1644 case BPF_X: 1645 emit_a32_alu_r64(is64, dst, src, ctx, BPF_OP(code)); 1646 break; 1647 case BPF_K: 1648 /* Move immediate value to the temporary register 1649 * and then do the ALU operation on the temporary 1650 * register as this will sign-extend the immediate 1651 * value into temporary reg and then it would be 1652 * safe to do the operation on it. 1653 */ 1654 emit_a32_mov_se_i64(is64, tmp2, imm, ctx); 1655 emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code)); 1656 break; 1657 } 1658 break; 1659 /* dst = dst / src(imm) */ 1660 /* dst = dst % src(imm) */ 1661 case BPF_ALU | BPF_DIV | BPF_K: 1662 case BPF_ALU | BPF_DIV | BPF_X: 1663 case BPF_ALU | BPF_MOD | BPF_K: 1664 case BPF_ALU | BPF_MOD | BPF_X: 1665 rd_lo = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx); 1666 switch (BPF_SRC(code)) { 1667 case BPF_X: 1668 rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx); 1669 break; 1670 case BPF_K: 1671 rt = tmp2[0]; 1672 emit_a32_mov_i(rt, imm, ctx); 1673 break; 1674 default: 1675 rt = src_lo; 1676 break; 1677 } 1678 emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off); 1679 arm_bpf_put_reg32(dst_lo, rd_lo, ctx); 1680 if (!ctx->prog->aux->verifier_zext) 1681 emit_a32_mov_i(dst_hi, 0, ctx); 1682 break; 1683 case BPF_ALU64 | BPF_DIV | BPF_K: 1684 case BPF_ALU64 | BPF_DIV | BPF_X: 1685 case BPF_ALU64 | BPF_MOD | BPF_K: 1686 case BPF_ALU64 | BPF_MOD | BPF_X: 1687 rd = arm_bpf_get_reg64(dst, tmp2, ctx); 1688 switch (BPF_SRC(code)) { 1689 case BPF_X: 1690 rs = arm_bpf_get_reg64(src, tmp, ctx); 1691 break; 1692 case BPF_K: 1693 rs = tmp; 1694 emit_a32_mov_se_i64(is64, rs, imm, ctx); 1695 break; 1696 } 1697 emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off); 1698 arm_bpf_put_reg64(dst, rd, ctx); 1699 break; 1700 /* dst = dst << imm */ 1701 /* dst = dst >> imm */ 1702 /* dst = dst >> imm (signed) */ 1703 case BPF_ALU | BPF_LSH | BPF_K: 1704 case BPF_ALU | BPF_RSH | BPF_K: 1705 case BPF_ALU | BPF_ARSH | BPF_K: 1706 if (unlikely(imm > 31)) 1707 return -EINVAL; 1708 if (imm) 1709 emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code)); 1710 if (!ctx->prog->aux->verifier_zext) 1711 emit_a32_mov_i(dst_hi, 0, ctx); 1712 break; 1713 /* dst = dst << imm */ 1714 case BPF_ALU64 | BPF_LSH | BPF_K: 1715 if (unlikely(imm > 63)) 1716 return -EINVAL; 1717 emit_a32_lsh_i64(dst, imm, ctx); 1718 break; 1719 /* dst = dst >> imm */ 1720 case BPF_ALU64 | BPF_RSH | BPF_K: 1721 if (unlikely(imm > 63)) 1722 return -EINVAL; 1723 emit_a32_rsh_i64(dst, imm, ctx); 1724 break; 1725 /* dst = dst << src */ 1726 case BPF_ALU64 | BPF_LSH | BPF_X: 1727 emit_a32_lsh_r64(dst, src, ctx); 1728 break; 1729 /* dst = dst >> src */ 1730 case BPF_ALU64 | BPF_RSH | BPF_X: 1731 emit_a32_rsh_r64(dst, src, ctx); 1732 break; 1733 /* dst = dst >> src (signed) */ 1734 case BPF_ALU64 | BPF_ARSH | BPF_X: 1735 emit_a32_arsh_r64(dst, src, ctx); 1736 break; 1737 /* dst = dst >> imm (signed) */ 1738 case BPF_ALU64 | BPF_ARSH | BPF_K: 1739 if (unlikely(imm > 63)) 1740 return -EINVAL; 1741 emit_a32_arsh_i64(dst, imm, ctx); 1742 break; 1743 /* dst = ~dst */ 1744 case BPF_ALU | BPF_NEG: 1745 emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code)); 1746 if (!ctx->prog->aux->verifier_zext) 1747 emit_a32_mov_i(dst_hi, 0, ctx); 1748 break; 1749 /* dst = ~dst (64 bit) */ 1750 case BPF_ALU64 | BPF_NEG: 1751 emit_a32_neg64(dst, ctx); 1752 break; 1753 /* dst = dst * src/imm */ 1754 case BPF_ALU64 | BPF_MUL | BPF_X: 1755 case BPF_ALU64 | BPF_MUL | BPF_K: 1756 switch (BPF_SRC(code)) { 1757 case BPF_X: 1758 emit_a32_mul_r64(dst, src, ctx); 1759 break; 1760 case BPF_K: 1761 /* Move immediate value to the temporary register 1762 * and then do the multiplication on it as this 1763 * will sign-extend the immediate value into temp 1764 * reg then it would be safe to do the operation 1765 * on it. 1766 */ 1767 emit_a32_mov_se_i64(is64, tmp2, imm, ctx); 1768 emit_a32_mul_r64(dst, tmp2, ctx); 1769 break; 1770 } 1771 break; 1772 /* dst = htole(dst) */ 1773 /* dst = htobe(dst) */ 1774 case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */ 1775 case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */ 1776 /* dst = bswap(dst) */ 1777 case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */ 1778 rd = arm_bpf_get_reg64(dst, tmp, ctx); 1779 if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64) 1780 goto emit_bswap_uxt; 1781 switch (imm) { 1782 case 16: 1783 emit_rev16(rd[1], rd[1], ctx); 1784 goto emit_bswap_uxt; 1785 case 32: 1786 emit_rev32(rd[1], rd[1], ctx); 1787 goto emit_bswap_uxt; 1788 case 64: 1789 emit_rev32(ARM_LR, rd[1], ctx); 1790 emit_rev32(rd[1], rd[0], ctx); 1791 emit(ARM_MOV_R(rd[0], ARM_LR), ctx); 1792 break; 1793 } 1794 goto exit; 1795 emit_bswap_uxt: 1796 switch (imm) { 1797 case 16: 1798 /* zero-extend 16 bits into 64 bits */ 1799 #if __LINUX_ARM_ARCH__ < 6 1800 emit_a32_mov_i(tmp2[1], 0xffff, ctx); 1801 emit(ARM_AND_R(rd[1], rd[1], tmp2[1]), ctx); 1802 #else /* ARMv6+ */ 1803 emit(ARM_UXTH(rd[1], rd[1]), ctx); 1804 #endif 1805 if (!ctx->prog->aux->verifier_zext) 1806 emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); 1807 break; 1808 case 32: 1809 /* zero-extend 32 bits into 64 bits */ 1810 if (!ctx->prog->aux->verifier_zext) 1811 emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); 1812 break; 1813 case 64: 1814 /* nop */ 1815 break; 1816 } 1817 exit: 1818 arm_bpf_put_reg64(dst, rd, ctx); 1819 break; 1820 /* dst = imm64 */ 1821 case BPF_LD | BPF_IMM | BPF_DW: 1822 { 1823 u64 val = (u32)imm | (u64)insn[1].imm << 32; 1824 1825 emit_a32_mov_i64(dst, val, ctx); 1826 1827 return 1; 1828 } 1829 /* LDX: dst = *(size *)(src + off) */ 1830 case BPF_LDX | BPF_MEM | BPF_W: 1831 case BPF_LDX | BPF_MEM | BPF_H: 1832 case BPF_LDX | BPF_MEM | BPF_B: 1833 case BPF_LDX | BPF_MEM | BPF_DW: 1834 /* LDSX: dst = *(signed size *)(src + off) */ 1835 case BPF_LDX | BPF_MEMSX | BPF_B: 1836 case BPF_LDX | BPF_MEMSX | BPF_H: 1837 case BPF_LDX | BPF_MEMSX | BPF_W: 1838 rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); 1839 if (BPF_MODE(insn->code) == BPF_MEMSX) 1840 emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code)); 1841 else 1842 emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); 1843 break; 1844 /* speculation barrier */ 1845 case BPF_ST | BPF_NOSPEC: 1846 break; 1847 /* ST: *(size *)(dst + off) = imm */ 1848 case BPF_ST | BPF_MEM | BPF_W: 1849 case BPF_ST | BPF_MEM | BPF_H: 1850 case BPF_ST | BPF_MEM | BPF_B: 1851 case BPF_ST | BPF_MEM | BPF_DW: 1852 switch (BPF_SIZE(code)) { 1853 case BPF_DW: 1854 /* Sign-extend immediate value into temp reg */ 1855 emit_a32_mov_se_i64(true, tmp2, imm, ctx); 1856 break; 1857 case BPF_W: 1858 case BPF_H: 1859 case BPF_B: 1860 emit_a32_mov_i(tmp2[1], imm, ctx); 1861 break; 1862 } 1863 emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code)); 1864 break; 1865 /* Atomic ops */ 1866 case BPF_STX | BPF_ATOMIC | BPF_W: 1867 case BPF_STX | BPF_ATOMIC | BPF_DW: 1868 goto notyet; 1869 /* STX: *(size *)(dst + off) = src */ 1870 case BPF_STX | BPF_MEM | BPF_W: 1871 case BPF_STX | BPF_MEM | BPF_H: 1872 case BPF_STX | BPF_MEM | BPF_B: 1873 case BPF_STX | BPF_MEM | BPF_DW: 1874 rs = arm_bpf_get_reg64(src, tmp2, ctx); 1875 emit_str_r(dst_lo, rs, off, ctx, BPF_SIZE(code)); 1876 break; 1877 /* PC += off if dst == src */ 1878 /* PC += off if dst > src */ 1879 /* PC += off if dst >= src */ 1880 /* PC += off if dst < src */ 1881 /* PC += off if dst <= src */ 1882 /* PC += off if dst != src */ 1883 /* PC += off if dst > src (signed) */ 1884 /* PC += off if dst >= src (signed) */ 1885 /* PC += off if dst < src (signed) */ 1886 /* PC += off if dst <= src (signed) */ 1887 /* PC += off if dst & src */ 1888 case BPF_JMP | BPF_JEQ | BPF_X: 1889 case BPF_JMP | BPF_JGT | BPF_X: 1890 case BPF_JMP | BPF_JGE | BPF_X: 1891 case BPF_JMP | BPF_JNE | BPF_X: 1892 case BPF_JMP | BPF_JSGT | BPF_X: 1893 case BPF_JMP | BPF_JSGE | BPF_X: 1894 case BPF_JMP | BPF_JSET | BPF_X: 1895 case BPF_JMP | BPF_JLE | BPF_X: 1896 case BPF_JMP | BPF_JLT | BPF_X: 1897 case BPF_JMP | BPF_JSLT | BPF_X: 1898 case BPF_JMP | BPF_JSLE | BPF_X: 1899 case BPF_JMP32 | BPF_JEQ | BPF_X: 1900 case BPF_JMP32 | BPF_JGT | BPF_X: 1901 case BPF_JMP32 | BPF_JGE | BPF_X: 1902 case BPF_JMP32 | BPF_JNE | BPF_X: 1903 case BPF_JMP32 | BPF_JSGT | BPF_X: 1904 case BPF_JMP32 | BPF_JSGE | BPF_X: 1905 case BPF_JMP32 | BPF_JSET | BPF_X: 1906 case BPF_JMP32 | BPF_JLE | BPF_X: 1907 case BPF_JMP32 | BPF_JLT | BPF_X: 1908 case BPF_JMP32 | BPF_JSLT | BPF_X: 1909 case BPF_JMP32 | BPF_JSLE | BPF_X: 1910 /* Setup source registers */ 1911 rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); 1912 rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); 1913 goto go_jmp; 1914 /* PC += off if dst == imm */ 1915 /* PC += off if dst > imm */ 1916 /* PC += off if dst >= imm */ 1917 /* PC += off if dst < imm */ 1918 /* PC += off if dst <= imm */ 1919 /* PC += off if dst != imm */ 1920 /* PC += off if dst > imm (signed) */ 1921 /* PC += off if dst >= imm (signed) */ 1922 /* PC += off if dst < imm (signed) */ 1923 /* PC += off if dst <= imm (signed) */ 1924 /* PC += off if dst & imm */ 1925 case BPF_JMP | BPF_JEQ | BPF_K: 1926 case BPF_JMP | BPF_JGT | BPF_K: 1927 case BPF_JMP | BPF_JGE | BPF_K: 1928 case BPF_JMP | BPF_JNE | BPF_K: 1929 case BPF_JMP | BPF_JSGT | BPF_K: 1930 case BPF_JMP | BPF_JSGE | BPF_K: 1931 case BPF_JMP | BPF_JSET | BPF_K: 1932 case BPF_JMP | BPF_JLT | BPF_K: 1933 case BPF_JMP | BPF_JLE | BPF_K: 1934 case BPF_JMP | BPF_JSLT | BPF_K: 1935 case BPF_JMP | BPF_JSLE | BPF_K: 1936 case BPF_JMP32 | BPF_JEQ | BPF_K: 1937 case BPF_JMP32 | BPF_JGT | BPF_K: 1938 case BPF_JMP32 | BPF_JGE | BPF_K: 1939 case BPF_JMP32 | BPF_JNE | BPF_K: 1940 case BPF_JMP32 | BPF_JSGT | BPF_K: 1941 case BPF_JMP32 | BPF_JSGE | BPF_K: 1942 case BPF_JMP32 | BPF_JSET | BPF_K: 1943 case BPF_JMP32 | BPF_JLT | BPF_K: 1944 case BPF_JMP32 | BPF_JLE | BPF_K: 1945 case BPF_JMP32 | BPF_JSLT | BPF_K: 1946 case BPF_JMP32 | BPF_JSLE | BPF_K: 1947 if (off == 0) 1948 break; 1949 rm = tmp2[0]; 1950 rn = tmp2[1]; 1951 /* Sign-extend immediate value */ 1952 emit_a32_mov_se_i64(true, tmp2, imm, ctx); 1953 go_jmp: 1954 /* Setup destination register */ 1955 rd = arm_bpf_get_reg64(dst, tmp, ctx); 1956 1957 /* Check for the condition */ 1958 emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code), 1959 BPF_CLASS(code) == BPF_JMP); 1960 1961 /* Setup JUMP instruction */ 1962 jmp_offset = bpf2a32_offset(i+off, i, ctx); 1963 switch (BPF_OP(code)) { 1964 case BPF_JNE: 1965 case BPF_JSET: 1966 _emit(ARM_COND_NE, ARM_B(jmp_offset), ctx); 1967 break; 1968 case BPF_JEQ: 1969 _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); 1970 break; 1971 case BPF_JGT: 1972 _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); 1973 break; 1974 case BPF_JGE: 1975 _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); 1976 break; 1977 case BPF_JSGT: 1978 _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); 1979 break; 1980 case BPF_JSGE: 1981 _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); 1982 break; 1983 case BPF_JLE: 1984 _emit(ARM_COND_LS, ARM_B(jmp_offset), ctx); 1985 break; 1986 case BPF_JLT: 1987 _emit(ARM_COND_CC, ARM_B(jmp_offset), ctx); 1988 break; 1989 case BPF_JSLT: 1990 _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); 1991 break; 1992 case BPF_JSLE: 1993 _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); 1994 break; 1995 } 1996 break; 1997 /* JMP OFF */ 1998 case BPF_JMP | BPF_JA: 1999 case BPF_JMP32 | BPF_JA: 2000 { 2001 if (BPF_CLASS(code) == BPF_JMP32 && imm != 0) 2002 jmp_offset = bpf2a32_offset(i + imm, i, ctx); 2003 else if (BPF_CLASS(code) == BPF_JMP && off != 0) 2004 jmp_offset = bpf2a32_offset(i + off, i, ctx); 2005 else 2006 break; 2007 2008 check_imm24(jmp_offset); 2009 emit(ARM_B(jmp_offset), ctx); 2010 break; 2011 } 2012 /* tail call */ 2013 case BPF_JMP | BPF_TAIL_CALL: 2014 if (emit_bpf_tail_call(ctx)) 2015 return -EFAULT; 2016 break; 2017 /* function call */ 2018 case BPF_JMP | BPF_CALL: 2019 { 2020 const s8 *r0 = bpf2a32[BPF_REG_0]; 2021 const s8 *r1 = bpf2a32[BPF_REG_1]; 2022 const s8 *r2 = bpf2a32[BPF_REG_2]; 2023 const s8 *r3 = bpf2a32[BPF_REG_3]; 2024 const s8 *r4 = bpf2a32[BPF_REG_4]; 2025 const s8 *r5 = bpf2a32[BPF_REG_5]; 2026 const u32 func = (u32)__bpf_call_base + (u32)imm; 2027 2028 emit_a32_mov_r64(true, r0, r1, ctx); 2029 emit_a32_mov_r64(true, r1, r2, ctx); 2030 emit_push_r64(r5, ctx); 2031 emit_push_r64(r4, ctx); 2032 emit_push_r64(r3, ctx); 2033 2034 emit_a32_mov_i(tmp[1], func, ctx); 2035 emit_blx_r(tmp[1], ctx); 2036 2037 emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean 2038 break; 2039 } 2040 /* function return */ 2041 case BPF_JMP | BPF_EXIT: 2042 /* Optimization: when last instruction is EXIT 2043 * simply fallthrough to epilogue. 2044 */ 2045 if (i == ctx->prog->len - 1) 2046 break; 2047 jmp_offset = epilogue_offset(ctx); 2048 check_imm24(jmp_offset); 2049 emit(ARM_B(jmp_offset), ctx); 2050 break; 2051 notyet: 2052 pr_info_once("*** NOT YET: opcode %02x ***\n", code); 2053 return -EFAULT; 2054 default: 2055 pr_err_once("unknown opcode %02x\n", code); 2056 return -EINVAL; 2057 } 2058 2059 if (ctx->flags & FLAG_IMM_OVERFLOW) 2060 /* 2061 * this instruction generated an overflow when 2062 * trying to access the literal pool, so 2063 * delegate this filter to the kernel interpreter. 2064 */ 2065 return -1; 2066 return 0; 2067 } 2068 2069 static int build_body(struct jit_ctx *ctx) 2070 { 2071 const struct bpf_prog *prog = ctx->prog; 2072 unsigned int i; 2073 2074 for (i = 0; i < prog->len; i++) { 2075 const struct bpf_insn *insn = &(prog->insnsi[i]); 2076 int ret; 2077 2078 ret = build_insn(insn, ctx); 2079 2080 /* It's used with loading the 64 bit immediate value. */ 2081 if (ret > 0) { 2082 i++; 2083 if (ctx->target == NULL) 2084 ctx->offsets[i] = ctx->idx; 2085 continue; 2086 } 2087 2088 if (ctx->target == NULL) 2089 ctx->offsets[i] = ctx->idx; 2090 2091 /* If unsuccesful, return with error code */ 2092 if (ret) 2093 return ret; 2094 } 2095 return 0; 2096 } 2097 2098 static int validate_code(struct jit_ctx *ctx) 2099 { 2100 int i; 2101 2102 for (i = 0; i < ctx->idx; i++) { 2103 if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF)) 2104 return -1; 2105 } 2106 2107 return 0; 2108 } 2109 2110 bool bpf_jit_needs_zext(void) 2111 { 2112 return true; 2113 } 2114 2115 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) 2116 { 2117 struct bpf_prog *tmp, *orig_prog = prog; 2118 struct bpf_binary_header *header; 2119 bool tmp_blinded = false; 2120 struct jit_ctx ctx; 2121 unsigned int tmp_idx; 2122 unsigned int image_size; 2123 u8 *image_ptr; 2124 2125 /* If BPF JIT was not enabled then we must fall back to 2126 * the interpreter. 2127 */ 2128 if (!prog->jit_requested) 2129 return orig_prog; 2130 2131 /* If constant blinding was enabled and we failed during blinding 2132 * then we must fall back to the interpreter. Otherwise, we save 2133 * the new JITed code. 2134 */ 2135 tmp = bpf_jit_blind_constants(prog); 2136 2137 if (IS_ERR(tmp)) 2138 return orig_prog; 2139 if (tmp != prog) { 2140 tmp_blinded = true; 2141 prog = tmp; 2142 } 2143 2144 memset(&ctx, 0, sizeof(ctx)); 2145 ctx.prog = prog; 2146 ctx.cpu_architecture = cpu_architecture(); 2147 2148 /* Not able to allocate memory for offsets[] , then 2149 * we must fall back to the interpreter 2150 */ 2151 ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL); 2152 if (ctx.offsets == NULL) { 2153 prog = orig_prog; 2154 goto out; 2155 } 2156 2157 /* 1) fake pass to find in the length of the JITed code, 2158 * to compute ctx->offsets and other context variables 2159 * needed to compute final JITed code. 2160 * Also, calculate random starting pointer/start of JITed code 2161 * which is prefixed by random number of fault instructions. 2162 * 2163 * If the first pass fails then there is no chance of it 2164 * being successful in the second pass, so just fall back 2165 * to the interpreter. 2166 */ 2167 if (build_body(&ctx)) { 2168 prog = orig_prog; 2169 goto out_off; 2170 } 2171 2172 tmp_idx = ctx.idx; 2173 build_prologue(&ctx); 2174 ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; 2175 2176 ctx.epilogue_offset = ctx.idx; 2177 2178 #if __LINUX_ARM_ARCH__ < 7 2179 tmp_idx = ctx.idx; 2180 build_epilogue(&ctx); 2181 ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4; 2182 2183 ctx.idx += ctx.imm_count; 2184 if (ctx.imm_count) { 2185 ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL); 2186 if (ctx.imms == NULL) { 2187 prog = orig_prog; 2188 goto out_off; 2189 } 2190 } 2191 #else 2192 /* there's nothing about the epilogue on ARMv7 */ 2193 build_epilogue(&ctx); 2194 #endif 2195 /* Now we can get the actual image size of the JITed arm code. 2196 * Currently, we are not considering the THUMB-2 instructions 2197 * for jit, although it can decrease the size of the image. 2198 * 2199 * As each arm instruction is of length 32bit, we are translating 2200 * number of JITed instructions into the size required to store these 2201 * JITed code. 2202 */ 2203 image_size = sizeof(u32) * ctx.idx; 2204 2205 /* Now we know the size of the structure to make */ 2206 header = bpf_jit_binary_alloc(image_size, &image_ptr, 2207 sizeof(u32), jit_fill_hole); 2208 /* Not able to allocate memory for the structure then 2209 * we must fall back to the interpretation 2210 */ 2211 if (header == NULL) { 2212 prog = orig_prog; 2213 goto out_imms; 2214 } 2215 2216 /* 2.) Actual pass to generate final JIT code */ 2217 ctx.target = (u32 *) image_ptr; 2218 ctx.idx = 0; 2219 2220 build_prologue(&ctx); 2221 2222 /* If building the body of the JITed code fails somehow, 2223 * we fall back to the interpretation. 2224 */ 2225 if (build_body(&ctx) < 0) { 2226 image_ptr = NULL; 2227 bpf_jit_binary_free(header); 2228 prog = orig_prog; 2229 goto out_imms; 2230 } 2231 build_epilogue(&ctx); 2232 2233 /* 3.) Extra pass to validate JITed Code */ 2234 if (validate_code(&ctx)) { 2235 image_ptr = NULL; 2236 bpf_jit_binary_free(header); 2237 prog = orig_prog; 2238 goto out_imms; 2239 } 2240 flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx)); 2241 2242 if (bpf_jit_enable > 1) 2243 /* there are 2 passes here */ 2244 bpf_jit_dump(prog->len, image_size, 2, ctx.target); 2245 2246 bpf_jit_binary_lock_ro(header); 2247 prog->bpf_func = (void *)ctx.target; 2248 prog->jited = 1; 2249 prog->jited_len = image_size; 2250 2251 out_imms: 2252 #if __LINUX_ARM_ARCH__ < 7 2253 if (ctx.imm_count) 2254 kfree(ctx.imms); 2255 #endif 2256 out_off: 2257 kfree(ctx.offsets); 2258 out: 2259 if (tmp_blinded) 2260 bpf_jit_prog_release_other(prog, prog == orig_prog ? 2261 tmp : orig_prog); 2262 return prog; 2263 } 2264 2265