/*
 * Just-In-Time compiler for eBPF filters on 32bit ARM
 *
 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 */

#include <linux/bpf.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>

#include <asm/cacheflush.h>
#include <asm/hwcap.h>
#include <asm/opcodes.h>

#include "bpf_jit_32.h"

/*
 * eBPF prog stack layout:
 *
 *                           high
 * original ARM_SP =>      +-----+
 *                         |     | callee saved registers
 *                         +-----+ <= (BPF_FP + SCRATCH_SIZE)
 *                         | ... | eBPF JIT scratch space
 * eBPF fp register =>     +-----+
 *   (BPF_FP)              | ... | eBPF prog stack
 *                         +-----+
 *                         |RSVD | JIT scratchpad
 * current ARM_SP =>       +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
 *                         |     |
 *                         | ... | Function call stack
 *                         |     |
 *                         +-----+
 *                           low
 *
 * The callee saved registers depend on whether frame pointers are enabled.
 * With frame pointers (to be compliant with the ABI):
 *
 *                                 high
 * original ARM_SP =>      +------------------+ \
 *                         |        pc        | |
 * current ARM_FP =>       +------------------+ } callee saved registers
 *                         |r4-r8,r10,fp,ip,lr| |
 *                         +------------------+ /
 *                                 low
 *
 * Without frame pointers:
 *
 *                                 high
 * original ARM_SP =>      +------------------+
 *                         | r4-r8,r10,fp,lr  | callee saved registers
 * current ARM_FP =>       +------------------+
 *                                 low
 *
 * When popping registers off the stack at the end of a BPF function, we
 * reference them via the current ARM_FP register.
 */
#define CALLEE_MASK	(1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
			 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \
			 1 << ARM_FP)
#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
#define CALLEE_POP_MASK  (CALLEE_MASK | 1 << ARM_PC)

#define STACK_OFFSET(k)	(k)
#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)	/* TEMP Register 1 */
#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)	/* TEMP Register 2 */
#define TCALL_CNT	(MAX_BPF_JIT_REG + 2)	/* Tail Call Count */

#define FLAG_IMM_OVERFLOW	(1 << 0)

/*
 * Map eBPF registers to ARM 32bit registers or stack scratch space.
 *
 * 1. First argument is passed using the arm 32bit registers and the rest of
 *    the arguments are passed on the stack scratch space.
 * 2. First callee-saved argument is mapped to arm 32 bit registers and the
 *    rest of the arguments are mapped to the scratch space on the stack.
 * 3. We need two 64 bit temp registers to do complex operations on eBPF
 *    registers.
 *
 * As the eBPF registers are all 64 bit registers and arm has only 32 bit
 * registers, we have to map each eBPF register to two arm 32 bit regs or
 * scratch memory space and we have to build the eBPF 64 bit register from
 * those.
 *
 */
static const u8 bpf2a32[][2] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {ARM_R1, ARM_R0},
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = {ARM_R3, ARM_R2},
	/* Stored on stack scratch space */
	[BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)},
	[BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)},
	[BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)},
	[BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)},
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = {ARM_R5, ARM_R4},
	/* Stored on stack scratch space */
	[BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)},
	[BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)},
	[BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)},
	/* Read only Frame Pointer to access Stack */
	[BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)},
	/* Temporary Register for internal BPF JIT, can be used
	 * for constant blindings and others.
	 */
	[TMP_REG_1] = {ARM_R7, ARM_R6},
	[TMP_REG_2] = {ARM_R10, ARM_R8},
	/* Tail call count. Stored on stack scratch space. */
	[TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)},
	/* temporary register for blinding constants.
	 * Stored on stack scratch space.
	 */
	[BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)},
};

#define dst_lo	dst[1]
#define dst_hi	dst[0]
#define src_lo	src[1]
#define src_hi	src[0]

/*
 * JIT Context:
 *
 * prog			:	bpf_prog
 * idx			:	index of current last JITed instruction.
 * prologue_bytes	:	bytes used in prologue.
 * epilogue_offset	:	offset of epilogue starting.
 * offsets		:	array of eBPF instruction offsets in
 *				JITed code.
 * target		:	final JITed code.
 * epilogue_bytes	:	no of bytes used in epilogue.
 * imm_count		:	no of immediate counts used for global
 *				variables.
 * imms			:	array of global variable addresses.
 */

struct jit_ctx {
	const struct bpf_prog *prog;
	unsigned int idx;
	unsigned int prologue_bytes;
	unsigned int epilogue_offset;
	u32 flags;
	u32 *offsets;
	u32 *target;
	u32 stack_size;
#if __LINUX_ARM_ARCH__ < 7
	u16 epilogue_bytes;
	u16 imm_count;
	u32 *imms;
#endif
};

/*
 * Wrappers which handle both OABI and EABI and assure Thumb2 interworking
 * (where the assembly routines like __aeabi_uidiv could cause problems).
 */
static u32 jit_udiv32(u32 dividend, u32 divisor)
{
	return dividend / divisor;
}

static u32 jit_mod32(u32 dividend, u32 divisor)
{
	return dividend % divisor;
}

static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
	inst |= (cond << 28);
	inst = __opcode_to_mem_arm(inst);

	if (ctx->target != NULL)
		ctx->target[ctx->idx] = inst;

	ctx->idx++;
}

/*
 * Emit an instruction that will be executed unconditionally.
 */
static inline void emit(u32 inst, struct jit_ctx *ctx)
{
	_emit(ARM_COND_AL, inst, ctx);
}

/*
 * Checks whether an immediate value can be encoded as an imm12 (12 bit)
 * rotated-immediate operand.
 */
static int16_t imm8m(u32 x)
{
	u32 rot;

	for (rot = 0; rot < 16; rot++)
		if ((x & ~ror32(0xff, 2 * rot)) == 0)
			return rol32(x, 2 * rot) | (rot << 8);
	return -1;
}
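
/*
 * Worked example (illustrative only): an ARM data-processing immediate is an
 * 8-bit value rotated right by an even amount.  For x = 0xff000000, rot = 4
 * gives ror32(0xff, 8) == 0xff000000, so imm8m() returns
 * rol32(0xff000000, 8) | (4 << 8) == 0x4ff, i.e. the constant can be encoded
 * directly in a single MOV.  A value such as 0x101 does not fit in 8
 * contiguous bits under any even rotation, so imm8m() returns -1 and the
 * caller has to fall back to emit_mov_i_no8m() (literal pool or movw/movt).
 */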

/*
 * Initializes the JIT space with undefined instructions.
 */
static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *ptr;
	/* We are guaranteed to have aligned memory. */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
}

#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
/* EABI requires the stack to be aligned to 64-bit boundaries */
#define STACK_ALIGNMENT	8
#else
/* Stack must be aligned to 32-bit boundaries */
#define STACK_ALIGNMENT	4
#endif

/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
 * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
 * BPF_REG_FP and Tail call counts.
 */
#define SCRATCH_SIZE 80

/* total stack size used in JITed code */
#define _STACK_SIZE	(ctx->prog->aux->stack_depth + SCRATCH_SIZE)

#define STACK_SIZE	ALIGN(_STACK_SIZE, STACK_ALIGNMENT)

/* Get the offset of eBPF REGISTERs stored on scratch space. */
#define STACK_VAR(off)	(STACK_SIZE - off)

#if __LINUX_ARM_ARCH__ < 7

static u16 imm_offset(u32 k, struct jit_ctx *ctx)
{
	unsigned int i = 0, offset;
	u16 imm;

	/* on the "fake" run we just count them (duplicates included) */
	if (ctx->target == NULL) {
		ctx->imm_count++;
		return 0;
	}

	while ((i < ctx->imm_count) && ctx->imms[i]) {
		if (ctx->imms[i] == k)
			break;
		i++;
	}

	if (ctx->imms[i] == 0)
		ctx->imms[i] = k;

	/* constants go just after the epilogue */
	offset = ctx->offsets[ctx->prog->len - 1] * 4;
	offset += ctx->prologue_bytes;
	offset += ctx->epilogue_bytes;
	offset += i * 4;

	ctx->target[offset / 4] = k;

	/* PC in ARM mode == address of the instruction + 8 */
	imm = offset - (8 + ctx->idx * 4);

	if (imm & ~0xfff) {
		/*
		 * literal pool is too far, signal it into flags. we
		 * can only detect it on the second pass unfortunately.
		 */
		ctx->flags |= FLAG_IMM_OVERFLOW;
		return 0;
	}

	return imm;
}

#endif /* __LINUX_ARM_ARCH__ */

static inline int bpf2a32_offset(int bpf_to, int bpf_from,
				 const struct jit_ctx *ctx) {
	int to, from;

	if (ctx->target == NULL)
		return 0;
	to = ctx->offsets[bpf_to];
	from = ctx->offsets[bpf_from];

	return to - from - 1;
}

/*
 * Move an immediate that's not an imm8m to a core register.
 */
static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 7
	emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
#else
	emit(ARM_MOVW(rd, val & 0xffff), ctx);
	if (val > 0xffff)
		emit(ARM_MOVT(rd, val >> 16), ctx);
#endif
}

static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
{
	int imm12 = imm8m(val);

	if (imm12 >= 0)
		emit(ARM_MOV_I(rd, imm12), ctx);
	else
		emit_mov_i_no8m(rd, val, ctx);
}

static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
	if (elf_hwcap & HWCAP_THUMB)
		emit(ARM_BX(tgt_reg), ctx);
	else
		emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
}

static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 5
	emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
	emit_bx_r(tgt_reg, ctx);
#else
	emit(ARM_BLX_R(tgt_reg), ctx);
#endif
}

static inline int epilogue_offset(const struct jit_ctx *ctx)
{
	int to, from;
	/* No need for 1st dummy run */
	if (ctx->target == NULL)
		return 0;
	to = ctx->epilogue_offset;
	from = ctx->idx;

	return to - from - 2;
}

static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
{
	const u8 *tmp = bpf2a32[TMP_REG_1];

#if __LINUX_ARM_ARCH__ == 7
	if (elf_hwcap & HWCAP_IDIVA) {
		if (op == BPF_DIV)
			emit(ARM_UDIV(rd, rm, rn), ctx);
		else {
			emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
		}
		return;
	}
#endif

	/*
	 * For BPF_ALU | BPF_DIV | BPF_K instructions, ARM_R0 and ARM_R1
	 * hold the first argument of the BPF function, so save them on
	 * the caller side before they get clobbered by the callee and
	 * restore ARM_R0 and ARM_R1 after the callee returns.
	 */
	if (rn != ARM_R1) {
		emit(ARM_MOV_R(tmp[0], ARM_R1), ctx);
		emit(ARM_MOV_R(ARM_R1, rn), ctx);
	}
	if (rm != ARM_R0) {
		emit(ARM_MOV_R(tmp[1], ARM_R0), ctx);
		emit(ARM_MOV_R(ARM_R0, rm), ctx);
	}

	/* Call appropriate function */
	emit_mov_i(ARM_IP, op == BPF_DIV ?
		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
	emit_blx_r(ARM_IP, ctx);

	/* Save return value */
	if (rd != ARM_R0)
		emit(ARM_MOV_R(rd, ARM_R0), ctx);

	/* Restore ARM_R0 and ARM_R1 */
	if (rn != ARM_R1)
		emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx);
	if (rm != ARM_R0)
		emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}

/* Checks whether BPF register is on scratch stack space or not. */
static inline bool is_on_stack(u8 bpf_reg)
{
	static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5,
				  BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT,
				  BPF_REG_2, BPF_REG_FP};
	int i, reg_len = sizeof(stack_regs);

	for (i = 0 ; i < reg_len ; i++) {
		if (bpf_reg == stack_regs[i])
			return true;
	}
	return false;
}

static inline void emit_a32_mov_i(const u8 dst, const u32 val,
				  bool dstk, struct jit_ctx *ctx)
{
	const u8 *tmp = bpf2a32[TMP_REG_1];

	if (dstk) {
		emit_mov_i(tmp[1], val, ctx);
		emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx);
	} else {
		emit_mov_i(dst, val, ctx);
	}
}

/* Sign extended move */
static inline void emit_a32_mov_i64(const bool is64, const u8 dst[],
				    const u32 val, bool dstk,
				    struct jit_ctx *ctx) {
	u32 hi = 0;

	if (is64 && (val & (1<<31)))
		hi = (u32)~0;
	emit_a32_mov_i(dst_lo, val, dstk, ctx);
	emit_a32_mov_i(dst_hi, hi, dstk, ctx);
}

static inline void emit_a32_add_r(const u8 dst, const u8 src,
				  const bool is64, const bool hi,
				  struct jit_ctx *ctx) {
	/* 64 bit :
	 *	adds dst_lo, dst_lo, src_lo
	 *	adc dst_hi, dst_hi, src_hi
	 * 32 bit :
	 *	add dst_lo, dst_lo, src_lo
	 */
	if (!hi && is64)
		emit(ARM_ADDS_R(dst, dst, src), ctx);
	else if (hi && is64)
		emit(ARM_ADC_R(dst, dst, src), ctx);
	else
		emit(ARM_ADD_R(dst, dst, src), ctx);
}

static inline void emit_a32_sub_r(const u8 dst, const u8 src,
				  const bool is64, const bool hi,
				  struct jit_ctx *ctx) {
	/* 64 bit :
	 *	subs dst_lo, dst_lo, src_lo
	 *	sbc dst_hi, dst_hi, src_hi
	 * 32 bit :
	 *	sub dst_lo, dst_lo, src_lo
	 */
	if (!hi && is64)
		emit(ARM_SUBS_R(dst, dst, src), ctx);
	else if (hi && is64)
		emit(ARM_SBC_R(dst, dst, src), ctx);
	else
		emit(ARM_SUB_R(dst, dst, src), ctx);
}

static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
			      const bool hi, const u8 op, struct jit_ctx *ctx) {
	switch (BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		emit_a32_add_r(dst, src, is64, hi, ctx);
		break;
	/* dst = dst - src */
	case BPF_SUB:
		emit_a32_sub_r(dst, src, is64, hi, ctx);
		break;
	/* dst = dst | src */
	case BPF_OR:
		emit(ARM_ORR_R(dst, dst, src), ctx);
		break;
	/* dst = dst & src */
	case BPF_AND:
		emit(ARM_AND_R(dst, dst, src), ctx);
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		emit(ARM_EOR_R(dst, dst, src), ctx);
		break;
	/* dst = dst * src */
	case BPF_MUL:
		emit(ARM_MUL(dst, dst, src), ctx);
		break;
	/* dst = dst << src */
	case BPF_LSH:
		emit(ARM_LSL_R(dst, dst, src), ctx);
		break;
	/* dst = dst >> src */
	case BPF_RSH:
		emit(ARM_LSR_R(dst, dst, src), ctx);
		break;
	/* dst = dst >> src (signed) */
	case BPF_ARSH:
		emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx);
		break;
	}
}

/* ALU operation (32 bit)
 * dst = dst (op) src
 */
static inline void emit_a32_alu_r(const u8 dst, const u8 src,
				  bool dstk, bool sstk,
				  struct jit_ctx *ctx, const bool is64,
				  const bool hi, const u8 op) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rn = sstk ? tmp[1] : src;

	if (sstk)
		emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx);

	/* ALU operation */
	if (dstk) {
		emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
		emit_alu_r(tmp[0], rn, is64, hi, op, ctx);
		emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
	} else {
		emit_alu_r(dst, rn, is64, hi, op, ctx);
	}
}

/* ALU operation (64 bit) */
static inline void emit_a32_alu_r64(const bool is64, const u8 dst[],
				    const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx,
				    const u8 op) {
	emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op);
	if (is64)
		emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op);
	else
		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
}

/* dst = src (4 bytes) */
static inline void emit_a32_mov_r(const u8 dst, const u8 src,
				  bool dstk, bool sstk,
				  struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rt = sstk ? tmp[0] : src;

	if (sstk)
		emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx);
	if (dstk)
		emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx);
	else
		emit(ARM_MOV_R(dst, rt), ctx);
}

/* dst = src */
static inline void emit_a32_mov_r64(const bool is64, const u8 dst[],
				    const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx) {
	emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx);
	if (is64) {
		/* complete 8 byte move */
		emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx);
	} else {
		/* Zero out high 4 bytes */
		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
	}
}

/* Shift operations */
static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk,
				  struct jit_ctx *ctx, const u8 op) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rd = dstk ? tmp[0] : dst;

	if (dstk)
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);

	/* Do shift operation */
	switch (op) {
	case BPF_LSH:
		emit(ARM_LSL_I(rd, rd, val), ctx);
		break;
	case BPF_RSH:
		emit(ARM_LSR_I(rd, rd, val), ctx);
		break;
	case BPF_NEG:
		emit(ARM_RSB_I(rd, rd, val), ctx);
		break;
	}

	if (dstk)
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
}

/* dst = -dst (64 bit) */
static inline void emit_a32_neg64(const u8 dst[], bool dstk,
				  struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rd = dstk ? tmp[1] : dst[1];
	u8 rm = dstk ? tmp[0] : dst[0];

	/* Setup Operand */
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do Negate Operation */
	emit(ARM_RSBS_I(rd, rd, 0), ctx);
	emit(ARM_RSC_I(rm, rm, 0), ctx);

	if (dstk) {
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
}

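/*
 * The next three helpers build a 64 bit shift by a register-specified
 * amount out of 32 bit shifts (an illustrative sketch, using the left
 * shift with rt = shift amount, rd = dst_lo, rm = dst_hi):
 *
 *	hi = (rm << rt) | (rd << (rt - 32)) | (rd >> (32 - rt))
 *	lo = rd << rt
 *
 * ARM register-specified shifts use the bottom byte of the shift register
 * and yield 0 for amounts of 32 or more, so whichever terms do not apply
 * for the current rt contribute nothing (or duplicate an already ORed
 * value when rt == 32), giving the right result for any rt in [0, 63].
 * The logical and arithmetic right shifts follow the same pattern, with
 * the arithmetic variant using a conditional branch so that the
 * sign-extending term is only applied when rt >= 32.
 */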
/* dst = dst << src */
static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];

	/* Setup Operands */
	u8 rt = sstk ? tmp2[1] : src_lo;
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (sstk)
		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do LSH operation */
	emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
	emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
	emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
	emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx);

	if (dstk) {
		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
	} else {
		emit(ARM_MOV_R(rd, ARM_LR), ctx);
		emit(ARM_MOV_R(rm, ARM_IP), ctx);
	}
}

/* dst = dst >> src (signed) */
static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
				     bool sstk, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup Operands */
	u8 rt = sstk ? tmp2[1] : src_lo;
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (sstk)
		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do the ARSH operation */
	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
	_emit(ARM_COND_MI, ARM_B(0), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx);
	if (dstk) {
		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
	} else {
		emit(ARM_MOV_R(rd, ARM_LR), ctx);
		emit(ARM_MOV_R(rm, ARM_IP), ctx);
	}
}

/* dst = dst >> src */
static inline void emit_a32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup Operands */
	u8 rt = sstk ? tmp2[1] : src_lo;
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (sstk)
		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do RSH operation */
	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx);
	if (dstk) {
		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
	} else {
		emit(ARM_MOV_R(rd, ARM_LR), ctx);
		emit(ARM_MOV_R(rm, ARM_IP), ctx);
	}
}

/* dst = dst << val */
static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk,
				    const u32 val, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup operands */
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do LSH operation */
	if (val < 32) {
		emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx);
		emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx);
		emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx);
	} else {
		if (val == 32)
			emit(ARM_MOV_R(rm, rd), ctx);
		else
			emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx);
		emit(ARM_EOR_R(rd, rd, rd), ctx);
	}

	if (dstk) {
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
}

/* dst = dst >> val */
static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk,
				    const u32 val, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup operands */
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do LSR operation */
	if (val < 32) {
		emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
		emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
		emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx);
	} else if (val == 32) {
		emit(ARM_MOV_R(rd, rm), ctx);
		emit(ARM_MOV_I(rm, 0), ctx);
	} else {
		emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx);
		emit(ARM_MOV_I(rm, 0), ctx);
	}

	if (dstk) {
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
}

/* dst = dst >> val (signed) */
static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk,
				     const u32 val, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup operands */
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do ARSH operation */
	if (val < 32) {
		emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
		emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
		emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx);
	} else if (val == 32) {
		emit(ARM_MOV_R(rd, rm), ctx);
		emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
	} else {
		emit(ARM_MOV_SI(rd, rm, SRTYPE_ASR, val - 32), ctx);
		emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
	}

	if (dstk) {
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
}

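/*
 * 64 bit multiply, keeping only the low 64 bits of the product (which is
 * all BPF_MUL needs).  Writing the operands as (dst_hi:dst_lo) and
 * (src_hi:src_lo), the helper below computes, roughly:
 *
 *	lo = low32(dst_lo * src_lo)
 *	hi = high32(dst_lo * src_lo) + dst_lo * src_hi + dst_hi * src_lo
 *
 * The dst_hi * src_hi term is dropped because it only affects bits above
 * 2^64.  UMULL produces the full 64 bit dst_lo * src_lo product and the two
 * cross products are added into its high word.
 */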
static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup operands for multiplication */
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;
	u8 rt = sstk ? tmp2[1] : src_lo;
	u8 rn = sstk ? tmp2[0] : src_hi;

	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
	if (sstk) {
		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
		emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx);
	}

	/* Do Multiplication */
	emit(ARM_MUL(ARM_IP, rd, rn), ctx);
	emit(ARM_MUL(ARM_LR, rm, rt), ctx);
	emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);

	emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
	emit(ARM_ADD_R(rm, ARM_LR, rm), ctx);
	if (dstk) {
		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	} else {
		emit(ARM_MOV_R(rd, ARM_IP), ctx);
	}
}

/* *(size *)(dst + off) = src */
static inline void emit_str_r(const u8 dst, const u8 src, bool dstk,
			      const s32 off, struct jit_ctx *ctx, const u8 sz) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rd = dstk ? tmp[1] : dst;

	if (dstk)
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
	if (off) {
		emit_a32_mov_i(tmp[0], off, false, ctx);
		emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx);
		rd = tmp[0];
	}
	switch (sz) {
	case BPF_W:
		/* Store a Word */
		emit(ARM_STR_I(src, rd, 0), ctx);
		break;
	case BPF_H:
		/* Store a HalfWord */
		emit(ARM_STRH_I(src, rd, 0), ctx);
		break;
	case BPF_B:
		/* Store a Byte */
		emit(ARM_STRB_I(src, rd, 0), ctx);
		break;
	}
}

/* dst = *(size*)(src + off) */
static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
			      s32 off, struct jit_ctx *ctx, const u8 sz) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *rd = dstk ? tmp : dst;
	u8 rm = src;
	s32 off_max;

	if (sz == BPF_H)
		off_max = 0xff;
	else
		off_max = 0xfff;

	if (off < 0 || off > off_max) {
		emit_a32_mov_i(tmp[0], off, false, ctx);
		emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
		rm = tmp[0];
		off = 0;
	} else if (rd[1] == rm) {
		emit(ARM_MOV_R(tmp[0], rm), ctx);
		rm = tmp[0];
	}
	switch (sz) {
	case BPF_B:
		/* Load a Byte */
		emit(ARM_LDRB_I(rd[1], rm, off), ctx);
		emit_a32_mov_i(dst[0], 0, dstk, ctx);
		break;
	case BPF_H:
		/* Load a HalfWord */
		emit(ARM_LDRH_I(rd[1], rm, off), ctx);
		emit_a32_mov_i(dst[0], 0, dstk, ctx);
		break;
	case BPF_W:
		/* Load a Word */
		emit(ARM_LDR_I(rd[1], rm, off), ctx);
		emit_a32_mov_i(dst[0], 0, dstk, ctx);
		break;
	case BPF_DW:
		/* Load a Double Word */
		emit(ARM_LDR_I(rd[1], rm, off), ctx);
		emit(ARM_LDR_I(rd[0], rm, off + 4), ctx);
		break;
	}
	if (dstk)
		emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx);
	if (dstk && sz == BPF_DW)
		emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx);
}
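
/*
 * emit_ar_r() below only sets the condition flags for a 64 bit conditional
 * jump; the branch itself is emitted by the caller.  For the unsigned and
 * equality tests it compares the high words and, only when those are equal,
 * compares the low words (a conditional CMP), so the flags describe the
 * 64 bit unsigned relation.  For the signed tests it performs a 64 bit
 * subtraction with CMP on the low words followed by SBCS on the high words,
 * so the N and V flags reflect the sign of the full 64 bit difference;
 * JSGT/JSLE subtract dst from src while JSLT/JSGE subtract src from dst,
 * which is why both pairs can share the LT/GE condition codes chosen in
 * build_insn().
 */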
/* Arithmetic Operation */
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
			     const u8 rn, struct jit_ctx *ctx, u8 op) {
	switch (op) {
	case BPF_JSET:
		emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
		emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
		emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
		break;
	case BPF_JEQ:
	case BPF_JNE:
	case BPF_JGT:
	case BPF_JGE:
	case BPF_JLE:
	case BPF_JLT:
		emit(ARM_CMP_R(rd, rm), ctx);
		_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
		break;
	case BPF_JSLE:
	case BPF_JSGT:
		emit(ARM_CMP_R(rn, rt), ctx);
		emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
		break;
	case BPF_JSLT:
	case BPF_JSGE:
		emit(ARM_CMP_R(rt, rn), ctx);
		emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
		break;
	}
}

static int out_offset = -1; /* initialized on the first pass of build_body() */
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{

	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
	const u8 *r2 = bpf2a32[BPF_REG_2];
	const u8 *r3 = bpf2a32[BPF_REG_3];
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	const u8 *tcc = bpf2a32[TCALL_CNT];
	const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset) - 2)
	u32 off, lo, hi;

	/* if (index >= array->map.max_entries)
	 *	goto out;
	 */
	off = offsetof(struct bpf_array, map.max_entries);
	/* array->map.max_entries */
	emit_a32_mov_i(tmp[1], off, false, ctx);
	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
	emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
	/* index is 32-bit for arrays */
	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
	/* index >= array->map.max_entries */
	emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
	_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);

	/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *	goto out;
	 * tail_call_cnt++;
	 */
	lo = (u32)MAX_TAIL_CALL_CNT;
	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
	emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
	emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
	emit(ARM_CMP_I(tmp[0], hi), ctx);
	_emit(ARM_COND_EQ, ARM_CMP_I(tmp[1], lo), ctx);
	_emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
	emit(ARM_ADDS_I(tmp[1], tmp[1], 1), ctx);
	emit(ARM_ADC_I(tmp[0], tmp[0], 0), ctx);
	emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
	emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);

	/* prog = array->ptrs[index]
	 * if (prog == NULL)
	 *	goto out;
	 */
	off = offsetof(struct bpf_array, ptrs);
	emit_a32_mov_i(tmp[1], off, false, ctx);
	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
	emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx);
	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
	emit(ARM_MOV_SI(tmp[0], tmp2[1], SRTYPE_ASL, 2), ctx);
	emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx);
	emit(ARM_CMP_I(tmp[1], 0), ctx);
	_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);

	/* goto *(prog->bpf_func + prologue_size); */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_a32_mov_i(tmp2[1], off, false, ctx);
	emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
	emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
	emit_bx_r(tmp[1], ctx);

	/* out: */
	if (out_offset == -1)
		out_offset = cur_offset;
	if (cur_offset != out_offset) {
		pr_err_once("tail_call out_offset = %d, expected %d!\n",
			    cur_offset, out_offset);
		return -1;
	}
	return 0;
#undef cur_offset
#undef jmp_offset
}

/* 0xabcd => 0xcdab */
static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 6
	const u8 *tmp2 = bpf2a32[TMP_REG_2];

	emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx);
	emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
	emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx);
#else /* ARMv6+ */
	emit(ARM_REV16(rd, rn), ctx);
#endif
}

/* 0xabcdefgh => 0xghefcdab */
static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 6
	const u8 *tmp2 = bpf2a32[TMP_REG_2];

	emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx);
	emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx);

	emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx);
	emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx);
	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx);
	emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
	emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx);
	emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx);
	emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx);

#else /* ARMv6+ */
	emit(ARM_REV(rd, rn), ctx);
#endif
}

/* push the scratch stack register on top of the stack */
static inline void emit_push_r64(const u8 src[], const u8 shift,
				 struct jit_ctx *ctx)
{
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	u16 reg_set = 0;

	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx);
	emit(ARM_LDR_I(tmp2[0], ARM_SP, STACK_VAR(src[0]+shift)), ctx);

	reg_set = (1 << tmp2[1]) | (1 << tmp2[0]);
	emit(ARM_PUSH(reg_set), ctx);
}

static void build_prologue(struct jit_ctx *ctx)
{
	const u8 r0 = bpf2a32[BPF_REG_0][1];
	const u8 r2 = bpf2a32[BPF_REG_1][1];
	const u8 r3 = bpf2a32[BPF_REG_1][0];
	const u8 r4 = bpf2a32[BPF_REG_6][1];
	const u8 fplo = bpf2a32[BPF_REG_FP][1];
	const u8 fphi = bpf2a32[BPF_REG_FP][0];
	const u8 *tcc = bpf2a32[TCALL_CNT];

	/* Save callee saved registers. */
#ifdef CONFIG_FRAME_POINTER
	u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC;
	emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
	emit(ARM_PUSH(reg_set), ctx);
	emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
#else
	emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
	emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
#endif
	/* Save frame pointer for later */
	emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);

	ctx->stack_size = imm8m(STACK_SIZE);

	/* Set up function call stack */
	emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);

	/* Set up BPF prog stack base register */
	emit_a32_mov_r(fplo, ARM_IP, true, false, ctx);
	emit_a32_mov_i(fphi, 0, true, ctx);

	/* mov r4, 0 */
	emit(ARM_MOV_I(r4, 0), ctx);

	/* Move BPF_CTX to BPF_R1 */
	emit(ARM_MOV_R(r3, r4), ctx);
	emit(ARM_MOV_R(r2, r0), ctx);
	/* Initialize Tail Count */
	emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[0])), ctx);
	emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[1])), ctx);
	/* end of prologue */
}

/* restore callee saved registers. */
static void build_epilogue(struct jit_ctx *ctx)
{
#ifdef CONFIG_FRAME_POINTER
	/* When using frame pointers, some additional registers need to
	 * be loaded.
	 */
	u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP;
	emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx);
	emit(ARM_LDM(ARM_SP, reg_set), ctx);
#else
	/* Restore callee saved registers. */
	emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx);
	emit(ARM_POP(CALLEE_POP_MASK), ctx);
#endif
}

/*
 * Convert an eBPF instruction to native instruction, i.e
 * JITs an eBPF instruction.
 * Returns :
 *	0  - Successfully JITed an 8-byte eBPF instruction
 *	>0 - Successfully JITed a 16-byte eBPF instruction
 *	<0 - Failed to JIT.
 */
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 *dst = bpf2a32[insn->dst_reg];
	const u8 *src = bpf2a32[insn->src_reg];
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const int i = insn - ctx->prog->insnsi;
	const bool is64 = BPF_CLASS(code) == BPF_ALU64;
	const bool dstk = is_on_stack(insn->dst_reg);
	const bool sstk = is_on_stack(insn->src_reg);
	u8 rd, rt, rm, rn;
	s32 jmp_offset;

#define check_imm(bits, imm) do {				\
	if ((imm) >= (1 << ((bits) - 1)) ||			\
	    (imm) < -(1 << ((bits) - 1))) {			\
		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
			i, imm, imm);				\
		return -EINVAL;					\
	}							\
} while (0)
#define check_imm24(imm) check_imm(24, imm)

	switch (code) {
	/* ALU operations */

	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		switch (BPF_SRC(code)) {
		case BPF_X:
			emit_a32_mov_r64(is64, dst, src, dstk, sstk, ctx);
			break;
		case BPF_K:
			/* Sign-extend immediate value to destination reg */
			emit_a32_mov_i64(is64, dst, imm, dstk, ctx);
			break;
		}
		break;
	/* dst = dst + src/imm */
	/* dst = dst - src/imm */
	/* dst = dst | src/imm */
	/* dst = dst & src/imm */
	/* dst = dst ^ src/imm */
	/* dst = dst * src/imm */
	/* dst = dst << src */
	/* dst = dst >> src */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU | BPF_ARSH | BPF_K:
	case BPF_ALU | BPF_ARSH | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		switch (BPF_SRC(code)) {
		case BPF_X:
			emit_a32_alu_r64(is64, dst, src, dstk, sstk,
					 ctx, BPF_OP(code));
			break;
		case BPF_K:
			/* Move immediate value to the temporary register
			 * and then do the ALU operation on the temporary
			 * register as this will sign-extend the immediate
			 * value into temporary reg and then it would be
			 * safe to do the operation on it.
			 */
			emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
			emit_a32_alu_r64(is64, dst, tmp2, dstk, false,
					 ctx, BPF_OP(code));
			break;
		}
		break;
	/* dst = dst / src(imm) */
	/* dst = dst % src(imm) */
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU | BPF_MOD | BPF_X:
		rt = src_lo;
		rd = dstk ? tmp2[1] : dst_lo;
		if (dstk)
			emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		switch (BPF_SRC(code)) {
		case BPF_X:
			rt = sstk ? tmp2[0] : rt;
			if (sstk)
				emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)),
				     ctx);
			break;
		case BPF_K:
			rt = tmp2[0];
			emit_a32_mov_i(rt, imm, false, ctx);
			break;
		}
		emit_udivmod(rd, rd, rt, ctx, BPF_OP(code));
		if (dstk)
			emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
		break;
	case BPF_ALU64 | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		goto notyet;
	/* dst = dst >> imm */
	/* dst = dst << imm */
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU | BPF_LSH | BPF_K:
		if (unlikely(imm > 31))
			return -EINVAL;
		if (imm)
			emit_a32_alu_i(dst_lo, imm, dstk, ctx, BPF_OP(code));
		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
		break;
	/* dst = dst << imm */
	case BPF_ALU64 | BPF_LSH | BPF_K:
		if (unlikely(imm > 63))
			return -EINVAL;
		emit_a32_lsh_i64(dst, dstk, imm, ctx);
		break;
	/* dst = dst >> imm */
	case BPF_ALU64 | BPF_RSH | BPF_K:
		if (unlikely(imm > 63))
			return -EINVAL;
		emit_a32_rsh_i64(dst, dstk, imm, ctx);
		break;
	/* dst = dst << src */
	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit_a32_lsh_r64(dst, src, dstk, sstk, ctx);
		break;
	/* dst = dst >> src */
	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit_a32_rsh_r64(dst, src, dstk, sstk, ctx);
		break;
	/* dst = dst >> src (signed) */
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit_a32_arsh_r64(dst, src, dstk, sstk, ctx);
		break;
	/* dst = dst >> imm (signed) */
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		if (unlikely(imm > 63))
			return -EINVAL;
		emit_a32_arsh_i64(dst, dstk, imm, ctx);
		break;
	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
		emit_a32_alu_i(dst_lo, 0, dstk, ctx, BPF_OP(code));
		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
		break;
	/* dst = -dst (64 bit) */
	case BPF_ALU64 | BPF_NEG:
		emit_a32_neg64(dst, dstk, ctx);
		break;
	/* dst = dst * src/imm */
	case BPF_ALU64 | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		switch (BPF_SRC(code)) {
		case BPF_X:
			emit_a32_mul_r64(dst, src, dstk, sstk, ctx);
			break;
		case BPF_K:
			/* Move immediate value to the temporary register
			 * and then do the multiplication on it as this
			 * will sign-extend the immediate value into temp
			 * reg then it would be safe to do the operation
			 * on it.
			 */
			emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
			emit_a32_mul_r64(dst, tmp2, dstk, false, ctx);
			break;
		}
		break;
	/* dst = htole(dst) */
	/* dst = htobe(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
	case BPF_ALU | BPF_END | BPF_FROM_BE:
		rd = dstk ? tmp[0] : dst_hi;
		rt = dstk ? tmp[1] : dst_lo;
		if (dstk) {
			emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
			emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
		}
		if (BPF_SRC(code) == BPF_FROM_LE)
			goto emit_bswap_uxt;
		switch (imm) {
		case 16:
			emit_rev16(rt, rt, ctx);
			goto emit_bswap_uxt;
		case 32:
			emit_rev32(rt, rt, ctx);
			goto emit_bswap_uxt;
		case 64:
			emit_rev32(ARM_LR, rt, ctx);
			emit_rev32(rt, rd, ctx);
			emit(ARM_MOV_R(rd, ARM_LR), ctx);
			break;
		}
		goto exit;
emit_bswap_uxt:
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
#if __LINUX_ARM_ARCH__ < 6
			emit_a32_mov_i(tmp2[1], 0xffff, false, ctx);
			emit(ARM_AND_R(rt, rt, tmp2[1]), ctx);
#else /* ARMv6+ */
			emit(ARM_UXTH(rt, rt), ctx);
#endif
			emit(ARM_EOR_R(rd, rd, rd), ctx);
			break;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			emit(ARM_EOR_R(rd, rd, rd), ctx);
			break;
		case 64:
			/* nop */
			break;
		}
exit:
		if (dstk) {
			emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
			emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
		}
		break;
	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		const struct bpf_insn insn1 = insn[1];
		u32 hi, lo = imm;

		hi = insn1.imm;
		emit_a32_mov_i(dst_lo, lo, dstk, ctx);
		emit_a32_mov_i(dst_hi, hi, dstk, ctx);

		return 1;
	}
	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW:
		rn = sstk ? tmp2[1] : src_lo;
		if (sstk)
			emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
		emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
		break;
	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_DW:
			/* Sign-extend immediate value into temp reg */
			emit_a32_mov_i64(true, tmp2, imm, false, ctx);
			emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, BPF_W);
			emit_str_r(dst_lo, tmp2[0], dstk, off+4, ctx, BPF_W);
			break;
		case BPF_W:
		case BPF_H:
		case BPF_B:
			emit_a32_mov_i(tmp2[1], imm, false, ctx);
			emit_str_r(dst_lo, tmp2[1], dstk, off, ctx,
				   BPF_SIZE(code));
			break;
		}
		break;
	/* STX XADD: lock *(u32 *)(dst + off) += src */
	case BPF_STX | BPF_XADD | BPF_W:
	/* STX XADD: lock *(u64 *)(dst + off) += src */
	case BPF_STX | BPF_XADD | BPF_DW:
		goto notyet;
	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_DW:
	{
		u8 sz = BPF_SIZE(code);

		rn = sstk ? tmp2[1] : src_lo;
		rm = sstk ? tmp2[0] : src_hi;
		if (sstk) {
			emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
			emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
		}

		/* Store the value */
		if (BPF_SIZE(code) == BPF_DW) {
			emit_str_r(dst_lo, rn, dstk, off, ctx, BPF_W);
			emit_str_r(dst_lo, rm, dstk, off+4, ctx, BPF_W);
		} else {
			emit_str_r(dst_lo, rn, dstk, off, ctx, sz);
		}
		break;
	}
	/* PC += off if dst == src */
	/* PC += off if dst > src */
	/* PC += off if dst >= src */
	/* PC += off if dst < src */
	/* PC += off if dst <= src */
	/* PC += off if dst != src */
	/* PC += off if dst > src (signed) */
	/* PC += off if dst >= src (signed) */
	/* PC += off if dst < src (signed) */
	/* PC += off if dst <= src (signed) */
	/* PC += off if dst & src */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
		/* Setup source registers */
		rm = sstk ? tmp2[0] : src_hi;
		rn = sstk ? tmp2[1] : src_lo;
		if (sstk) {
			emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
			emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
		}
		goto go_jmp;
	/* PC += off if dst == imm */
	/* PC += off if dst > imm */
	/* PC += off if dst >= imm */
	/* PC += off if dst < imm */
	/* PC += off if dst <= imm */
	/* PC += off if dst != imm */
	/* PC += off if dst > imm (signed) */
	/* PC += off if dst >= imm (signed) */
	/* PC += off if dst < imm (signed) */
	/* PC += off if dst <= imm (signed) */
	/* PC += off if dst & imm */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
		if (off == 0)
			break;
		rm = tmp2[0];
		rn = tmp2[1];
		/* Sign-extend immediate value */
		emit_a32_mov_i64(true, tmp2, imm, false, ctx);
go_jmp:
		/* Setup destination register */
		rd = dstk ? tmp[0] : dst_hi;
		rt = dstk ? tmp[1] : dst_lo;
		if (dstk) {
			emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
			emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
		}

		/* Check for the condition */
		emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code));

		/* Setup JUMP instruction */
		jmp_offset = bpf2a32_offset(i+off, i, ctx);
		switch (BPF_OP(code)) {
		case BPF_JNE:
		case BPF_JSET:
			_emit(ARM_COND_NE, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JEQ:
			_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JGT:
			_emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JGE:
			_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JSGT:
			_emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JSGE:
			_emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JLE:
			_emit(ARM_COND_LS, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JLT:
			_emit(ARM_COND_CC, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JSLT:
			_emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
			break;
		case BPF_JSLE:
			_emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
			break;
		}
		break;
	/* JMP OFF */
	case BPF_JMP | BPF_JA:
	{
		if (off == 0)
			break;
		jmp_offset = bpf2a32_offset(i+off, i, ctx);
		check_imm24(jmp_offset);
		emit(ARM_B(jmp_offset), ctx);
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		if (emit_bpf_tail_call(ctx))
			return -EFAULT;
		break;
	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		const u8 *r0 = bpf2a32[BPF_REG_0];
		const u8 *r1 = bpf2a32[BPF_REG_1];
		const u8 *r2 = bpf2a32[BPF_REG_2];
		const u8 *r3 = bpf2a32[BPF_REG_3];
		const u8 *r4 = bpf2a32[BPF_REG_4];
		const u8 *r5 = bpf2a32[BPF_REG_5];
		const u32 func = (u32)__bpf_call_base + (u32)imm;

		emit_a32_mov_r64(true, r0, r1, false, false, ctx);
		emit_a32_mov_r64(true, r1, r2, false, true, ctx);
		emit_push_r64(r5, 0, ctx);
		emit_push_r64(r4, 8, ctx);
		emit_push_r64(r3, 16, ctx);

		emit_a32_mov_i(tmp[1], func, false, ctx);
		emit_blx_r(tmp[1], ctx);

		/* caller cleanup: pop the pushed args r3-r5 (24 bytes) */
		emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx);
		break;
	}
	/* function return */
	case BPF_JMP | BPF_EXIT:
		/* Optimization: when last instruction is EXIT
		 * simply fallthrough to epilogue.
		 */
		if (i == ctx->prog->len - 1)
			break;
		jmp_offset = epilogue_offset(ctx);
		check_imm24(jmp_offset);
		emit(ARM_B(jmp_offset), ctx);
		break;
notyet:
		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
		return -EFAULT;
	default:
		pr_err_once("unknown opcode %02x\n", code);
		return -EINVAL;
	}

	if (ctx->flags & FLAG_IMM_OVERFLOW)
		/*
		 * this instruction generated an overflow when
		 * trying to access the literal pool, so
		 * delegate this filter to the kernel interpreter.
		 */
		return -1;
	return 0;
}

static int build_body(struct jit_ctx *ctx)
{
	const struct bpf_prog *prog = ctx->prog;
	unsigned int i;

	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &(prog->insnsi[i]);
		int ret;

		ret = build_insn(insn, ctx);

		/* It's used with loading the 64 bit immediate value. */
		if (ret > 0) {
			i++;
			if (ctx->target == NULL)
				ctx->offsets[i] = ctx->idx;
			continue;
		}

		if (ctx->target == NULL)
			ctx->offsets[i] = ctx->idx;

		/* If unsuccessful, return with error code */
		if (ret)
			return ret;
	}
	return 0;
}

static int validate_code(struct jit_ctx *ctx)
{
	int i;

	for (i = 0; i < ctx->idx; i++) {
		if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF))
			return -1;
	}

	return 0;
}

void bpf_jit_compile(struct bpf_prog *prog)
{
	/* Nothing to do here. We support Internal BPF. */
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	bool tmp_blinded = false;
	struct jit_ctx ctx;
	unsigned int tmp_idx;
	unsigned int image_size;
	u8 *image_ptr;

	/* If BPF JIT was not enabled then we must fall back to
	 * the interpreter.
	 */
	if (!prog->jit_requested)
		return orig_prog;

	/* If constant blinding was enabled and we failed during blinding
	 * then we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	tmp = bpf_jit_blind_constants(prog);

	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	/* If we are not able to allocate memory for offsets[], then
	 * we must fall back to the interpreter.
	 */
	ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
	if (ctx.offsets == NULL) {
		prog = orig_prog;
		goto out;
	}

	/* 1) fake pass to find the length of the JITed code,
	 * to compute ctx->offsets and other context variables
	 * needed to compute final JITed code.
	 * Also, calculate random starting pointer/start of JITed code
	 * which is prefixed by random number of fault instructions.
	 *
	 * If the first pass fails then there is no chance of it
	 * being successful in the second pass, so just fall back
	 * to the interpreter.
	 */
	if (build_body(&ctx)) {
		prog = orig_prog;
		goto out_off;
	}

	tmp_idx = ctx.idx;
	build_prologue(&ctx);
	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;

	ctx.epilogue_offset = ctx.idx;

#if __LINUX_ARM_ARCH__ < 7
	tmp_idx = ctx.idx;
	build_epilogue(&ctx);
	ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4;

	ctx.idx += ctx.imm_count;
	if (ctx.imm_count) {
		ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL);
		if (ctx.imms == NULL) {
			prog = orig_prog;
			goto out_off;
		}
	}
#else
	/* there's nothing about the epilogue on ARMv7 */
	build_epilogue(&ctx);
#endif
	/* Now we can get the actual image size of the JITed arm code.
	 * Currently, we are not considering the THUMB-2 instructions
	 * for jit, although it can decrease the size of the image.
	 *
	 * As each arm instruction is of length 32bit, we are translating
	 * the number of JITed instructions into the size required to
	 * store the JITed code.
	 */
	image_size = sizeof(u32) * ctx.idx;

	/* Now we know the size of the structure to make */
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	/* If we are not able to allocate memory for the structure
	 * then we must fall back to the interpreter.
	 */
	if (header == NULL) {
		prog = orig_prog;
		goto out_imms;
	}

	/* 2.) Actual pass to generate final JIT code */
	ctx.target = (u32 *) image_ptr;
	ctx.idx = 0;

	build_prologue(&ctx);

	/* If building the body of the JITed code fails somehow,
	 * we fall back to the interpreter.
	 */
	if (build_body(&ctx) < 0) {
		image_ptr = NULL;
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_imms;
	}
	build_epilogue(&ctx);

	/* 3.) Extra pass to validate JITed Code */
	if (validate_code(&ctx)) {
		image_ptr = NULL;
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_imms;
	}
	flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));

	if (bpf_jit_enable > 1)
		/* there are 2 passes here */
		bpf_jit_dump(prog->len, image_size, 2, ctx.target);

	bpf_jit_binary_lock_ro(header);
	prog->bpf_func = (void *)ctx.target;
	prog->jited = 1;
	prog->jited_len = image_size;

out_imms:
#if __LINUX_ARM_ARCH__ < 7
	if (ctx.imm_count)
		kfree(ctx.imms);
#endif
out_off:
	kfree(ctx.offsets);
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}