/*
 * bpf_jit_comp.c: BPF JIT compiler
 *
 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
 * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>

#include <asm/set_memory.h>
#include <asm/nospec-branch.h>

static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
	if (len == 1)
		*ptr = bytes;
	else if (len == 2)
		*(u16 *)ptr = bytes;
	else {
		*(u32 *)ptr = bytes;
		barrier();
	}
	return ptr + len;
}

#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)

#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)	EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
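/*
 * Worked example (illustrative, not part of the original JIT): EMIT3(0x48,
 * 0x89, 0xE5) packs the bytes of 'mov rbp, rsp' into the u32 0x00E58948 and
 * emit_code() stores it little-endian, so memory receives 48 89 E5; the
 * cursor only advances by 3 and the spare fourth byte is overwritten by the
 * next EMIT*() (BPF_INSN_SAFETY keeps the overshoot inside the scratch
 * buffer). The hypothetical helper below only sketches the usual calling
 * convention around these macros: a local 'prog' cursor plus a 'cnt' byte
 * counter, with the cursor written back through the 'pprog' double pointer.
 */
static __maybe_unused void emit_mov_rbp_rsp_example(u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	EMIT3(0x48, 0x89, 0xE5);	/* mov rbp, rsp */
	*pprog = prog;
}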
static bool is_imm8(int value)
{
	return value <= 127 && value >= -128;
}

static bool is_simm32(s64 value)
{
	return value == (s64)(s32)value;
}

static bool is_uimm32(u64 value)
{
	return value == (u64)(u32)value;
}

/* mov dst, src */
#define EMIT_mov(DST, SRC)								 \
	do {										 \
		if (DST != SRC)								 \
			EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
	} while (0)

static int bpf_size_to_x86_bytes(int bpf_size)
{
	if (bpf_size == BPF_W)
		return 4;
	else if (bpf_size == BPF_H)
		return 2;
	else if (bpf_size == BPF_B)
		return 1;
	else if (bpf_size == BPF_DW)
		return 4; /* imm32 */
	else
		return 0;
}

/*
 * List of x86 cond jumps opcodes (. + s8)
 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
 */
#define X86_JB  0x72
#define X86_JAE 0x73
#define X86_JE  0x74
#define X86_JNE 0x75
#define X86_JBE 0x76
#define X86_JA  0x77
#define X86_JL  0x7C
#define X86_JGE 0x7D
#define X86_JLE 0x7E
#define X86_JG  0x7F

/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)

/*
 * The following table maps BPF registers to x86-64 registers.
 *
 * x86-64 register R12 is unused, since if used as base address
 * register in load/store instructions, it always needs an
 * extra byte of encoding and is callee saved.
 *
 * Also x86-64 register R9 is unused. x86-64 register R10 is
 * used for blinding (if enabled).
 */
static const int reg2hex[] = {
	[BPF_REG_0] = 0,  /* RAX */
	[BPF_REG_1] = 7,  /* RDI */
	[BPF_REG_2] = 6,  /* RSI */
	[BPF_REG_3] = 2,  /* RDX */
	[BPF_REG_4] = 1,  /* RCX */
	[BPF_REG_5] = 0,  /* R8 */
	[BPF_REG_6] = 3,  /* RBX callee saved */
	[BPF_REG_7] = 5,  /* R13 callee saved */
	[BPF_REG_8] = 6,  /* R14 callee saved */
	[BPF_REG_9] = 7,  /* R15 callee saved */
	[BPF_REG_FP] = 5, /* RBP readonly */
	[BPF_REG_AX] = 2, /* R10 temp register */
	[AUX_REG] = 3,    /* R11 temp register */
};

/*
 * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
 * which need extra byte of encoding.
 * rax,rcx,...,rbp have simpler encoding
 */
static bool is_ereg(u32 reg)
{
	return (1 << reg) & (BIT(BPF_REG_5) |
			     BIT(AUX_REG) |
			     BIT(BPF_REG_7) |
			     BIT(BPF_REG_8) |
			     BIT(BPF_REG_9) |
			     BIT(BPF_REG_AX));
}

static bool is_axreg(u32 reg)
{
	return reg == BPF_REG_0;
}

/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
static u8 add_1mod(u8 byte, u32 reg)
{
	if (is_ereg(reg))
		byte |= 1;
	return byte;
}

static u8 add_2mod(u8 byte, u32 r1, u32 r2)
{
	if (is_ereg(r1))
		byte |= 1;
	if (is_ereg(r2))
		byte |= 4;
	return byte;
}

/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
	return byte + reg2hex[dst_reg];
}

/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{
	return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
}
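/*
 * Worked example (illustrative): for BPF_ALU64 | BPF_ADD | BPF_X with
 * dst_reg == BPF_REG_7 (r13) and src_reg == BPF_REG_1 (rdi), the helpers
 * above produce
 *
 *   add_2mod(0x48, dst, src) = 0x49  REX.W set, REX.B set because r13 is
 *                                    one of r8..r15 (is_ereg() == true)
 *   add_2reg(0xC0, dst, src) = 0xFD  ModRM: mod=11, reg=7 (rdi), rm=5 (r13)
 *
 * so together with the 0x01 'add r/m64, r64' opcode the JIT emits the three
 * bytes 49 01 fd, i.e. 'add r13, rdi'.
 */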
static void jit_fill_hole(void *area, unsigned int size)
{
	/* Fill whole space with INT3 instructions */
	memset(area, 0xcc, size);
}

struct jit_context {
	int cleanup_addr; /* Epilogue code offset */
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE	128
#define BPF_INSN_SAFETY		64

#define PROLOGUE_SIZE		20

/*
 * Emit x86-64 prologue code for BPF program and check its size.
 * bpf_tail_call helper will skip it while jumping into another program
 */
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
	u8 *prog = *pprog;
	int cnt = 0;

	EMIT1(0x55);             /* push rbp */
	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
	/* sub rsp, rounded_stack_depth */
	EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
	EMIT1(0x53);             /* push rbx */
	EMIT2(0x41, 0x55);       /* push r13 */
	EMIT2(0x41, 0x56);       /* push r14 */
	EMIT2(0x41, 0x57);       /* push r15 */
	if (!ebpf_from_cbpf) {
		/* zero init tail_call_cnt */
		EMIT2(0x6a, 0x00);
		BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
	}
	*pprog = prog;
}

/*
 * Generate the following code:
 *
 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
 *   if (index >= array->map.max_entries)
 *     goto out;
 *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
 *     goto out;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
 *     goto out;
 *   goto *(prog->bpf_func + prologue_size);
 * out:
 */
static void emit_bpf_tail_call(u8 **pprog)
{
	u8 *prog = *pprog;
	int label1, label2, label3;
	int cnt = 0;

	/*
	 * rdi - pointer to ctx
	 * rsi - pointer to bpf_array
	 * rdx - index in bpf_array
	 */

	/*
	 * if (index >= array->map.max_entries)
	 *	goto out;
	 */
	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
	      offsetof(struct bpf_array, map.max_entries));
#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
	label1 = cnt;

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *	goto out;
	 */
	EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
	EMIT2(X86_JA, OFFSET2);                   /* ja out */
	label2 = cnt;
	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
	EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */

	/* prog = array->ptrs[index]; */
	EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
		    offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *	goto out;
	 */
	EMIT3(0x48, 0x85, 0xC0);                  /* test rax,rax */
#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
	EMIT2(X86_JE, OFFSET3);                   /* je out */
	label3 = cnt;

	/* goto *(prog->bpf_func + prologue_size); */
	EMIT4(0x48, 0x8B, 0x40,                   /* mov rax, qword ptr [rax + 32] */
	      offsetof(struct bpf_prog, bpf_func));
	EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE);   /* add rax, prologue_size */

	/*
	 * Wow we're ready to jump into next BPF program
	 * rdi == ctx (1st arg)
	 * rax == prog->bpf_func + prologue_size
	 */
	RETPOLINE_RAX_BPF_JIT();

	/* out: */
	BUILD_BUG_ON(cnt - label1 != OFFSET1);
	BUILD_BUG_ON(cnt - label2 != OFFSET2);
	BUILD_BUG_ON(cnt - label3 != OFFSET3);
	*pprog = prog;
}
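/*
 * Decoding note (illustrative): the 'prog = array->ptrs[index]' load above,
 * EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, offsetof(struct bpf_array, ptrs)),
 * is REX.W (48) + mov opcode (8B) + ModRM 84 (mod=10: disp32, reg=rax,
 * rm=100: SIB follows) + SIB D6 (scale=8, index=rdx, base=rsi) + disp32,
 * i.e. 'mov rax, [rsi + rdx*8 + offsetof(struct bpf_array, ptrs)]' with
 * rsi holding the bpf_array pointer and rdx the already bounds-checked
 * index.
 */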
static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
			   u32 dst_reg, const u32 imm32)
{
	u8 *prog = *pprog;
	u8 b1, b2, b3;
	int cnt = 0;

	/*
	 * Optimization: if imm32 is positive, use 'mov %eax, imm32'
	 * (which zero-extends imm32) to save 2 bytes.
	 */
	if (sign_propagate && (s32)imm32 < 0) {
		/* 'mov %rax, imm32' sign extends imm32 */
		b1 = add_1mod(0x48, dst_reg);
		b2 = 0xC7;
		b3 = 0xC0;
		EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
		goto done;
	}

	/*
	 * Optimization: if imm32 is zero, use 'xor %eax, %eax'
	 * to save 3 bytes.
	 */
	if (imm32 == 0) {
		if (is_ereg(dst_reg))
			EMIT1(add_2mod(0x40, dst_reg, dst_reg));
		b2 = 0x31; /* xor */
		b3 = 0xC0;
		EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
		goto done;
	}

	/* mov %eax, imm32 */
	if (is_ereg(dst_reg))
		EMIT1(add_1mod(0x40, dst_reg));
	EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
done:
	*pprog = prog;
}

static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
			   const u32 imm32_hi, const u32 imm32_lo)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
		/*
		 * For emitting plain u32, where sign bit must not be
		 * propagated LLVM tends to load imm64 over mov32
		 * directly, so save couple of bytes by just doing
		 * 'mov %eax, imm32' instead.
		 */
		emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
	} else {
		/* movabsq %rax, imm64 */
		EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
		EMIT(imm32_lo, 4);
		EMIT(imm32_hi, 4);
	}

	*pprog = prog;
}

static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (is64) {
		/* mov dst, src */
		EMIT_mov(dst_reg, src_reg);
	} else {
		/* mov32 dst, src */
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT1(add_2mod(0x40, dst_reg, src_reg));
		EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
	}

	*pprog = prog;
}
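/*
 * Encoding examples for the immediate moves above (illustrative), assuming
 * dst_reg == BPF_REG_1 (rdi):
 *
 *   imm32 == 0                     -> 31 ff                  xor edi, edi
 *   imm32 == 0x11, no sign prop.   -> bf 11 00 00 00         mov edi, 0x11
 *   imm32 == -1, sign_propagate    -> 48 c7 c7 ff ff ff ff   mov rdi, -1
 *   imm64, non-zero upper half     -> 48 bf + 8-byte imm     movabs rdi, imm64
 *
 * emit_mov_imm64() falls back to the 32-bit forms whenever the upper 32 bits
 * are zero, which is why LLVM-emitted BPF_LD | BPF_IMM | BPF_DW loads of
 * small constants do not cost a full 10-byte movabs.
 */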
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
		  int oldproglen, struct jit_context *ctx)
{
	struct bpf_insn *insn = bpf_prog->insnsi;
	int insn_cnt = bpf_prog->len;
	bool seen_exit = false;
	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
	int i, cnt = 0;
	int proglen = 0;
	u8 *prog = temp;

	emit_prologue(&prog, bpf_prog->aux->stack_depth,
		      bpf_prog_was_classic(bpf_prog));

	for (i = 0; i < insn_cnt; i++, insn++) {
		const s32 imm32 = insn->imm;
		u32 dst_reg = insn->dst_reg;
		u32 src_reg = insn->src_reg;
		u8 b2 = 0, b3 = 0;
		s64 jmp_offset;
		u8 jmp_cond;
		int ilen;
		u8 *func;

		switch (insn->code) {
			/* ALU */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU64 | BPF_ADD | BPF_X:
		case BPF_ALU64 | BPF_SUB | BPF_X:
		case BPF_ALU64 | BPF_AND | BPF_X:
		case BPF_ALU64 | BPF_OR | BPF_X:
		case BPF_ALU64 | BPF_XOR | BPF_X:
			switch (BPF_OP(insn->code)) {
			case BPF_ADD: b2 = 0x01; break;
			case BPF_SUB: b2 = 0x29; break;
			case BPF_AND: b2 = 0x21; break;
			case BPF_OR: b2 = 0x09; break;
			case BPF_XOR: b2 = 0x31; break;
			}
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_2mod(0x48, dst_reg, src_reg));
			else if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT1(add_2mod(0x40, dst_reg, src_reg));
			EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
			break;

		case BPF_ALU64 | BPF_MOV | BPF_X:
		case BPF_ALU | BPF_MOV | BPF_X:
			emit_mov_reg(&prog,
				     BPF_CLASS(insn->code) == BPF_ALU64,
				     dst_reg, src_reg);
			break;

			/* neg dst */
		case BPF_ALU | BPF_NEG:
		case BPF_ALU64 | BPF_NEG:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));
			EMIT2(0xF7, add_1reg(0xD8, dst_reg));
			break;

		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU64 | BPF_ADD | BPF_K:
		case BPF_ALU64 | BPF_SUB | BPF_K:
		case BPF_ALU64 | BPF_AND | BPF_K:
		case BPF_ALU64 | BPF_OR | BPF_K:
		case BPF_ALU64 | BPF_XOR | BPF_K:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			/*
			 * b3 holds 'normal' opcode, b2 short form only valid
			 * in case dst is eax/rax.
			 */
			switch (BPF_OP(insn->code)) {
			case BPF_ADD:
				b3 = 0xC0;
				b2 = 0x05;
				break;
			case BPF_SUB:
				b3 = 0xE8;
				b2 = 0x2D;
				break;
			case BPF_AND:
				b3 = 0xE0;
				b2 = 0x25;
				break;
			case BPF_OR:
				b3 = 0xC8;
				b2 = 0x0D;
				break;
			case BPF_XOR:
				b3 = 0xF0;
				b2 = 0x35;
				break;
			}

			if (is_imm8(imm32))
				EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
			else if (is_axreg(dst_reg))
				EMIT1_off32(b2, imm32);
			else
				EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
			break;

		case BPF_ALU64 | BPF_MOV | BPF_K:
		case BPF_ALU | BPF_MOV | BPF_K:
			emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
				       dst_reg, imm32);
			break;

		case BPF_LD | BPF_IMM | BPF_DW:
			emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
			insn++;
			i++;
			break;

			/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU64 | BPF_MOD | BPF_X:
		case BPF_ALU64 | BPF_DIV | BPF_X:
		case BPF_ALU64 | BPF_MOD | BPF_K:
		case BPF_ALU64 | BPF_DIV | BPF_K:
			EMIT1(0x50); /* push rax */
			EMIT1(0x52); /* push rdx */

			if (BPF_SRC(insn->code) == BPF_X)
				/* mov r11, src_reg */
				EMIT_mov(AUX_REG, src_reg);
			else
				/* mov r11, imm32 */
				EMIT3_off32(0x49, 0xC7, 0xC3, imm32);

			/* mov rax, dst_reg */
			EMIT_mov(BPF_REG_0, dst_reg);

			/*
			 * xor edx, edx
			 * equivalent to 'xor rdx, rdx', but one byte less
			 */
			EMIT2(0x31, 0xd2);

			if (BPF_CLASS(insn->code) == BPF_ALU64)
				/* div r11 */
				EMIT3(0x49, 0xF7, 0xF3);
			else
				/* div r11d */
				EMIT3(0x41, 0xF7, 0xF3);

			if (BPF_OP(insn->code) == BPF_MOD)
				/* mov r11, rdx */
				EMIT3(0x49, 0x89, 0xD3);
			else
				/* mov r11, rax */
				EMIT3(0x49, 0x89, 0xC3);

			EMIT1(0x5A); /* pop rdx */
			EMIT1(0x58); /* pop rax */

			/* mov dst_reg, r11 */
			EMIT_mov(dst_reg, AUX_REG);
			break;

		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU64 | BPF_MUL | BPF_K:
		case BPF_ALU64 | BPF_MUL | BPF_X:
		{
			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;

			if (dst_reg != BPF_REG_0)
				EMIT1(0x50); /* push rax */
			if (dst_reg != BPF_REG_3)
				EMIT1(0x52); /* push rdx */

			/* mov r11, dst_reg */
			EMIT_mov(AUX_REG, dst_reg);

			if (BPF_SRC(insn->code) == BPF_X)
				emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
			else
				emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);

			if (is64)
				EMIT1(add_1mod(0x48, AUX_REG));
			else if (is_ereg(AUX_REG))
				EMIT1(add_1mod(0x40, AUX_REG));
			/* mul(q) r11 */
			EMIT2(0xF7, add_1reg(0xE0, AUX_REG));

			if (dst_reg != BPF_REG_3)
				EMIT1(0x5A); /* pop rdx */
			if (dst_reg != BPF_REG_0) {
				/* mov dst_reg, rax */
				EMIT_mov(dst_reg, BPF_REG_0);
				EMIT1(0x58); /* pop rax */
			}
			break;
		}
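			/*
			 * Immediate-form ALU example (illustrative): for
			 * BPF_ALU64 | BPF_ADD | BPF_K the three encodings
			 * chosen above look like
			 *
			 *   add rax, 1       -> 48 83 c0 01          (imm8 form, 0x83)
			 *   add rax, 0x10000 -> 48 05 00 00 01 00    (eax/rax short form)
			 *   add rdi, 0x10000 -> 48 81 c7 00 00 01 00 (generic 0x81 form)
			 *
			 * i.e. imm8 wins when the constant fits in a signed
			 * byte, otherwise rax gets the dedicated short opcode
			 * and every other register the 0x81 encoding.
			 */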
			/* Shifts */
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_ARSH | BPF_K:
		case BPF_ALU64 | BPF_LSH | BPF_K:
		case BPF_ALU64 | BPF_RSH | BPF_K:
		case BPF_ALU64 | BPF_ARSH | BPF_K:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			switch (BPF_OP(insn->code)) {
			case BPF_LSH: b3 = 0xE0; break;
			case BPF_RSH: b3 = 0xE8; break;
			case BPF_ARSH: b3 = 0xF8; break;
			}

			if (imm32 == 1)
				EMIT2(0xD1, add_1reg(b3, dst_reg));
			else
				EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
			break;

		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_ARSH | BPF_X:
		case BPF_ALU64 | BPF_LSH | BPF_X:
		case BPF_ALU64 | BPF_RSH | BPF_X:
		case BPF_ALU64 | BPF_ARSH | BPF_X:

			/* Check for bad case when dst_reg == rcx */
			if (dst_reg == BPF_REG_4) {
				/* mov r11, dst_reg */
				EMIT_mov(AUX_REG, dst_reg);
				dst_reg = AUX_REG;
			}

			if (src_reg != BPF_REG_4) { /* common case */
				EMIT1(0x51); /* push rcx */

				/* mov rcx, src_reg */
				EMIT_mov(BPF_REG_4, src_reg);
			}

			/* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			switch (BPF_OP(insn->code)) {
			case BPF_LSH: b3 = 0xE0; break;
			case BPF_RSH: b3 = 0xE8; break;
			case BPF_ARSH: b3 = 0xF8; break;
			}
			EMIT2(0xD3, add_1reg(b3, dst_reg));

			if (src_reg != BPF_REG_4)
				EMIT1(0x59); /* pop rcx */

			if (insn->dst_reg == BPF_REG_4)
				/* mov dst_reg, r11 */
				EMIT_mov(insn->dst_reg, AUX_REG);
			break;

		case BPF_ALU | BPF_END | BPF_FROM_BE:
			switch (imm32) {
			case 16:
				/* Emit 'ror %ax, 8' to swap lower 2 bytes */
				EMIT1(0x66);
				if (is_ereg(dst_reg))
					EMIT1(0x41);
				EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);

				/* Emit 'movzwl eax, ax' */
				if (is_ereg(dst_reg))
					EMIT3(0x45, 0x0F, 0xB7);
				else
					EMIT2(0x0F, 0xB7);
				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 32:
				/* Emit 'bswap eax' to swap lower 4 bytes */
				if (is_ereg(dst_reg))
					EMIT2(0x41, 0x0F);
				else
					EMIT1(0x0F);
				EMIT1(add_1reg(0xC8, dst_reg));
				break;
			case 64:
				/* Emit 'bswap rax' to swap 8 bytes */
				EMIT3(add_1mod(0x48, dst_reg), 0x0F,
				      add_1reg(0xC8, dst_reg));
				break;
			}
			break;

		case BPF_ALU | BPF_END | BPF_FROM_LE:
			switch (imm32) {
			case 16:
				/*
				 * Emit 'movzwl eax, ax' to zero extend 16-bit
				 * into 64 bit
				 */
				if (is_ereg(dst_reg))
					EMIT3(0x45, 0x0F, 0xB7);
				else
					EMIT2(0x0F, 0xB7);
				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 32:
				/* Emit 'mov eax, eax' to clear upper 32-bits */
				if (is_ereg(dst_reg))
					EMIT1(0x45);
				EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 64:
				/* nop */
				break;
			}
			break;
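			/*
			 * Byte swap example (illustrative): BPF_ALU | BPF_END
			 * | BPF_FROM_BE with imm32 == 32 becomes a single
			 * bswap:
			 *
			 *   dst_reg == BPF_REG_0 (rax) -> 0f c8     bswap eax
			 *   dst_reg == BPF_REG_7 (r13) -> 41 0f cd  bswap r13d
			 *
			 * while the 16-bit variant is 'ror ax, 8' followed by
			 * movzx so that the upper bits of the destination end
			 * up cleared.
			 */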
			/* ST: *(u8*)(dst_reg + off) = imm */
		case BPF_ST | BPF_MEM | BPF_B:
			if (is_ereg(dst_reg))
				EMIT2(0x41, 0xC6);
			else
				EMIT1(0xC6);
			goto st;
		case BPF_ST | BPF_MEM | BPF_H:
			if (is_ereg(dst_reg))
				EMIT3(0x66, 0x41, 0xC7);
			else
				EMIT2(0x66, 0xC7);
			goto st;
		case BPF_ST | BPF_MEM | BPF_W:
			if (is_ereg(dst_reg))
				EMIT2(0x41, 0xC7);
			else
				EMIT1(0xC7);
			goto st;
		case BPF_ST | BPF_MEM | BPF_DW:
			EMIT2(add_1mod(0x48, dst_reg), 0xC7);

st:			if (is_imm8(insn->off))
				EMIT2(add_1reg(0x40, dst_reg), insn->off);
			else
				EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);

			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
			break;

			/* STX: *(u8*)(dst_reg + off) = src_reg */
		case BPF_STX | BPF_MEM | BPF_B:
			/* Emit 'mov byte ptr [rax + off], al' */
			if (is_ereg(dst_reg) || is_ereg(src_reg) ||
			    /* We have to add extra byte for x86 SIL, DIL regs */
			    src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
			else
				EMIT1(0x88);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_H:
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
			else
				EMIT2(0x66, 0x89);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_W:
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
			else
				EMIT1(0x89);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_DW:
			EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
stx:			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
					    insn->off);
			break;

			/* LDX: dst_reg = *(u8*)(src_reg + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			/* Emit 'movzx rax, byte ptr [rax + off]' */
			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_H:
			/* Emit 'movzx rax, word ptr [rax + off]' */
			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_W:
			/* Emit 'mov eax, dword ptr [rax+0x14]' */
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
			else
				EMIT1(0x8B);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_DW:
			/* Emit 'mov rax, qword ptr [rax+0x14]' */
			EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
ldx:			/*
			 * If insn->off == 0 we can save one extra byte, but
			 * special case of x86 R13 which always needs an offset
			 * is not worth the hassle
			 */
			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
					    insn->off);
			break;

			/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
		case BPF_STX | BPF_XADD | BPF_W:
			/* Emit 'lock add dword ptr [rax + off], eax' */
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
			else
				EMIT2(0xF0, 0x01);
			goto xadd;
		case BPF_STX | BPF_XADD | BPF_DW:
			EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
xadd:			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
					    insn->off);
			break;

			/* call */
		case BPF_JMP | BPF_CALL:
			func = (u8 *) __bpf_call_base + imm32;
			jmp_offset = func - (image + addrs[i]);
			if (!imm32 || !is_simm32(jmp_offset)) {
				pr_err("unsupported BPF func %d addr %p image %p\n",
				       imm32, func, image);
				return -EINVAL;
			}
			EMIT1_off32(0xE8, jmp_offset);
			break;

		case BPF_JMP | BPF_TAIL_CALL:
			emit_bpf_tail_call(&prog);
			break;
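			/*
			 * Addressing-mode example (illustrative): the st/stx/
			 * ldx cases above pick a disp8 or disp32 ModRM form
			 * based on is_imm8(insn->off). For BPF_STX | BPF_MEM
			 * | BPF_W with dst_reg == BPF_REG_1 (rdi) and src_reg
			 * == BPF_REG_2 (rsi):
			 *
			 *   off == 8     -> 89 77 08           mov [rdi+0x8], esi
			 *   off == 0x100 -> 89 b7 00 01 00 00  mov [rdi+0x100], esi
			 *
			 * Byte-sized stores of rsi/rdi additionally need an
			 * empty REX prefix (0x40) so the encoding selects
			 * sil/dil rather than dh/bh, which is the SIL/DIL
			 * special case handled above.
			 */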
			/* cond jump */
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JNE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_X:
		case BPF_JMP32 | BPF_JEQ | BPF_X:
		case BPF_JMP32 | BPF_JNE | BPF_X:
		case BPF_JMP32 | BPF_JGT | BPF_X:
		case BPF_JMP32 | BPF_JLT | BPF_X:
		case BPF_JMP32 | BPF_JGE | BPF_X:
		case BPF_JMP32 | BPF_JLE | BPF_X:
		case BPF_JMP32 | BPF_JSGT | BPF_X:
		case BPF_JMP32 | BPF_JSLT | BPF_X:
		case BPF_JMP32 | BPF_JSGE | BPF_X:
		case BPF_JMP32 | BPF_JSLE | BPF_X:
			/* cmp dst_reg, src_reg */
			if (BPF_CLASS(insn->code) == BPF_JMP)
				EMIT1(add_2mod(0x48, dst_reg, src_reg));
			else if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT1(add_2mod(0x40, dst_reg, src_reg));
			EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg));
			goto emit_cond_jmp;

		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP32 | BPF_JSET | BPF_X:
			/* test dst_reg, src_reg */
			if (BPF_CLASS(insn->code) == BPF_JMP)
				EMIT1(add_2mod(0x48, dst_reg, src_reg));
			else if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT1(add_2mod(0x40, dst_reg, src_reg));
			EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg));
			goto emit_cond_jmp;

		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP32 | BPF_JSET | BPF_K:
			/* test dst_reg, imm32 */
			if (BPF_CLASS(insn->code) == BPF_JMP)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));
			EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
			goto emit_cond_jmp;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_K:
		case BPF_JMP32 | BPF_JEQ | BPF_K:
		case BPF_JMP32 | BPF_JNE | BPF_K:
		case BPF_JMP32 | BPF_JGT | BPF_K:
		case BPF_JMP32 | BPF_JLT | BPF_K:
		case BPF_JMP32 | BPF_JGE | BPF_K:
		case BPF_JMP32 | BPF_JLE | BPF_K:
		case BPF_JMP32 | BPF_JSGT | BPF_K:
		case BPF_JMP32 | BPF_JSLT | BPF_K:
		case BPF_JMP32 | BPF_JSGE | BPF_K:
		case BPF_JMP32 | BPF_JSLE | BPF_K:
			/* cmp dst_reg, imm8/32 */
			if (BPF_CLASS(insn->code) == BPF_JMP)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			if (is_imm8(imm32))
				EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
			else
				EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);

emit_cond_jmp:		/* Convert BPF opcode to x86 */
			switch (BPF_OP(insn->code)) {
			case BPF_JEQ:
				jmp_cond = X86_JE;
				break;
			case BPF_JSET:
			case BPF_JNE:
				jmp_cond = X86_JNE;
				break;
			case BPF_JGT:
				/* GT is unsigned '>', JA in x86 */
				jmp_cond = X86_JA;
				break;
			case BPF_JLT:
				/* LT is unsigned '<', JB in x86 */
				jmp_cond = X86_JB;
				break;
			case BPF_JGE:
				/* GE is unsigned '>=', JAE in x86 */
				jmp_cond = X86_JAE;
				break;
			case BPF_JLE:
				/* LE is unsigned '<=', JBE in x86 */
				jmp_cond = X86_JBE;
				break;
			case BPF_JSGT:
				/* Signed '>', GT in x86 */
				jmp_cond = X86_JG;
				break;
			case BPF_JSLT:
				/* Signed '<', LT in x86 */
				jmp_cond = X86_JL;
				break;
			case BPF_JSGE:
				/* Signed '>=', GE in x86 */
				jmp_cond = X86_JGE;
				break;
			case BPF_JSLE:
				/* Signed '<=', LE in x86 */
				jmp_cond = X86_JLE;
				break;
			default: /* to silence GCC warning */
				return -EFAULT;
			}
			jmp_offset = addrs[i + insn->off] - addrs[i];
			if (is_imm8(jmp_offset)) {
				EMIT2(jmp_cond, jmp_offset);
			} else if (is_simm32(jmp_offset)) {
				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
			} else {
				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}

			break;
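			/*
			 * Conditional jump example (illustrative): BPF_JGT
			 * maps to the unsigned 'ja' opcode 0x77, so a branch
			 * 5 bytes forward is emitted as the short form
			 * '77 05', while a target out of s8 range uses the
			 * near form built by adding 0x10 to the opcode behind
			 * an 0x0f prefix, e.g. '0f 87' + rel32.
			 */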
		case BPF_JMP | BPF_JA:
			if (insn->off == -1)
				/* -1 jmp instructions will always jump
				 * backwards two bytes. Explicitly handling
				 * this case avoids wasting too many passes
				 * when there are long sequences of replaced
				 * dead code.
				 */
				jmp_offset = -2;
			else
				jmp_offset = addrs[i + insn->off] - addrs[i];

			if (!jmp_offset)
				/* Optimize out nop jumps */
				break;
emit_jmp:
			if (is_imm8(jmp_offset)) {
				EMIT2(0xEB, jmp_offset);
			} else if (is_simm32(jmp_offset)) {
				EMIT1_off32(0xE9, jmp_offset);
			} else {
				pr_err("jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}
			break;

		case BPF_JMP | BPF_EXIT:
			if (seen_exit) {
				jmp_offset = ctx->cleanup_addr - addrs[i];
				goto emit_jmp;
			}
			seen_exit = true;
			/* Update cleanup_addr */
			ctx->cleanup_addr = proglen;
			if (!bpf_prog_was_classic(bpf_prog))
				EMIT1(0x5B); /* get rid of tail_call_cnt */
			EMIT2(0x41, 0x5F);	/* pop r15 */
			EMIT2(0x41, 0x5E);	/* pop r14 */
			EMIT2(0x41, 0x5D);	/* pop r13 */
			EMIT1(0x5B);		/* pop rbx */
			EMIT1(0xC9);		/* leave */
			EMIT1(0xC3);		/* ret */
			break;

		default:
			/*
			 * By design x86-64 JIT should support all BPF instructions.
			 * This error will be seen if new instruction was added
			 * to the interpreter, but not to the JIT, or if there is
			 * junk in bpf_prog.
			 */
			pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
			return -EINVAL;
		}

		ilen = prog - temp;
		if (ilen > BPF_MAX_INSN_SIZE) {
			pr_err("bpf_jit: fatal insn size error\n");
			return -EFAULT;
		}

		if (image) {
			if (unlikely(proglen + ilen > oldproglen)) {
				pr_err("bpf_jit: fatal error\n");
				return -EFAULT;
			}
			memcpy(image + proglen, temp, ilen);
		}
		proglen += ilen;
		addrs[i] = proglen;
		prog = temp;
	}
	return proglen;
}
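/*
 * Convergence note (illustrative): addrs[] starts from the pessimistic
 * 64-bytes-per-instruction estimate set up in bpf_int_jit_compile(), so the
 * first do_jit() pass tends to emit rel32 jumps. As the recorded offsets
 * shrink on later passes, some of those jumps collapse into their rel8
 * forms, shrinking the image further; the pass loop therefore repeats until
 * proglen stops changing and only then allocates the final image and runs
 * one more pass to write it out.
 */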
struct x64_jit_data {
	struct bpf_binary_header *header;
	int *addrs;
	u8 *image;
	int proglen;
	struct jit_context ctx;
};

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_binary_header *header = NULL;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct x64_jit_data *jit_data;
	int proglen, oldproglen = 0;
	struct jit_context ctx = {};
	bool tmp_blinded = false;
	bool extra_pass = false;
	u8 *image = NULL;
	int *addrs;
	int pass;
	int i;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	addrs = jit_data->addrs;
	if (addrs) {
		ctx = jit_data->ctx;
		oldproglen = jit_data->proglen;
		image = jit_data->image;
		header = jit_data->header;
		extra_pass = true;
		goto skip_init_addrs;
	}
	addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
	if (!addrs) {
		prog = orig_prog;
		goto out_addrs;
	}

	/*
	 * Before first pass, make a rough estimation of addrs[]
	 * each BPF instruction is translated to less than 64 bytes
	 */
	for (proglen = 0, i = 0; i < prog->len; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}
	ctx.cleanup_addr = proglen;
skip_init_addrs:

	/*
	 * JITed image shrinks with every pass and the loop iterates
	 * until the image stops shrinking. Very large BPF programs
	 * may converge on the last pass. In such case do one more
	 * pass to emit the final image.
	 */
	for (pass = 0; pass < 20 || image; pass++) {
		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
		if (proglen <= 0) {
out_image:
			image = NULL;
			if (header)
				bpf_jit_binary_free(header);
			prog = orig_prog;
			goto out_addrs;
		}
		if (image) {
			if (proglen != oldproglen) {
				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
				       proglen, oldproglen);
				goto out_image;
			}
			break;
		}
		if (proglen == oldproglen) {
			header = bpf_jit_binary_alloc(proglen, &image,
						      1, jit_fill_hole);
			if (!header) {
				prog = orig_prog;
				goto out_addrs;
			}
		}
		oldproglen = proglen;
		cond_resched();
	}

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, proglen, pass + 1, image);

	if (image) {
		if (!prog->is_func || extra_pass) {
			bpf_jit_binary_lock_ro(header);
		} else {
			jit_data->addrs = addrs;
			jit_data->ctx = ctx;
			jit_data->proglen = proglen;
			jit_data->image = image;
			jit_data->header = header;
		}
		prog->bpf_func = (void *)image;
		prog->jited = 1;
		prog->jited_len = proglen;
	} else {
		prog = orig_prog;
	}

	if (!image || !prog->is_func || extra_pass) {
		if (image)
			bpf_prog_fill_jited_linfo(prog, addrs);
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}