// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "SMP alternatives: " fmt

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/stringify.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <linux/mmu_context.h>
#include <linux/bsearch.h>
#include <linux/sync_core.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/fixmap.h>
#include <asm/paravirt.h>
#include <asm/asm-prototypes.h>

int __read_mostly alternatives_patched;

EXPORT_SYMBOL_GPL(alternatives_patched);

#define MAX_PATCH_LEN (255-1)

static int __initdata_or_module debug_alternative;

static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#define DPRINTK(fmt, args...)						\
do {									\
	if (debug_alternative)						\
		printk(KERN_DEBUG pr_fmt(fmt) "\n", ##args);		\
} while (0)

#define DUMP_BYTES(buf, len, fmt, args...)				\
do {									\
	if (unlikely(debug_alternative)) {				\
		int j;							\
									\
		if (!(len))						\
			break;						\
									\
		printk(KERN_DEBUG pr_fmt(fmt), ##args);			\
		for (j = 0; j < (len) - 1; j++)				\
			printk(KERN_CONT "%02hhx ", buf[j]);		\
		printk(KERN_CONT "%02hhx\n", buf[j]);			\
	}								\
} while (0)

static const unsigned char x86nops[] =
{
	BYTES_NOP1,
	BYTES_NOP2,
	BYTES_NOP3,
	BYTES_NOP4,
	BYTES_NOP5,
	BYTES_NOP6,
	BYTES_NOP7,
	BYTES_NOP8,
};

const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
{
	NULL,
	x86nops,
	x86nops + 1,
	x86nops + 1 + 2,
	x86nops + 1 + 2 + 3,
	x86nops + 1 + 2 + 3 + 4,
	x86nops + 1 + 2 + 3 + 4 + 5,
	x86nops + 1 + 2 + 3 + 4 + 5 + 6,
	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void __init_or_module add_nops(void *insns, unsigned int len)
{
	while (len > 0) {
		unsigned int noplen = len;
		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;
		memcpy(insns, x86_nops[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}

extern s32 __retpoline_sites[], __retpoline_sites_end[];
extern s32 __return_sites[], __return_sites_end[];
extern s32 __cfi_sites[], __cfi_sites_end[];
extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void text_poke_early(void *addr, const void *opcode, size_t len);

/*
 * Are we looking at a near JMP with a 1 or 4-byte displacement.
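 * (0xeb is JMP rel8, 0xe9 is JMP rel32; anything else is not touched by
 *  recompute_jump() below.)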
 */
static inline bool is_jmp(const u8 opcode)
{
	return opcode == 0xeb || opcode == 0xe9;
}

static void __init_or_module
recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
{
	u8 *next_rip, *tgt_rip;
	s32 n_dspl, o_dspl;
	int repl_len;

	if (a->replacementlen != 5)
		return;

	o_dspl = *(s32 *)(insn_buff + 1);

	/* next_rip of the replacement JMP */
	next_rip = repl_insn + a->replacementlen;
	/* target rip of the replacement JMP */
	tgt_rip  = next_rip + o_dspl;
	n_dspl = tgt_rip - orig_insn;

	DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);

	if (tgt_rip - orig_insn >= 0) {
		if (n_dspl - 2 <= 127)
			goto two_byte_jmp;
		else
			goto five_byte_jmp;
	/* negative offset */
	} else {
		if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
			goto two_byte_jmp;
		else
			goto five_byte_jmp;
	}

two_byte_jmp:
	n_dspl -= 2;

	insn_buff[0] = 0xeb;
	insn_buff[1] = (s8)n_dspl;
	add_nops(insn_buff + 2, 3);

	repl_len = 2;
	goto done;

five_byte_jmp:
	n_dspl -= 5;

	insn_buff[0] = 0xe9;
	*(s32 *)&insn_buff[1] = n_dspl;

	repl_len = 5;

done:

	DPRINTK("final displ: 0x%08x, JMP 0x%lx",
		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
}

/*
 * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
 *
 * @instr: instruction byte stream
 * @instrlen: length of the above
 * @off: offset within @instr where the first NOP has been detected
 *
 * Return: number of NOPs found (and replaced).
 */
static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
{
	unsigned long flags;
	int i = off, nnops;

	while (i < instrlen) {
		if (instr[i] != 0x90)
			break;

		i++;
	}

	nnops = i - off;

	if (nnops <= 1)
		return nnops;

	local_irq_save(flags);
	add_nops(instr + off, nnops);
	local_irq_restore(flags);

	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);

	return nnops;
}

/*
 * "noinline" to cause control flow change and thus invalidate I$ and
 * cause refetch after modification.
 */
static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
{
	struct insn insn;
	int i = 0;

	/*
	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
	 * ones.
	 */
	for (;;) {
		if (insn_decode_kernel(&insn, &instr[i]))
			return;

		/*
		 * See if this and any potentially following NOPs can be
		 * optimized.
		 */
		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
			i += optimize_nops_range(instr, len, i);
		else
			i += insn.length;

		if (i >= len)
			return;
	}
}

/*
 * Replace instructions with better alternatives for this CPU type. This runs
 * before SMP is initialized to avoid SMP problems with self modifying code.
 * This implies that asymmetric systems where APs have less capabilities than
 * the boot processor are not handled. Tough. Make sure you disable such
 * features by hand.
 *
 * Marked "noinline" to cause control flow change and thus insn cache
 * to refetch changed I$ lines.
 */
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
						  struct alt_instr *end)
{
	struct alt_instr *a;
	u8 *instr, *replacement;
	u8 insn_buff[MAX_PATCH_LEN];

	DPRINTK("alt table %px, -> %px", start, end);
	/*
	 * The scan order should be from start to end. A later scanned
	 * alternative code can overwrite previously scanned alternative code.
	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
	 * patch code.
	 *
	 * So be careful if you want to change the scan order to any other
	 * order.
	 */
	for (a = start; a < end; a++) {
		int insn_buff_sz = 0;

		instr = (u8 *)&a->instr_offset + a->instr_offset;
		replacement = (u8 *)&a->repl_offset + a->repl_offset;
		BUG_ON(a->instrlen > sizeof(insn_buff));
		BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);

		/*
		 * Patch if either:
		 * - feature is present
		 * - feature not present but ALT_FLAG_NOT is set to mean,
		 *   patch if feature is *NOT* present.
		 */
		if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT))
			goto next;

		DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
			(a->flags & ALT_FLAG_NOT) ? "!" : "",
			a->cpuid >> 5,
			a->cpuid & 0x1f,
			instr, instr, a->instrlen,
			replacement, a->replacementlen);

		DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
		DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);

		memcpy(insn_buff, replacement, a->replacementlen);
		insn_buff_sz = a->replacementlen;

		/*
		 * 0xe8 is a relative jump; fix the offset.
		 *
		 * Instruction length is checked before the opcode to avoid
		 * accessing uninitialized bytes for zero-length replacements.
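		 *
		 * (The rel32 is relative to the end of the CALL, so moving the
		 *  instruction from the replacement site to the original site
		 *  means the displacement grows by (replacement - instr).)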
		 */
		if (a->replacementlen == 5 && *insn_buff == 0xe8) {
			*(s32 *)(insn_buff + 1) += replacement - instr;
			DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
				*(s32 *)(insn_buff + 1),
				(unsigned long)instr + *(s32 *)(insn_buff + 1) + 5);
		}

		if (a->replacementlen && is_jmp(replacement[0]))
			recompute_jump(a, instr, replacement, insn_buff);

		for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
			insn_buff[insn_buff_sz] = 0x90;

		DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);

		text_poke_early(instr, insn_buff, insn_buff_sz);

next:
		optimize_nops(instr, a->instrlen);
	}
}

static inline bool is_jcc32(struct insn *insn)
{
	/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
	return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80;
}

#if defined(CONFIG_RETPOLINE) && defined(CONFIG_OBJTOOL)

/*
 * CALL/JMP *%\reg
 */
static int emit_indirect(int op, int reg, u8 *bytes)
{
	int i = 0;
	u8 modrm;

	switch (op) {
	case CALL_INSN_OPCODE:
		modrm = 0x10; /* Reg = 2; CALL r/m */
		break;

	case JMP32_INSN_OPCODE:
		modrm = 0x20; /* Reg = 4; JMP r/m */
		break;

	default:
		WARN_ON_ONCE(1);
		return -1;
	}

	if (reg >= 8) {
		bytes[i++] = 0x41; /* REX.B prefix */
		reg -= 8;
	}

	modrm |= 0xc0; /* Mod = 3 */
	modrm += reg;

	bytes[i++] = 0xff; /* opcode */
	bytes[i++] = modrm;

	return i;
}

static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
{
	u8 op = insn->opcode.bytes[0];
	int i = 0;

	/*
	 * Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional
	 * tail-calls. Deal with them.
	 */
	if (is_jcc32(insn)) {
		bytes[i++] = op;
		op = insn->opcode.bytes[1];
		goto clang_jcc;
	}

	if (insn->length == 6)
		bytes[i++] = 0x2e; /* CS-prefix */

	switch (op) {
	case CALL_INSN_OPCODE:
		__text_gen_insn(bytes+i, op, addr+i,
				__x86_indirect_call_thunk_array[reg],
				CALL_INSN_SIZE);
		i += CALL_INSN_SIZE;
		break;

	case JMP32_INSN_OPCODE:
clang_jcc:
		__text_gen_insn(bytes+i, op, addr+i,
				__x86_indirect_jump_thunk_array[reg],
				JMP32_INSN_SIZE);
		i += JMP32_INSN_SIZE;
		break;

	default:
		WARN(1, "%pS %px %*ph\n", addr, addr, 6, addr);
		return -1;
	}

	WARN_ON_ONCE(i != insn->length);

	return i;
}

/*
 * Rewrite the compiler generated retpoline thunk calls.
 *
 * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
 * indirect instructions, avoiding the extra indirection.
 *
 * For example, convert:
 *
 *   CALL __x86_indirect_thunk_\reg
 *
 * into:
 *
 *   CALL *%\reg
 *
 * It also tries to inline spectre_v2=retpoline,lfence when size permits.
 */
static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
{
	retpoline_thunk_t *target;
	int reg, ret, i = 0;
	u8 op, cc;

	target = addr + insn->length + insn->immediate.value;
	reg = target - __x86_indirect_thunk_array;

	if (WARN_ON_ONCE(reg & ~0xf))
		return -1;

	/* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble.
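	 * (reg 4 indexes the %rsp slot of the thunk array.)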
	 */
	BUG_ON(reg == 4);

	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
	    !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
		if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
			return emit_call_track_retpoline(addr, insn, reg, bytes);

		return -1;
	}

	op = insn->opcode.bytes[0];

	/*
	 * Convert:
	 *
	 *   Jcc.d32 __x86_indirect_thunk_\reg
	 *
	 * into:
	 *
	 *   Jncc.d8 1f
	 *   [ LFENCE ]
	 *   JMP *%\reg
	 *   [ NOP ]
	 * 1:
	 */
	if (is_jcc32(insn)) {
		cc = insn->opcode.bytes[1] & 0xf;
		cc ^= 1; /* invert condition */

		bytes[i++] = 0x70 + cc;        /* Jcc.d8 */
		bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */

		/* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */
		op = JMP32_INSN_OPCODE;
	}

	/*
	 * For RETPOLINE_LFENCE: prepend the indirect CALL/JMP with an LFENCE.
	 */
	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
		bytes[i++] = 0x0f;
		bytes[i++] = 0xae;
		bytes[i++] = 0xe8; /* LFENCE */
	}

	ret = emit_indirect(op, reg, bytes + i);
	if (ret < 0)
		return ret;
	i += ret;

	/*
	 * The compiler is supposed to EMIT an INT3 after every unconditional
	 * JMP instruction due to AMD BTC. However, if the compiler is too old
	 * or SLS isn't enabled, we still need an INT3 after indirect JMPs
	 * even on Intel.
	 */
	if (op == JMP32_INSN_OPCODE && i < insn->length)
		bytes[i++] = INT3_INSN_OPCODE;

	for (; i < insn->length;)
		bytes[i++] = BYTES_NOP1;

	return i;
}

/*
 * Generated by 'objtool --retpoline'.
 */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		void *addr = (void *)s + *s;
		struct insn insn;
		int len, ret;
		u8 bytes[16];
		u8 op1, op2;

		ret = insn_decode_kernel(&insn, addr);
		if (WARN_ON_ONCE(ret < 0))
			continue;

		op1 = insn.opcode.bytes[0];
		op2 = insn.opcode.bytes[1];

		switch (op1) {
		case CALL_INSN_OPCODE:
		case JMP32_INSN_OPCODE:
			break;

		case 0x0f: /* escape */
			if (op2 >= 0x80 && op2 <= 0x8f)
				break;
			fallthrough;
		default:
			WARN_ON_ONCE(1);
			continue;
		}

		DPRINTK("retpoline at: %pS (%px) len: %d to: %pS",
			addr, addr, insn.length,
			addr + insn.length + insn.immediate.value);

		len = patch_retpoline(addr, &insn, bytes);
		if (len == insn.length) {
			optimize_nops(bytes, len);
			DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
			DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
			text_poke_early(addr, bytes, len);
		}
	}
}

#ifdef CONFIG_RETHUNK

#ifdef CONFIG_CALL_THUNKS
void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
#endif

/*
 * Rewrite the compiler generated return thunk tail-calls.
 *
 * For example, convert:
 *
 *   JMP __x86_return_thunk
 *
 * into:
 *
 *   RET
 */
static int patch_return(void *addr, struct insn *insn, u8 *bytes)
{
	int i = 0;

	if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
		if (x86_return_thunk == __x86_return_thunk)
			return -1;

		i = JMP32_INSN_SIZE;
		__text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
	} else {
		bytes[i++] = RET_INSN_OPCODE;
	}

	for (; i < insn->length;)
		bytes[i++] = INT3_INSN_OPCODE;
	return i;
}

void __init_or_module noinline apply_returns(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		void *dest = NULL, *addr = (void *)s + *s;
		struct insn insn;
		int len, ret;
		u8 bytes[16];
		u8 op;

		ret = insn_decode_kernel(&insn, addr);
		if (WARN_ON_ONCE(ret < 0))
			continue;

		op = insn.opcode.bytes[0];
		if (op == JMP32_INSN_OPCODE)
			dest = addr + insn.length + insn.immediate.value;

		if (__static_call_fixup(addr, op, dest) ||
		    WARN_ONCE(dest != &__x86_return_thunk,
			      "missing return thunk: %pS-%pS: %*ph",
			      addr, dest, 5, addr))
			continue;

		DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
			addr, addr, insn.length,
			addr + insn.length + insn.immediate.value);

		len = patch_return(addr, &insn, bytes);
		if (len == insn.length) {
			DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
			DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
			text_poke_early(addr, bytes, len);
		}
	}
}
#else
void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
#endif /* CONFIG_RETHUNK */

#else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */

void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }

#endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */

#ifdef CONFIG_X86_KERNEL_IBT

static void poison_endbr(void *addr, bool warn)
{
	u32 endbr, poison = gen_endbr_poison();

	if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
		return;

	if (!is_endbr(endbr)) {
		WARN_ON_ONCE(warn);
		return;
	}

	DPRINTK("ENDBR at: %pS (%px)", addr, addr);

	/*
	 * When we have IBT, the lack of ENDBR will trigger #CP
	 */
	DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
	DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
	text_poke_early(addr, &poison, 4);
}

/*
 * Generated by: objtool --ibt
 */
void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		void *addr = (void *)s + *s;

		poison_endbr(addr, true);
		if (IS_ENABLED(CONFIG_FINEIBT))
			poison_endbr(addr - 16, false);
	}
}

#else

void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { }

#endif /* CONFIG_X86_KERNEL_IBT */

#ifdef CONFIG_FINEIBT

enum cfi_mode {
	CFI_DEFAULT,
	CFI_OFF,
	CFI_KCFI,
	CFI_FINEIBT,
};

static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT;
static bool cfi_rand __ro_after_init = true;
static u32  cfi_seed __ro_after_init;

/*
 * Re-hash the CFI hash with a boot-time seed while making sure the result is
 * not a valid ENDBR instruction.
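 *
 * (The mix is an XOR with cfi_seed; if the result, or its negation, would
 *  still decode as ENDBR it is folded through a simple LFSR-style step until
 *  it no longer does.)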
 */
static u32 cfi_rehash(u32 hash)
{
	hash ^= cfi_seed;
	while (unlikely(is_endbr(hash) || is_endbr(-hash))) {
		bool lsb = hash & 1;
		hash >>= 1;
		if (lsb)
			hash ^= 0x80200003;
	}
	return hash;
}

static __init int cfi_parse_cmdline(char *str)
{
	if (!str)
		return -EINVAL;

	while (str) {
		char *next = strchr(str, ',');
		if (next) {
			*next = 0;
			next++;
		}

		if (!strcmp(str, "auto")) {
			cfi_mode = CFI_DEFAULT;
		} else if (!strcmp(str, "off")) {
			cfi_mode = CFI_OFF;
			cfi_rand = false;
		} else if (!strcmp(str, "kcfi")) {
			cfi_mode = CFI_KCFI;
		} else if (!strcmp(str, "fineibt")) {
			cfi_mode = CFI_FINEIBT;
		} else if (!strcmp(str, "norand")) {
			cfi_rand = false;
		} else {
			pr_err("Ignoring unknown cfi option (%s).", str);
		}

		str = next;
	}

	return 0;
}
early_param("cfi", cfi_parse_cmdline);

/*
 * kCFI						FineIBT
 *
 * __cfi_\func:					__cfi_\func:
 *	movl   $0x12345678,%eax	// 5		endbr64			// 4
 *	nop					subl   $0x12345678,%r10d // 7
 *	nop					jz     1f		// 2
 *	nop					ud2			// 2
 *	nop				1:	nop			// 1
 *	nop
 *	nop
 *	nop
 *	nop
 *	nop
 *	nop
 *	nop
 *
 *
 * caller:					caller:
 *	movl	$(-0x12345678),%r10d	// 6	movl   $0x12345678,%r10d // 6
 *	addl	$-15(%r11),%r10d	// 4	sub    $16,%r11		// 4
 *	je	1f			// 2	nop4			// 4
 *	ud2				// 2
 * 1:	call	__x86_indirect_thunk_r11 // 5	call   *%r11; nop2;	// 5
 *
 */

asm(	".pushsection .rodata			\n"
	"fineibt_preamble_start:		\n"
	"	endbr64				\n"
	"	subl	$0x12345678, %r10d	\n"
	"	je	fineibt_preamble_end	\n"
	"	ud2				\n"
	"	nop				\n"
	"fineibt_preamble_end:			\n"
	".popsection\n"
);

extern u8 fineibt_preamble_start[];
extern u8 fineibt_preamble_end[];

#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start)
#define fineibt_preamble_hash 7

asm(	".pushsection .rodata			\n"
	"fineibt_caller_start:			\n"
	"	movl	$0x12345678, %r10d	\n"
	"	sub	$16, %r11		\n"
	ASM_NOP4
	"fineibt_caller_end:			\n"
	".popsection				\n"
);

extern u8 fineibt_caller_start[];
extern u8 fineibt_caller_end[];

#define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start)
#define fineibt_caller_hash 2

#define fineibt_caller_jmp (fineibt_caller_size - 2)

static u32 decode_preamble_hash(void *addr)
{
	u8 *p = addr;

	/* b8 78 56 34 12		mov $0x12345678,%eax */
	if (p[0] == 0xb8)
		return *(u32 *)(addr + 1);

	return 0; /* invalid hash value */
}

static u32 decode_caller_hash(void *addr)
{
	u8 *p = addr;

	/* 41 ba 78 56 34 12		mov $0x12345678,%r10d */
	if (p[0] == 0x41 && p[1] == 0xba)
		return -*(u32 *)(addr + 2);

	/* e8 0c 78 56 34 12		jmp.d8  +12 */
	if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp)
		return -*(u32 *)(addr + 2);

	return 0; /* invalid hash value */
}

/* .retpoline_sites */
static int cfi_disable_callers(s32 *start, s32 *end)
{
	/*
	 * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate
	 * intact for later usage. Also see decode_caller_hash() and
	 * cfi_rewrite_callers().
	 */
	const u8 jmp[] = { JMP8_INSN_OPCODE, fineibt_caller_jmp };
	s32 *s;

	for (s = start; s < end; s++) {
		void *addr = (void *)s + *s;
		u32 hash;

		addr -= fineibt_caller_size;
		hash = decode_caller_hash(addr);
		if (!hash) /* nocfi callers */
			continue;

		text_poke_early(addr, jmp, 2);
	}

	return 0;
}

static int cfi_enable_callers(s32 *start, s32 *end)
{
	/*
	 * Re-enable kCFI, undo what cfi_disable_callers() did.
	 */
	const u8 mov[] = { 0x41, 0xba };
	s32 *s;

	for (s = start; s < end; s++) {
		void *addr = (void *)s + *s;
		u32 hash;

		addr -= fineibt_caller_size;
		hash = decode_caller_hash(addr);
		if (!hash) /* nocfi callers */
			continue;

		text_poke_early(addr, mov, 2);
	}

	return 0;
}

/* .cfi_sites */
static int cfi_rand_preamble(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		void *addr = (void *)s + *s;
		u32 hash;

		hash = decode_preamble_hash(addr);
		if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
			 addr, addr, 5, addr))
			return -EINVAL;

		hash = cfi_rehash(hash);
		text_poke_early(addr + 1, &hash, 4);
	}

	return 0;
}

static int cfi_rewrite_preamble(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		void *addr = (void *)s + *s;
		u32 hash;

		hash = decode_preamble_hash(addr);
		if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
			 addr, addr, 5, addr))
			return -EINVAL;

		text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
		WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
		text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
	}

	return 0;
}

/* .retpoline_sites */
static int cfi_rand_callers(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		void *addr = (void *)s + *s;
		u32 hash;

		addr -= fineibt_caller_size;
		hash = decode_caller_hash(addr);
		if (hash) {
			hash = -cfi_rehash(hash);
			text_poke_early(addr + 2, &hash, 4);
		}
	}

	return 0;
}

static int cfi_rewrite_callers(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		void *addr = (void *)s + *s;
		u32 hash;

		addr -= fineibt_caller_size;
		hash = decode_caller_hash(addr);
		if (hash) {
			text_poke_early(addr, fineibt_caller_start, fineibt_caller_size);
			WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678);
			text_poke_early(addr + fineibt_caller_hash, &hash, 4);
		}
		/* rely on apply_retpolines() */
	}

	return 0;
}

static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
			    s32 *start_cfi, s32 *end_cfi, bool builtin)
{
	int ret;

	if (WARN_ONCE(fineibt_preamble_size != 16,
		      "FineIBT preamble wrong size: %ld", fineibt_preamble_size))
		return;

	if (cfi_mode == CFI_DEFAULT) {
		cfi_mode = CFI_KCFI;
		if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT))
			cfi_mode = CFI_FINEIBT;
	}

	/*
	 * Rewrite the callers to not use the __cfi_ stubs, such that we might
	 * rewrite them. This disables all CFI. If this succeeds but any of the
	 * later stages fails, we're without CFI.
	 */
	ret = cfi_disable_callers(start_retpoline, end_retpoline);
	if (ret)
		goto err;

	if (cfi_rand) {
		if (builtin)
			cfi_seed = get_random_u32();

		ret = cfi_rand_preamble(start_cfi, end_cfi);
		if (ret)
			goto err;

		ret = cfi_rand_callers(start_retpoline, end_retpoline);
		if (ret)
			goto err;
	}

	switch (cfi_mode) {
	case CFI_OFF:
		if (builtin)
			pr_info("Disabling CFI\n");
		return;

	case CFI_KCFI:
		ret = cfi_enable_callers(start_retpoline, end_retpoline);
		if (ret)
			goto err;

		if (builtin)
			pr_info("Using kCFI\n");
		return;

	case CFI_FINEIBT:
		ret = cfi_rewrite_preamble(start_cfi, end_cfi);
		if (ret)
			goto err;

		ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
		if (ret)
			goto err;

		if (builtin)
			pr_info("Using FineIBT CFI\n");
		return;

	default:
		break;
	}

err:
	pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n");
}

#else

static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
			    s32 *start_cfi, s32 *end_cfi, bool builtin)
{
}

#endif

void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
		   s32 *start_cfi, s32 *end_cfi)
{
	return __apply_fineibt(start_retpoline, end_retpoline,
			       start_cfi, end_cfi,
			       /* .builtin = */ false);
}

#ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end,
				  u8 *text, u8 *text_end)
{
	const s32 *poff;

	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn DS segment override prefix into lock prefix */
		if (*ptr == 0x3e)
			text_poke(ptr, ((unsigned char []){0xf0}), 1);
	}
}

static void alternatives_smp_unlock(const s32 *start, const s32 *end,
				    u8 *text, u8 *text_end)
{
	const s32 *poff;

	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn lock prefix into DS segment override prefix */
		if (*ptr == 0xf0)
			text_poke(ptr, ((unsigned char []){0x3E}), 1);
	}
}

struct smp_alt_module {
	/* what is this ??? */
	struct module	*mod;
	char		*name;

	/* ptrs to lock prefixes */
	const s32	*locks;
	const s32	*locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8		*text;
	u8		*text_end;

	struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static bool uniproc_patched = false;	/* protected by text_mutex */

void __init_or_module alternatives_smp_module_add(struct module *mod,
						  char *name,
						  void *locks, void *locks_end,
						  void *text,  void *text_end)
{
	struct smp_alt_module *smp;

	mutex_lock(&text_mutex);
	if (!uniproc_patched)
		goto unlock;

	if (num_possible_cpus() == 1)
		/* Don't bother remembering, we'll never have to undo it. */
		goto smp_unlock;

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (NULL == smp)
		/* we'll run the (safe but slow) SMP code then ...
		 */
		goto unlock;

	smp->mod	= mod;
	smp->name	= name;
	smp->locks	= locks;
	smp->locks_end	= locks_end;
	smp->text	= text;
	smp->text_end	= text_end;
	DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
		smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

	list_add_tail(&smp->next, &smp_alt_modules);
smp_unlock:
	alternatives_smp_unlock(locks, locks_end, text, text_end);
unlock:
	mutex_unlock(&text_mutex);
}

void __init_or_module alternatives_smp_module_del(struct module *mod)
{
	struct smp_alt_module *item;

	mutex_lock(&text_mutex);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
		kfree(item);
		break;
	}
	mutex_unlock(&text_mutex);
}

void alternatives_enable_smp(void)
{
	struct smp_alt_module *mod;

	/* Why bother if there are no other CPUs? */
	BUG_ON(num_possible_cpus() == 1);

	mutex_lock(&text_mutex);

	if (uniproc_patched) {
		pr_info("switching to SMP code\n");
		BUG_ON(num_online_cpus() != 1);
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
		uniproc_patched = false;
	}
	mutex_unlock(&text_mutex);
}

/*
 * Return 1 if the address range is reserved for SMP-alternatives.
 * Must hold text_mutex.
 */
int alternatives_text_reserved(void *start, void *end)
{
	struct smp_alt_module *mod;
	const s32 *poff;
	u8 *text_start = start;
	u8 *text_end = end;

	lockdep_assert_held(&text_mutex);

	list_for_each_entry(mod, &smp_alt_modules, next) {
		if (mod->text > text_end || mod->text_end < text_start)
			continue;
		for (poff = mod->locks; poff < mod->locks_end; poff++) {
			const u8 *ptr = (const u8 *)poff + *poff;

			if (text_start <= ptr && text_end > ptr)
				return 1;
		}
	}

	return 0;
}
#endif /* CONFIG_SMP */

#ifdef CONFIG_PARAVIRT
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
				     struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *p;
	char insn_buff[MAX_PATCH_LEN];

	for (p = start; p < end; p++) {
		unsigned int used;

		BUG_ON(p->len > MAX_PATCH_LEN);
		/* prep the buffer with the original instructions */
		memcpy(insn_buff, p->instr, p->len);
		used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len);

		BUG_ON(used > p->len);

		/* Pad the rest with nops */
		add_nops(insn_buff + used, p->len - used);
		text_poke_early(p->instr, insn_buff, p->len);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif	/* CONFIG_PARAVIRT */

/*
 * Self-test for the INT3 based CALL emulation code.
 *
 * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
 * properly and that there is a stack gap between the INT3 frame and the
 * previous context. Without this gap doing a virtual PUSH on the interrupted
 * stack would corrupt the INT3 IRET frame.
 *
 * See entry_{32,64}.S for more details.
 */

/*
 * We define the int3_magic() function in assembly to control the calling
 * convention such that we can 'call' it from assembly.
 */

extern void int3_magic(unsigned int *ptr); /* defined in asm */

asm (
"	.pushsection	.init.text, \"ax\", @progbits\n"
"	.type		int3_magic, @function\n"
"int3_magic:\n"
	ANNOTATE_NOENDBR
"	movl	$1, (%" _ASM_ARG1 ")\n"
	ASM_RET
"	.size		int3_magic, .-int3_magic\n"
"	.popsection\n"
);

extern void int3_selftest_ip(void); /* defined in asm below */

static int __init
int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
{
	unsigned long selftest = (unsigned long)&int3_selftest_ip;
	struct die_args *args = data;
	struct pt_regs *regs = args->regs;

	OPTIMIZER_HIDE_VAR(selftest);

	if (!regs || user_mode(regs))
		return NOTIFY_DONE;

	if (val != DIE_INT3)
		return NOTIFY_DONE;

	if (regs->ip - INT3_INSN_SIZE != selftest)
		return NOTIFY_DONE;

	int3_emulate_call(regs, (unsigned long)&int3_magic);
	return NOTIFY_STOP;
}

/* Must be noinline to ensure uniqueness of int3_selftest_ip. */
static noinline void __init int3_selftest(void)
{
	static __initdata struct notifier_block int3_exception_nb = {
		.notifier_call	= int3_exception_notify,
		.priority	= INT_MAX-1, /* last */
	};
	unsigned int val = 0;

	BUG_ON(register_die_notifier(&int3_exception_nb));

	/*
	 * Basically: int3_magic(&val); but really complicated :-)
	 *
	 * INT3 padded with NOP to CALL_INSN_SIZE. The int3_exception_nb
	 * notifier above will emulate CALL for us.
	 */
	asm volatile ("int3_selftest_ip:\n\t"
		      ANNOTATE_NOENDBR
		      "    int3; nop; nop; nop; nop\n\t"
		      : ASM_CALL_CONSTRAINT
		      : __ASM_SEL_RAW(a, D) (&val)
		      : "memory");

	BUG_ON(val != 1);

	unregister_die_notifier(&int3_exception_nb);
}

void __init alternative_instructions(void)
{
	int3_selftest();

	/*
	 * The patching is not fully atomic, so try to avoid local
	 * interruptions that might execute the to be patched code.
	 * Other CPUs are not running.
	 */
	stop_nmi();

	/*
	 * Don't stop machine check exceptions while patching.
	 * MCEs only happen when something got corrupted and in this
	 * case we must do something about the corruption.
	 * Ignoring it is worse than an unlikely patching race.
	 * Also machine checks tend to be broadcast and if one CPU
	 * goes into machine check the others follow quickly, so we don't
	 * expect a machine check to cause undue problems during code
	 * patching.
	 */

	/*
	 * Paravirt patching and alternative patching can be combined to
	 * replace a function call with a short direct code sequence (e.g.
	 * by setting a constant return value instead of doing that in an
	 * external function).
	 * In order to make this work the following sequence is required:
	 * 1. set (artificial) features depending on used paravirt
	 *    functions which can later influence alternative patching
	 * 2. apply paravirt patching (generally replacing an indirect
	 *    function call with a direct one)
	 * 3. apply alternative patching (e.g. replacing a direct function
	 *    call with a custom code sequence)
	 * Doing paravirt patching after alternative patching would clobber
	 * the optimization of the custom code with a function call again.
	 */
	paravirt_set_cap();

	/*
	 * First patch paravirt functions, such that we overwrite the indirect
	 * call with the direct call.
	 */
	apply_paravirt(__parainstructions, __parainstructions_end);

	__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
			__cfi_sites, __cfi_sites_end, true);

	/*
	 * Rewrite the retpolines, must be done before alternatives since
	 * those can rewrite the retpoline thunks.
	 */
	apply_retpolines(__retpoline_sites, __retpoline_sites_end);
	apply_returns(__return_sites, __return_sites_end);

	/*
	 * Then patch alternatives, such that those paravirt calls that are in
	 * alternatives can be overwritten by their immediate fragments.
	 */
	apply_alternatives(__alt_instructions, __alt_instructions_end);

	/*
	 * Now all calls are established. Apply the call thunks if
	 * required.
	 */
	callthunks_patch_builtin_calls();

	apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);

#ifdef CONFIG_SMP
	/* Patch to UP if other cpus not imminent. */
	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
		uniproc_patched = true;
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);
	}

	if (!uniproc_patched || num_possible_cpus() == 1) {
		free_init_pages("SMP alternatives",
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);
	}
#endif

	restart_nmi();
	alternatives_patched = 1;
}

/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI or
 * MCE handlers seeing an inconsistent instruction while you patch.
 */
void __init_or_module text_poke_early(void *addr, const void *opcode,
				      size_t len)
{
	unsigned long flags;

	if (boot_cpu_has(X86_FEATURE_NX) &&
	    is_module_text_address((unsigned long)addr)) {
		/*
		 * Modules text is marked initially as non-executable, so the
		 * code cannot be running and speculative code-fetches are
		 * prevented. Just change the code.
		 */
		memcpy(addr, opcode, len);
	} else {
		local_irq_save(flags);
		memcpy(addr, opcode, len);
		local_irq_restore(flags);
		sync_core();

		/*
		 * Could also do a CLFLUSH here to speed up CPU recovery; but
		 * that causes hangs on some VIA CPUs.
		 */
	}
}

typedef struct {
	struct mm_struct *mm;
} temp_mm_state_t;

/*
 * Using a temporary mm makes it possible to set temporary mappings that are
 * not accessible by other CPUs. Such mappings are needed to perform sensitive
 * memory writes that override the kernel memory protections (e.g., W^X),
 * without exposing the temporary page-table mappings that are required for
 * these write operations to other CPUs. Using a temporary mm also avoids TLB
 * shootdowns when the
 * mapping is torn down.
 *
 * Context: The temporary mm needs to be used exclusively by a single core. To
 *          harden security, IRQs must be disabled while the temporary mm is
 *          loaded, thereby preventing interrupt handler bugs from overriding
 *          the kernel memory protection.
 */
static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
{
	temp_mm_state_t temp_state;

	lockdep_assert_irqs_disabled();

	/*
	 * Make sure not to be in TLB lazy mode, as otherwise we'll end up
	 * with a stale address space WITHOUT being in lazy mode after
	 * restoring the previous mm.
	 */
	if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
		leave_mm(smp_processor_id());

	temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	switch_mm_irqs_off(NULL, mm, current);

	/*
	 * If breakpoints are enabled, disable them while the temporary mm is
	 * used. Userspace might set up watchpoints on addresses that are used
	 * in the temporary mm, which would lead to wrong signals being sent or
	 * crashes.
	 *
	 * Note that breakpoints are not disabled selectively, which also causes
	 * kernel breakpoints (e.g., perf's) to be disabled. This might be
	 * undesirable, but still seems reasonable as the code that runs in the
	 * temporary mm should be short.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_disable();

	return temp_state;
}

static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
{
	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(NULL, prev_state.mm, current);

	/*
	 * Restore the breakpoints if they were disabled before the temporary mm
	 * was loaded.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_restore();
}

__ro_after_init struct mm_struct *poking_mm;
__ro_after_init unsigned long poking_addr;

static void text_poke_memcpy(void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
}

static void text_poke_memset(void *dst, const void *src, size_t len)
{
	int c = *(const int *)src;

	memset(dst, c, len);
}

typedef void text_poke_f(void *dst, const void *src, size_t len);

static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t len)
{
	bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
	struct page *pages[2] = {NULL};
	temp_mm_state_t prev;
	unsigned long flags;
	pte_t pte, *ptep;
	spinlock_t *ptl;
	pgprot_t pgprot;

	/*
	 * While boot memory allocator is running we cannot use struct pages as
	 * they are not yet initialized. There is no way to recover.
	 */
	BUG_ON(!after_bootmem);

	if (!core_kernel_text((unsigned long)addr)) {
		pages[0] = vmalloc_to_page(addr);
		if (cross_page_boundary)
			pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
	} else {
		pages[0] = virt_to_page(addr);
		WARN_ON(!PageReserved(pages[0]));
		if (cross_page_boundary)
			pages[1] = virt_to_page(addr + PAGE_SIZE);
	}
	/*
	 * If something went wrong, crash and burn since recovery paths are not
	 * implemented.
	 */
	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));

	/*
	 * Map the page without the global bit, as TLB flushing is done with
	 * flush_tlb_mm_range(), which is intended for non-global PTEs.
	 */
	pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);

	/*
	 * The lock is not really needed, but this avoids open-coding.
	 */
	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);

	/*
	 * This must not fail; preallocated in poking_init().
	 */
	VM_BUG_ON(!ptep);

	local_irq_save(flags);

	pte = mk_pte(pages[0], pgprot);
	set_pte_at(poking_mm, poking_addr, ptep, pte);

	if (cross_page_boundary) {
		pte = mk_pte(pages[1], pgprot);
		set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
	}

	/*
	 * Loading the temporary mm behaves as a compiler barrier, which
	 * guarantees that the PTE will be set at the time memcpy() is done.
	 */
	prev = use_temporary_mm(poking_mm);

	kasan_disable_current();
	func((u8 *)poking_addr + offset_in_page(addr), src, len);
	kasan_enable_current();

	/*
	 * Ensure that the PTE is only cleared after the instructions of memcpy
	 * were issued by using a compiler barrier.
	 */
	barrier();

	pte_clear(poking_mm, poking_addr, ptep);
	if (cross_page_boundary)
		pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);

	/*
	 * Loading the previous page-table hierarchy requires a serializing
	 * instruction that already allows the core to see the updated version.
	 * Xen-PV is assumed to serialize execution in a similar manner.
	 */
	unuse_temporary_mm(prev);

	/*
	 * Flushing the TLB might involve IPIs, which would require enabled
	 * IRQs, but that is not needed here since the mm is no longer in use
	 * at this point.
	 */
	flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
			   (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
			   PAGE_SHIFT, false);

	if (func == text_poke_memcpy) {
		/*
		 * If the text does not match what we just wrote then something is
		 * fundamentally screwy; there's nothing we can really do about that.
		 */
		BUG_ON(memcmp(addr, src, len));
	}

	local_irq_restore(flags);
	pte_unmap_unlock(ptep, ptl);
	return addr;
}

/**
 * text_poke - Update instructions on a live kernel
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be aligned
 * in a way that permits an atomic write. It also makes sure we fit on a single
 * page.
 *
 * Note that the caller must ensure that if the modified code is part of a
 * module, the module would not be removed during poking. This can be achieved
 * by registering a module notifier, and ordering module removal and patching
 * through a mutex.
 */
void *text_poke(void *addr, const void *opcode, size_t len)
{
	lockdep_assert_held(&text_mutex);

	return __text_poke(text_poke_memcpy, addr, opcode, len);
}

/**
 * text_poke_kgdb - Update instructions on a live kernel by kgdb
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be aligned
 * in a way that permits an atomic write. It also makes sure we fit on a single
 * page.
 *
 * Context: should only be used by kgdb, which ensures no other core is running,
 *          despite the fact it does not hold the text_mutex.
 */
void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
{
	return __text_poke(text_poke_memcpy, addr, opcode, len);
}

void *text_poke_copy_locked(void *addr, const void *opcode, size_t len,
			    bool core_ok)
{
	unsigned long start = (unsigned long)addr;
	size_t patched = 0;

	if (WARN_ON_ONCE(!core_ok && core_kernel_text(start)))
		return NULL;

	while (patched < len) {
		unsigned long ptr = start + patched;
		size_t s;

		s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);

		__text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s);
		patched += s;
	}
	return addr;
}

/**
 * text_poke_copy - Copy instructions into (an unused part of) RX memory
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy, could be more than 2x PAGE_SIZE
 *
 * Not safe against concurrent execution; useful for JITs to dump
 * new code blocks into unused regions of RX memory. Can be used in
 * conjunction with synchronize_rcu_tasks() to wait for existing
 * execution to quiesce after having made sure no existing function
 * pointers are live.
 */
void *text_poke_copy(void *addr, const void *opcode, size_t len)
{
	mutex_lock(&text_mutex);
	addr = text_poke_copy_locked(addr, opcode, len, false);
	mutex_unlock(&text_mutex);
	return addr;
}

/**
 * text_poke_set - memset into (an unused part of) RX memory
 * @addr: address to modify
 * @c: the byte to fill the area with
 * @len: length to copy, could be more than 2x PAGE_SIZE
 *
 * This is useful to overwrite unused regions of RX memory with illegal
 * instructions.
 */
void *text_poke_set(void *addr, int c, size_t len)
{
	unsigned long start = (unsigned long)addr;
	size_t patched = 0;

	if (WARN_ON_ONCE(core_kernel_text(start)))
		return NULL;

	mutex_lock(&text_mutex);
	while (patched < len) {
		unsigned long ptr = start + patched;
		size_t s;

		s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);

		__text_poke(text_poke_memset, (void *)ptr, (void *)&c, s);
		patched += s;
	}
	mutex_unlock(&text_mutex);
	return addr;
}

static void do_sync_core(void *info)
{
	sync_core();
}

void text_poke_sync(void)
{
	on_each_cpu(do_sync_core, NULL, 1);
}

/*
 * NOTE: crazy scheme to allow patching Jcc.d32 but not increase the size of
 * this thing. When len == 6 everything is prefixed with 0x0f and we map
 * opcode to Jcc.d8, using len to distinguish.
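 *
 * (That is, a 6 byte Jcc.d32 is stored with opcode mapped down to the matching
 *  Jcc.d8 and len == 6; the 0x0f escape byte is re-created during patching and
 *  emulation based on len.)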
 */
struct text_poke_loc {
	/* addr := _stext + rel_addr */
	s32 rel_addr;
	s32 disp;
	u8 len;
	u8 opcode;
	const u8 text[POKE_MAX_OPCODE_SIZE];
	/* see text_poke_bp_batch() */
	u8 old;
};

struct bp_patching_desc {
	struct text_poke_loc *vec;
	int nr_entries;
	atomic_t refs;
};

static struct bp_patching_desc bp_desc;

static __always_inline
struct bp_patching_desc *try_get_desc(void)
{
	struct bp_patching_desc *desc = &bp_desc;

	if (!arch_atomic_inc_not_zero(&desc->refs))
		return NULL;

	return desc;
}

static __always_inline void put_desc(void)
{
	struct bp_patching_desc *desc = &bp_desc;

	smp_mb__before_atomic();
	arch_atomic_dec(&desc->refs);
}

static __always_inline void *text_poke_addr(struct text_poke_loc *tp)
{
	return _stext + tp->rel_addr;
}

static __always_inline int patch_cmp(const void *key, const void *elt)
{
	struct text_poke_loc *tp = (struct text_poke_loc *) elt;

	if (key < text_poke_addr(tp))
		return -1;
	if (key > text_poke_addr(tp))
		return 1;
	return 0;
}

noinstr int poke_int3_handler(struct pt_regs *regs)
{
	struct bp_patching_desc *desc;
	struct text_poke_loc *tp;
	int ret = 0;
	void *ip;

	if (user_mode(regs))
		return 0;

	/*
	 * Having observed our INT3 instruction, we now must observe
	 * bp_desc with non-zero refcount:
	 *
	 *	bp_desc.refs = 1		INT3
	 *	WMB				RMB
	 *	write INT3			if (bp_desc.refs != 0)
	 */
	smp_rmb();

	desc = try_get_desc();
	if (!desc)
		return 0;

	/*
	 * Discount the INT3. See text_poke_bp_batch().
	 */
	ip = (void *) regs->ip - INT3_INSN_SIZE;

	/*
	 * Skip the binary search if there is a single member in the vector.
	 */
	if (unlikely(desc->nr_entries > 1)) {
		tp = __inline_bsearch(ip, desc->vec, desc->nr_entries,
				      sizeof(struct text_poke_loc),
				      patch_cmp);
		if (!tp)
			goto out_put;
	} else {
		tp = desc->vec;
		if (text_poke_addr(tp) != ip)
			goto out_put;
	}

	ip += tp->len;

	switch (tp->opcode) {
	case INT3_INSN_OPCODE:
		/*
		 * Someone poked an explicit INT3, they'll want to handle it,
		 * do not consume.
		 */
		goto out_put;

	case RET_INSN_OPCODE:
		int3_emulate_ret(regs);
		break;

	case CALL_INSN_OPCODE:
		int3_emulate_call(regs, (long)ip + tp->disp);
		break;

	case JMP32_INSN_OPCODE:
	case JMP8_INSN_OPCODE:
		int3_emulate_jmp(regs, (long)ip + tp->disp);
		break;

	case 0x70 ... 0x7f: /* Jcc */
		int3_emulate_jcc(regs, tp->opcode & 0xf, (long)ip, tp->disp);
		break;

	default:
		BUG();
	}

	ret = 1;

out_put:
	put_desc();
	return ret;
}

#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
static struct text_poke_loc tp_vec[TP_VEC_MAX];
static int tp_vec_nr;

/**
 * text_poke_bp_batch() -- update instructions on live kernel on SMP
 * @tp:			vector of instructions to patch
 * @nr_entries:		number of entries in the vector
 *
 * Modify multi-byte instruction by using int3 breakpoint on SMP.
 * We completely avoid stop_machine() here, and achieve the
 * synchronization using int3 breakpoint.
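 * (While an address is mid-patch, any CPU hitting it traps into
 * poke_int3_handler(), which emulates the new instruction, or the
 * caller-supplied 'emulate' template, until the final byte is written.)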
 *
 * The way it is done:
 *	- For each entry in the vector:
 *		- add a int3 trap to the address that will be patched
 *	- sync cores
 *	- For each entry in the vector:
 *		- update all but the first byte of the patched range
 *	- sync cores
 *	- For each entry in the vector:
 *		- replace the first byte (int3) by the first byte of
 *		  replacing opcode
 *	- sync cores
 */
static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
	unsigned char int3 = INT3_INSN_OPCODE;
	unsigned int i;
	int do_sync;

	lockdep_assert_held(&text_mutex);

	bp_desc.vec = tp;
	bp_desc.nr_entries = nr_entries;

	/*
	 * Corresponds to the implicit memory barrier in try_get_desc() to
	 * ensure reading a non-zero refcount provides up to date bp_desc data.
	 */
	atomic_set_release(&bp_desc.refs, 1);

	/*
	 * Corresponding read barrier in int3 notifier for making sure the
	 * nr_entries and handler are correctly ordered wrt. patching.
	 */
	smp_wmb();

	/*
	 * First step: add a int3 trap to the address that will be patched.
	 */
	for (i = 0; i < nr_entries; i++) {
		tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
		text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
	}

	text_poke_sync();

	/*
	 * Second step: update all but the first byte of the patched range.
	 */
	for (do_sync = 0, i = 0; i < nr_entries; i++) {
		u8 old[POKE_MAX_OPCODE_SIZE+1] = { tp[i].old, };
		u8 _new[POKE_MAX_OPCODE_SIZE+1];
		const u8 *new = tp[i].text;
		int len = tp[i].len;

		if (len - INT3_INSN_SIZE > 0) {
			memcpy(old + INT3_INSN_SIZE,
			       text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
			       len - INT3_INSN_SIZE);

			if (len == 6) {
				_new[0] = 0x0f;
				memcpy(_new + 1, new, 5);
				new = _new;
			}

			text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
				  new + INT3_INSN_SIZE,
				  len - INT3_INSN_SIZE);

			do_sync++;
		}

		/*
		 * Emit a perf event to record the text poke, primarily to
		 * support Intel PT decoding which must walk the executable code
		 * to reconstruct the trace. The flow up to here is:
		 *   - write INT3 byte
		 *   - IPI-SYNC
		 *   - write instruction tail
		 * At this point the actual control flow will be through the
		 * INT3 and handler and not hit the old or new instruction.
		 * Intel PT outputs FUP/TIP packets for the INT3, so the flow
		 * can still be decoded. Subsequently:
		 *   - emit RECORD_TEXT_POKE with the new instruction
		 *   - IPI-SYNC
		 *   - write first byte
		 *   - IPI-SYNC
		 * So before the text poke event timestamp, the decoder will see
		 * either the old instruction flow or FUP/TIP of INT3. After the
		 * text poke event timestamp, the decoder will see either the
		 * new instruction flow or FUP/TIP of INT3. Thus decoders can
		 * use the timestamp as the point at which to modify the
		 * executable code.
		 * The old instruction is recorded so that the event can be
		 * processed forwards or backwards.
		 */
		perf_event_text_poke(text_poke_addr(&tp[i]), old, len, new, len);
	}

	if (do_sync) {
		/*
		 * According to Intel, this core syncing is very likely
		 * not necessary and we'd be safe even without it. But
		 * better safe than sorry (plus there's not only Intel).
		 */
		text_poke_sync();
	}

	/*
	 * Third step: replace the first byte (int3) by the first byte of
	 * replacing opcode.
	 */
	for (do_sync = 0, i = 0; i < nr_entries; i++) {
		u8 byte = tp[i].text[0];

		if (tp[i].len == 6)
			byte = 0x0f;

		if (byte == INT3_INSN_OPCODE)
			continue;

		text_poke(text_poke_addr(&tp[i]), &byte, INT3_INSN_SIZE);
		do_sync++;
	}

	if (do_sync)
		text_poke_sync();

	/*
	 * Remove and wait for refs to be zero.
	 */
	if (!atomic_dec_and_test(&bp_desc.refs))
		atomic_cond_read_acquire(&bp_desc.refs, !VAL);
}

static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
			       const void *opcode, size_t len, const void *emulate)
{
	struct insn insn;
	int ret, i = 0;

	if (len == 6)
		i = 1;
	memcpy((void *)tp->text, opcode+i, len-i);
	if (!emulate)
		emulate = opcode;

	ret = insn_decode_kernel(&insn, emulate);
	BUG_ON(ret < 0);

	tp->rel_addr = addr - (void *)_stext;
	tp->len = len;
	tp->opcode = insn.opcode.bytes[0];

	if (is_jcc32(&insn)) {
		/*
		 * Map Jcc.d32 onto Jcc.d8 and use len to distinguish.
		 */
		tp->opcode = insn.opcode.bytes[1] - 0x10;
	}

	switch (tp->opcode) {
	case RET_INSN_OPCODE:
	case JMP32_INSN_OPCODE:
	case JMP8_INSN_OPCODE:
		/*
		 * Control flow instructions without implied execution of the
		 * next instruction can be padded with INT3.
		 */
		for (i = insn.length; i < len; i++)
			BUG_ON(tp->text[i] != INT3_INSN_OPCODE);
		break;

	default:
		BUG_ON(len != insn.length);
	}

	switch (tp->opcode) {
	case INT3_INSN_OPCODE:
	case RET_INSN_OPCODE:
		break;

	case CALL_INSN_OPCODE:
	case JMP32_INSN_OPCODE:
	case JMP8_INSN_OPCODE:
	case 0x70 ... 0x7f: /* Jcc */
		tp->disp = insn.immediate.value;
		break;

	default: /* assume NOP */
		switch (len) {
		case 2: /* NOP2 -- emulate as JMP8+0 */
			BUG_ON(memcmp(emulate, x86_nops[len], len));
			tp->opcode = JMP8_INSN_OPCODE;
			tp->disp = 0;
			break;

		case 5: /* NOP5 -- emulate as JMP32+0 */
			BUG_ON(memcmp(emulate, x86_nops[len], len));
			tp->opcode = JMP32_INSN_OPCODE;
			tp->disp = 0;
			break;

		default: /* unknown instruction */
			BUG();
		}
		break;
	}
}

/*
 * We hard rely on the tp_vec being ordered; ensure this is so by flushing
 * early if needed.
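 *
 * (text_poke_queue() callers are expected to hand in addresses in ascending
 *  order; an out-of-order or NULL address simply flushes the pending vector
 *  first.)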
 */
static bool tp_order_fail(void *addr)
{
	struct text_poke_loc *tp;

	if (!tp_vec_nr)
		return false;

	if (!addr) /* force */
		return true;

	tp = &tp_vec[tp_vec_nr - 1];
	if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)
		return true;

	return false;
}

static void text_poke_flush(void *addr)
{
	if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
		text_poke_bp_batch(tp_vec, tp_vec_nr);
		tp_vec_nr = 0;
	}
}

void text_poke_finish(void)
{
	text_poke_flush(NULL);
}

void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
{
	struct text_poke_loc *tp;

	text_poke_flush(addr);

	tp = &tp_vec[tp_vec_nr++];
	text_poke_loc_init(tp, addr, opcode, len, emulate);
}

/**
 * text_poke_bp() -- update instructions on live kernel on SMP
 * @addr:	address to patch
 * @opcode:	opcode of new instruction
 * @len:	length to copy
 * @emulate:	instruction to be emulated
 *
 * Update a single instruction with the vector in the stack, avoiding
 * dynamically allocated memory. This function should be used when it is
 * not possible to allocate memory.
 */
void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
	struct text_poke_loc tp;

	text_poke_loc_init(&tp, addr, opcode, len, emulate);
	text_poke_bp_batch(&tp, 1);
}