/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function; in particular
 * there must be a plain RET and not a jump to the return thunk.
 */

#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */

	.section .data..relocate_kernel,"a";
/* Minimal CPU state */
SYM_DATA_LOCAL(saved_rsp, .quad 0)
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)
	/* other data */
SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
SYM_DATA(kexec_debug_8250_mmio32, .quad 0)
SYM_DATA(kexec_debug_8250_port, .word 0)

	.balign 16
SYM_DATA_START_LOCAL(kexec_debug_gdt)
	.word kexec_debug_gdt_end - kexec_debug_gdt - 1
	.long 0
	.word 0
	.quad 0x00cf9a000000ffff	/* __KERNEL32_CS */
	.quad 0x00af9a000000ffff	/* __KERNEL_CS */
	.quad 0x00cf92000000ffff	/* __KERNEL_DS */
SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)

	.balign 8
SYM_DATA_START(kexec_debug_idt)
	.skip 0x100, 0x00
SYM_DATA_END(kexec_debug_idt)

	.section .text..relocate_kernel,"ax";
	.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi pa_control_page
	 * %rdx start address
	 * %rcx flags: RELOC_KERNEL_*
	 */

	/* Save the CPU context, used for jumping back */
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	pushf

	/* Invalidate GDT/IDT, zero out flags */
	pushq $0
	pushq $0

	lidt (%rsp)
	lgdt (%rsp)
	addq $8, %rsp
	popfq

	/* Switch to the identity mapped page tables */
	movq %cr3, %rax
	movq kexec_pa_table_page(%rip), %r9
	movq %r9, %cr3

	/* Leave CR4 in %r13 to enable the right paging mode later. */
	movq %cr4, %r13

	/*
	 * Disable global pages immediately to ensure this mapping is RWX.
	 * Disable LASS before jumping to the identity mapped page.
	 */
	movq %r13, %r12
	andq $~(X86_CR4_PGE | X86_CR4_LASS), %r12
	movq %r12, %cr4

	/* Save %rsp and CRs. */
	movq %r13, saved_cr4(%rip)
	movq %rsp, saved_rsp(%rip)
	movq %rax, saved_cr3(%rip)
	movq %cr0, %rax
	movq %rax, saved_cr0(%rip)

	/* save indirection list for jumping back */
	movq %rdi, pa_backup_pages_map(%rip)
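
	/*
	 * This code already runs from a copy in the control page (see the
	 * section comment above); the '0:' arithmetic below switches
	 * execution from the kernel-virtual alias to the identity-mapped
	 * physical alias of that page: %rsi holds the physical base of the
	 * control page, and (identity_mapped - __relocate_kernel_start) is
	 * the offset of identity_mapped within the copied section.
	 */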
	/* Save the flags to %r11 as swap_pages clobbers %rcx. */
	movq %rcx, %r11

	/* set up a new stack at the end of the physical control page */
	lea PAGE_SIZE(%rsi), %rsp

	/* jump to identity mapped page */
0:	addq $identity_mapped - 0b, %rsi
	subq $__relocate_kernel_start - 0b, %rsi
	ANNOTATE_RETPOLINE_SAFE
	jmp *%rsi
SYM_CODE_END(relocate_kernel)

SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %rdx start address
	 * %r9  page table page
	 * %r11 flags: RELOC_KERNEL_*
	 * %r13 original CR4 when relocate_kernel() was invoked
	 */

	/* store the start address on the stack */
	pushq %rdx

	/* Create a GDTR (16-bit limit, 64-bit addr) on the stack */
	leaq kexec_debug_gdt(%rip), %rax
	pushq %rax
	pushw (%rax)

	/* Load the GDT, put the stack back */
	lgdt (%rsp)
	addq $10, %rsp

	/* Test that we can load segments */
	movq %ds, %rax
	movq %rax, %ds

	/* Now an IDTR on the stack to load the IDT the kernel created */
	leaq kexec_debug_idt(%rip), %rsi
	pushq %rsi
	pushw $0xff
	lidt (%rsp)
	addq $10, %rsp

	//int3

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq %cr4, %rax
	andq $~(X86_CR4_CET), %rax
	movq %rax, %cr4

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq %cr0, %rax
	andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl $(X86_CR0_PG | X86_CR0_PE), %eax
	movq %rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13, which contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl $(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq %r13, %cr4

	/* Flush the TLB (needed?) */
	movq %r9, %cr3

	/*
	 * If the memory cache is in an incoherent state, e.g. due to
	 * memory encryption, do WBINVD to flush the cache.
	 *
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 *
	 * Note SME sets this flag to true when the platform supports
	 * SME, so the WBINVD is performed even if SME is not activated
	 * by the kernel; that does no harm.
	 */
	testb $RELOC_KERNEL_CACHE_INCOHERENT, %r11b
	jz .Lnowbinvd
	wbinvd
.Lnowbinvd:
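
	/*
	 * swap_pages() below walks the indirection list built by the kexec
	 * core. The low bits of each 64-bit entry are the IND_* flags from
	 * <linux/kexec.h> (IND_DESTINATION 0x1, IND_INDIRECTION 0x2,
	 * IND_DONE 0x4, IND_SOURCE 0x8); the remaining bits hold a
	 * page-aligned physical address.
	 */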
	call swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code,
	 * we need to execute a serializing instruction here. So we
	 * flush the TLB by reloading %cr3; it's handy and not
	 * processor dependent.
	 */
	movq %cr3, %rax
	movq %rax, %cr3

	testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b
	jnz .Lrelocate

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */

	xorl %eax, %eax
	xorl %ebx, %ebx
	xorl %ecx, %ecx
	xorl %edx, %edx
	xorl %esi, %esi
	xorl %edi, %edi
	xorl %ebp, %ebp
	xorl %r8d, %r8d
	xorl %r9d, %r9d
	xorl %r10d, %r10d
	xorl %r11d, %r11d
	xorl %r12d, %r12d
	xorl %r13d, %r13d
	xorl %r14d, %r14d
	xorl %r15d, %r15d

	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lrelocate:
	popq %rdx

	/* Use the swap page for the callee's stack */
	movq kexec_pa_swap_page(%rip), %r10
	leaq PAGE_SIZE(%r10), %rsp

	/* push the existing entry point onto the callee's stack */
	pushq %rdx

	ANNOTATE_RETPOLINE_SAFE
	call *%rdx

	/* get the re-entry point of the peer system */
	popq %rbp
	movq kexec_pa_swap_page(%rip), %r10
	movq pa_backup_pages_map(%rip), %rdi
	movq kexec_pa_table_page(%rip), %rax
	movq %rax, %cr3

	/* Find start (and end) of this physical mapping of control page */
	leaq (%rip), %r8
	ANNOTATE_NOENDBR
	andq $PAGE_MASK, %r8
	lea PAGE_SIZE(%r8), %rsp
	/*
	 * Ensure the RELOC_KERNEL_PRESERVE_CONTEXT flag is set so that
	 * swap_pages() can swap pages correctly. Note all other
	 * RELOC_KERNEL_* flags passed to relocate_kernel() are not
	 * restored.
	 */
	movl $RELOC_KERNEL_PRESERVE_CONTEXT, %r11d
	call swap_pages
	movq kexec_va_control_page(%rip), %rax
0:	addq $virtual_mapped - 0b, %rax
	subq $__relocate_kernel_start - 0b, %rax
	pushq %rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)

SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq saved_rsp(%rip), %rsp
	movq saved_cr4(%rip), %rax
	movq %rax, %cr4
	movq saved_cr3(%rip), %rax
	movq saved_cr0(%rip), %r8
	movq %rax, %cr3
	movq %r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq $saved_context, %rax
	lgdt saved_context_gdt_desc(%rax)
#endif

	/* relocate_kernel() returns the re-entry point for next time */
	movq %rbp, %rax

	popf
	popq %r15
	popq %r14
	popq %r13
	popq %r12
	popq %rbp
	popq %rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)

	/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %r11 flags: RELOC_KERNEL_*
	 */
	movq %rdi, %rcx		/* Put the indirection_page in %rcx */
	xorl %edi, %edi
	xorl %esi, %esi
	jmp .Lstart		/* Should start with an indirection record */

.Lloop:	/* top, read another word from the indirection page */
	movq (%rbx), %rcx
	addq $8, %rbx
.Lstart:
	testb $0x1, %cl		/* is it a destination page? */
	jz .Lnotdest
	movq %rcx, %rdi
	andq $0xfffffffffffff000, %rdi
	jmp .Lloop
.Lnotdest:
	testb $0x2, %cl		/* is it an indirection page? */
	jz .Lnotind
	movq %rcx, %rbx
	andq $0xfffffffffffff000, %rbx
	jmp .Lloop
.Lnotind:
	testb $0x4, %cl		/* is it the done indicator? */
	jz .Lnotdone
	jmp .Ldone
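
	/*
	 * A source entry names one 4KiB page to place at the current
	 * destination. In plain kexec this is a straight copy; with
	 * RELOC_KERNEL_PRESERVE_CONTEXT it is a three-way exchange via
	 * kexec_pa_swap_page so the old kernel's pages survive for the
	 * return trip. Roughly, in C:
	 *
	 *	memcpy(swap, src, PAGE_SIZE);	// save the source page
	 *	memcpy(src, dst, PAGE_SIZE);	// preserve the old contents
	 *	memcpy(dst, swap, PAGE_SIZE);	// install the source page
	 *
	 * Each memcpy is one 512-quadword 'rep movsq' below.
	 */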
.Lnotdone:
	testb $0x8, %cl		/* is it the source indicator? */
	jz .Lloop		/* Ignore it otherwise */
	movq %rcx, %rsi		/* For every source page do a copy */
	andq $0xfffffffffffff000, %rsi

	movq %rdi, %rdx		/* Save destination page to %rdx */
	movq %rsi, %rax		/* Save source page to %rax */

	/* Only actually swap for ::preserve_context */
	testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b
	jz .Lnoswap

	/* copy source page to swap page */
	movq kexec_pa_swap_page(%rip), %rdi
	movl $512, %ecx
	rep movsq

	/* copy destination page to source page */
	movq %rax, %rdi
	movq %rdx, %rsi
	movl $512, %ecx
	rep movsq

	/* copy swap page to destination page */
	movq %rdx, %rdi
	movq kexec_pa_swap_page(%rip), %rsi
.Lnoswap:
	movl $512, %ecx
	rep movsq

	lea PAGE_SIZE(%rax), %rsi
	jmp .Lloop
.Ldone:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)

/*
 * Generic 'print character' routine
 *  - %al: Character to be printed (may clobber %rax)
 *  - %rdx: MMIO address or port.
 */
#define XMTRDY		0x20

#define TXR		0	/* Transmit register (WRITE) */
#define LSR		5	/* Line Status */

SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	addw $LSR, %dx
	xchg %al, %ah
.Lxmtrdy_loop:
	inb %dx, %al
	testb $XMTRDY, %al
	jnz .Lready
	pause
	jmp .Lxmtrdy_loop

.Lready:
	subw $LSR, %dx
	xchg %al, %ah
	outb %al, %dx
pr_char_null:
	ANNOTATE_NOENDBR

	ANNOTATE_UNRET_SAFE
	ret
SYM_CODE_END(pr_char_8250)

SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
.Lxmtrdy_loop_mmio:
	movb (LSR*4)(%rdx), %ah
	testb $XMTRDY, %ah
	jnz .Lready_mmio
	pause
	jmp .Lxmtrdy_loop_mmio

.Lready_mmio:
	movb %al, (%rdx)
	ANNOTATE_UNRET_SAFE
	ret
SYM_CODE_END(pr_char_8250_mmio32)

/*
 * Load the pr_char function pointer into %rsi and load %rdx with whatever
 * that function wants to see there (typically port/MMIO address).
 */
.macro pr_setup
	leaq pr_char_8250(%rip), %rsi
	movw kexec_debug_8250_port(%rip), %dx
	testw %dx, %dx
	jnz 1f

	leaq pr_char_8250_mmio32(%rip), %rsi
	movq kexec_debug_8250_mmio32(%rip), %rdx
	testq %rdx, %rdx
	jnz 1f

	leaq pr_char_null(%rip), %rsi
1:
.endm

/* Print the nybble in %bl, clobber %rax */
SYM_CODE_START_LOCAL_NOALIGN(pr_nybble)
	UNWIND_HINT_FUNC
	movb %bl, %al
	nop
	andb $0x0f, %al
	addb $0x30, %al
	cmpb $0x3a, %al
	jb 1f
	addb $('a' - '0' - 10), %al
	ANNOTATE_RETPOLINE_SAFE
1:	jmp *%rsi
SYM_CODE_END(pr_nybble)

SYM_CODE_START_LOCAL_NOALIGN(pr_qword)
	UNWIND_HINT_FUNC
	movq $16, %rcx
1:	rolq $4, %rbx
	call pr_nybble
	loop 1b
	movb $'\n', %al
	ANNOTATE_RETPOLINE_SAFE
	jmp *%rsi
SYM_CODE_END(pr_qword)

.macro print_reg a, b, c, d, r
	movb $\a, %al
	ANNOTATE_RETPOLINE_SAFE
	call *%rsi
	movb $\b, %al
	ANNOTATE_RETPOLINE_SAFE
	call *%rsi
	movb $\c, %al
	ANNOTATE_RETPOLINE_SAFE
	call *%rsi
	movb $\d, %al
	ANNOTATE_RETPOLINE_SAFE
	call *%rsi
	movq \r, %rbx
	call pr_qword
.endm

SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors)
	/* Each of these is 6 bytes. */
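	/*
	 * The '. =' directives in the two macros below pad each stub to a
	 * fixed KEXEC_DEBUG_EXC_HANDLER_SIZE stride, so the stub for
	 * vector N lives at kexec_debug_exc_vectors + N * stride. Vectors
	 * where the CPU pushes an error code skip the 'pushq $0' and pad
	 * with NOPs instead, so both variants hand exc_handler an
	 * identical stack frame.
	 */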
.macro vec_err exc
	UNWIND_HINT_ENTRY
	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
	nop
	nop
	pushq $\exc
	jmp exc_handler
.endm

.macro vec_noerr exc
	UNWIND_HINT_ENTRY
	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
	pushq $0
	pushq $\exc
	jmp exc_handler
.endm

	ANNOTATE_NOENDBR
	vec_noerr 0 // #DE
	vec_noerr 1 // #DB
	vec_noerr 2 // #NMI
	vec_noerr 3 // #BP
	vec_noerr 4 // #OF
	vec_noerr 5 // #BR
	vec_noerr 6 // #UD
	vec_noerr 7 // #NM
	vec_err 8 // #DF
	vec_noerr 9
	vec_err 10 // #TS
	vec_err 11 // #NP
	vec_err 12 // #SS
	vec_err 13 // #GP
	vec_err 14 // #PF
	vec_noerr 15
SYM_CODE_END(kexec_debug_exc_vectors)

SYM_CODE_START_LOCAL_NOALIGN(exc_handler)
	/* No need for RET mitigations during kexec */
	VALIDATE_UNRET_END

	pushq %rax
	pushq %rbx
	pushq %rcx
	pushq %rdx
	pushq %rsi

	/* Stack frame */
#define EXC_SS		0x58 /* Architectural... */
#define EXC_RSP		0x50
#define EXC_EFLAGS	0x48
#define EXC_CS		0x40
#define EXC_RIP		0x38
#define EXC_ERRORCODE	0x30 /* Either architectural or zero pushed by handler */
#define EXC_EXCEPTION	0x28 /* Pushed by handler entry point */
#define EXC_RAX		0x20 /* Pushed just above in exc_handler */
#define EXC_RBX		0x18
#define EXC_RCX		0x10
#define EXC_RDX		0x08
#define EXC_RSI		0x00

	/* Set up %rdx/%rsi for debug output */
	pr_setup

	/* rip and exception info */
	print_reg 'E', 'x', 'c', ':', EXC_EXCEPTION(%rsp)
	print_reg 'E', 'r', 'r', ':', EXC_ERRORCODE(%rsp)
	print_reg 'r', 'i', 'p', ':', EXC_RIP(%rsp)
	print_reg 'r', 's', 'p', ':', EXC_RSP(%rsp)

	/* We spilled these to the stack */
	print_reg 'r', 'a', 'x', ':', EXC_RAX(%rsp)
	print_reg 'r', 'b', 'x', ':', EXC_RBX(%rsp)
	print_reg 'r', 'c', 'x', ':', EXC_RCX(%rsp)
	print_reg 'r', 'd', 'x', ':', EXC_RDX(%rsp)
	print_reg 'r', 's', 'i', ':', EXC_RSI(%rsp)

	/* Other registers untouched */
	print_reg 'r', 'd', 'i', ':', %rdi
	print_reg 'r', '8', ' ', ':', %r8
	print_reg 'r', '9', ' ', ':', %r9
	print_reg 'r', '1', '0', ':', %r10
	print_reg 'r', '1', '1', ':', %r11
	print_reg 'r', '1', '2', ':', %r12
	print_reg 'r', '1', '3', ':', %r13
	print_reg 'r', '1', '4', ':', %r14
	print_reg 'r', '1', '5', ':', %r15
	print_reg 'c', 'r', '2', ':', %cr2

	/* Only return from INT3 */
	cmpq $3, EXC_EXCEPTION(%rsp)
	jne .Ldie

	popq %rsi
	popq %rdx
	popq %rcx
	popq %rbx
	popq %rax

	addq $16, %rsp
	iretq

.Ldie:
	hlt
	jmp .Ldie

SYM_CODE_END(exc_handler)