1/* SPDX-License-Identifier: GPL-2.0-only */ 2/* 3 * relocate_kernel.S - put the kernel image in place to boot 4 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> 5 */ 6 7#include <linux/linkage.h> 8#include <linux/stringify.h> 9#include <asm/alternative.h> 10#include <asm/page_types.h> 11#include <asm/kexec.h> 12#include <asm/processor-flags.h> 13#include <asm/pgtable_types.h> 14#include <asm/nospec-branch.h> 15#include <asm/unwind_hints.h> 16#include <asm/asm-offsets.h> 17 18/* 19 * Must be relocatable PIC code callable as a C function, in particular 20 * there must be a plain RET and not jump to return thunk. 21 */ 22 23#define PTR(x) (x << 3) 24#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) 25 26/* 27 * The .text..relocate_kernel and .data..relocate_kernel sections are copied 28 * into the control page, and the remainder of the page is used as the stack. 29 */ 30 31 .section .data..relocate_kernel,"a"; 32/* Minimal CPU state */ 33SYM_DATA_LOCAL(saved_rsp, .quad 0) 34SYM_DATA_LOCAL(saved_cr0, .quad 0) 35SYM_DATA_LOCAL(saved_cr3, .quad 0) 36SYM_DATA_LOCAL(saved_cr4, .quad 0) 37 /* other data */ 38SYM_DATA(kexec_va_control_page, .quad 0) 39SYM_DATA(kexec_pa_table_page, .quad 0) 40SYM_DATA(kexec_pa_swap_page, .quad 0) 41SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0) 42SYM_DATA(kexec_debug_8250_mmio32, .quad 0) 43SYM_DATA(kexec_debug_8250_port, .word 0) 44 45 .balign 16 46SYM_DATA_START_LOCAL(kexec_debug_gdt) 47 .word kexec_debug_gdt_end - kexec_debug_gdt - 1 48 .long 0 49 .word 0 50 .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ 51 .quad 0x00af9a000000ffff /* __KERNEL_CS */ 52 .quad 0x00cf92000000ffff /* __KERNEL_DS */ 53SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end) 54 55 .balign 8 56SYM_DATA_START(kexec_debug_idt) 57 .skip 0x100, 0x00 58SYM_DATA_END(kexec_debug_idt) 59 60 .section .text..relocate_kernel,"ax"; 61 .code64 62SYM_CODE_START_NOALIGN(relocate_kernel) 63 UNWIND_HINT_END_OF_STACK 64 ANNOTATE_NOENDBR 65 /* 66 * %rdi indirection_page 67 * %rsi pa_control_page 68 * %rdx start address 69 * %rcx flags: RELOC_KERNEL_* 70 */ 71 72 /* Save the CPU context, used for jumping back */ 73 pushq %rbx 74 pushq %rbp 75 pushq %r12 76 pushq %r13 77 pushq %r14 78 pushq %r15 79 pushf 80 81 /* Invalidate GDT/IDT, zero out flags */ 82 pushq $0 83 pushq $0 84 85 lidt (%rsp) 86 lgdt (%rsp) 87 addq $8, %rsp 88 popfq 89 90 /* Switch to the identity mapped page tables */ 91 movq %cr3, %rax 92 movq kexec_pa_table_page(%rip), %r9 93 movq %r9, %cr3 94 95 /* Leave CR4 in %r13 to enable the right paging mode later. */ 96 movq %cr4, %r13 97 98 /* Disable global pages immediately to ensure this mapping is RWX */ 99 movq %r13, %r12 100 andq $~(X86_CR4_PGE), %r12 101 movq %r12, %cr4 102 103 /* Save %rsp and CRs. */ 104 movq %r13, saved_cr4(%rip) 105 movq %rsp, saved_rsp(%rip) 106 movq %rax, saved_cr3(%rip) 107 movq %cr0, %rax 108 movq %rax, saved_cr0(%rip) 109 110 /* save indirection list for jumping back */ 111 movq %rdi, pa_backup_pages_map(%rip) 112 113 /* Save the flags to %r11 as swap_pages clobbers %rcx. */ 114 movq %rcx, %r11 115 116 /* setup a new stack at the end of the physical control page */ 117 lea PAGE_SIZE(%rsi), %rsp 118 119 /* jump to identity mapped page */ 1200: addq $identity_mapped - 0b, %rsi 121 subq $__relocate_kernel_start - 0b, %rsi 122 ANNOTATE_RETPOLINE_SAFE 123 jmp *%rsi 124SYM_CODE_END(relocate_kernel) 125 126SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) 127 UNWIND_HINT_END_OF_STACK 128 /* 129 * %rdi indirection page 130 * %rdx start address 131 * %r9 page table page 132 * %r11 flags: RELOC_KERNEL_* 133 * %r13 original CR4 when relocate_kernel() was invoked 134 */ 135 136 /* store the start address on the stack */ 137 pushq %rdx 138 139 /* Create a GDTR (16 bits limit, 64 bits addr) on stack */ 140 leaq kexec_debug_gdt(%rip), %rax 141 pushq %rax 142 pushw (%rax) 143 144 /* Load the GDT, put the stack back */ 145 lgdt (%rsp) 146 addq $10, %rsp 147 148 /* Test that we can load segments */ 149 movq %ds, %rax 150 movq %rax, %ds 151 152 /* Now an IDTR on the stack to load the IDT the kernel created */ 153 leaq kexec_debug_idt(%rip), %rsi 154 pushq %rsi 155 pushw $0xff 156 lidt (%rsp) 157 addq $10, %rsp 158 159 //int3 160 161 /* 162 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP 163 * below. 164 */ 165 movq %cr4, %rax 166 andq $~(X86_CR4_CET), %rax 167 movq %rax, %cr4 168 169 /* 170 * Set cr0 to a known state: 171 * - Paging enabled 172 * - Alignment check disabled 173 * - Write protect disabled 174 * - No task switch 175 * - Don't do FP software emulation. 176 * - Protected mode enabled 177 */ 178 movq %cr0, %rax 179 andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax 180 orl $(X86_CR0_PG | X86_CR0_PE), %eax 181 movq %rax, %cr0 182 183 /* 184 * Set cr4 to a known state: 185 * - physical address extension enabled 186 * - 5-level paging, if it was enabled before 187 * - Machine check exception on TDX guest, if it was enabled before. 188 * Clearing MCE might not be allowed in TDX guests, depending on setup. 189 * 190 * Use R13 that contains the original CR4 value, read in relocate_kernel(). 191 * PAE is always set in the original CR4. 192 */ 193 andl $(X86_CR4_PAE | X86_CR4_LA57), %r13d 194 ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST 195 movq %r13, %cr4 196 197 /* Flush the TLB (needed?) */ 198 movq %r9, %cr3 199 200 /* 201 * If the memory cache is in incoherent state, e.g., due to 202 * memory encryption, do WBINVD to flush cache. 203 * 204 * If SME is active, there could be old encrypted cache line 205 * entries that will conflict with the now unencrypted memory 206 * used by kexec. Flush the caches before copying the kernel. 207 * 208 * Note SME sets this flag to true when the platform supports 209 * SME, so the WBINVD is performed even SME is not activated 210 * by the kernel. But this has no harm. 211 */ 212 testb $RELOC_KERNEL_CACHE_INCOHERENT, %r11b 213 jz .Lnowbinvd 214 wbinvd 215.Lnowbinvd: 216 217 call swap_pages 218 219 /* 220 * To be certain of avoiding problems with self-modifying code 221 * I need to execute a serializing instruction here. 222 * So I flush the TLB by reloading %cr3 here, it's handy, 223 * and not processor dependent. 224 */ 225 movq %cr3, %rax 226 movq %rax, %cr3 227 228 testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b 229 jnz .Lrelocate 230 231 /* 232 * set all of the registers to known values 233 * leave %rsp alone 234 */ 235 236 xorl %eax, %eax 237 xorl %ebx, %ebx 238 xorl %ecx, %ecx 239 xorl %edx, %edx 240 xorl %esi, %esi 241 xorl %edi, %edi 242 xorl %ebp, %ebp 243 xorl %r8d, %r8d 244 xorl %r9d, %r9d 245 xorl %r10d, %r10d 246 xorl %r11d, %r11d 247 xorl %r12d, %r12d 248 xorl %r13d, %r13d 249 xorl %r14d, %r14d 250 xorl %r15d, %r15d 251 252 ANNOTATE_UNRET_SAFE 253 ret 254 int3 255 256.Lrelocate: 257 popq %rdx 258 259 /* Use the swap page for the callee's stack */ 260 movq kexec_pa_swap_page(%rip), %r10 261 leaq PAGE_SIZE(%r10), %rsp 262 263 /* push the existing entry point onto the callee's stack */ 264 pushq %rdx 265 266 ANNOTATE_RETPOLINE_SAFE 267 call *%rdx 268 269 /* get the re-entry point of the peer system */ 270 popq %rbp 271 movq kexec_pa_swap_page(%rip), %r10 272 movq pa_backup_pages_map(%rip), %rdi 273 movq kexec_pa_table_page(%rip), %rax 274 movq %rax, %cr3 275 276 /* Find start (and end) of this physical mapping of control page */ 277 leaq (%rip), %r8 278 ANNOTATE_NOENDBR 279 andq $PAGE_MASK, %r8 280 lea PAGE_SIZE(%r8), %rsp 281 /* 282 * Ensure RELOC_KERNEL_PRESERVE_CONTEXT flag is set so that 283 * swap_pages() can swap pages correctly. Note all other 284 * RELOC_KERNEL_* flags passed to relocate_kernel() are not 285 * restored. 286 */ 287 movl $RELOC_KERNEL_PRESERVE_CONTEXT, %r11d 288 call swap_pages 289 movq kexec_va_control_page(%rip), %rax 2900: addq $virtual_mapped - 0b, %rax 291 subq $__relocate_kernel_start - 0b, %rax 292 pushq %rax 293 ANNOTATE_UNRET_SAFE 294 ret 295 int3 296SYM_CODE_END(identity_mapped) 297 298SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) 299 UNWIND_HINT_END_OF_STACK 300 ANNOTATE_NOENDBR // RET target, above 301 movq saved_rsp(%rip), %rsp 302 movq saved_cr4(%rip), %rax 303 movq %rax, %cr4 304 movq saved_cr3(%rip), %rax 305 movq saved_cr0(%rip), %r8 306 movq %rax, %cr3 307 movq %r8, %cr0 308 309#ifdef CONFIG_KEXEC_JUMP 310 /* Saved in save_processor_state. */ 311 movq $saved_context, %rax 312 lgdt saved_context_gdt_desc(%rax) 313#endif 314 315 /* relocate_kernel() returns the re-entry point for next time */ 316 movq %rbp, %rax 317 318 popf 319 popq %r15 320 popq %r14 321 popq %r13 322 popq %r12 323 popq %rbp 324 popq %rbx 325 ANNOTATE_UNRET_SAFE 326 ret 327 int3 328SYM_CODE_END(virtual_mapped) 329 330 /* Do the copies */ 331SYM_CODE_START_LOCAL_NOALIGN(swap_pages) 332 UNWIND_HINT_END_OF_STACK 333 /* 334 * %rdi indirection page 335 * %r11 flags: RELOC_KERNEL_* 336 */ 337 movq %rdi, %rcx /* Put the indirection_page in %rcx */ 338 xorl %edi, %edi 339 xorl %esi, %esi 340 jmp .Lstart /* Should start with an indirection record */ 341 342.Lloop: /* top, read another word for the indirection page */ 343 344 movq (%rbx), %rcx 345 addq $8, %rbx 346.Lstart: 347 testb $0x1, %cl /* is it a destination page? */ 348 jz .Lnotdest 349 movq %rcx, %rdi 350 andq $0xfffffffffffff000, %rdi 351 jmp .Lloop 352.Lnotdest: 353 testb $0x2, %cl /* is it an indirection page? */ 354 jz .Lnotind 355 movq %rcx, %rbx 356 andq $0xfffffffffffff000, %rbx 357 jmp .Lloop 358.Lnotind: 359 testb $0x4, %cl /* is it the done indicator? */ 360 jz .Lnotdone 361 jmp .Ldone 362.Lnotdone: 363 testb $0x8, %cl /* is it the source indicator? */ 364 jz .Lloop /* Ignore it otherwise */ 365 movq %rcx, %rsi /* For ever source page do a copy */ 366 andq $0xfffffffffffff000, %rsi 367 368 movq %rdi, %rdx /* Save destination page to %rdx */ 369 movq %rsi, %rax /* Save source page to %rax */ 370 371 /* Only actually swap for ::preserve_context */ 372 testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b 373 jz .Lnoswap 374 375 /* copy source page to swap page */ 376 movq kexec_pa_swap_page(%rip), %rdi 377 movl $512, %ecx 378 rep movsq 379 380 /* copy destination page to source page */ 381 movq %rax, %rdi 382 movq %rdx, %rsi 383 movl $512, %ecx 384 rep movsq 385 386 /* copy swap page to destination page */ 387 movq %rdx, %rdi 388 movq kexec_pa_swap_page(%rip), %rsi 389.Lnoswap: 390 movl $512, %ecx 391 rep movsq 392 393 lea PAGE_SIZE(%rax), %rsi 394 jmp .Lloop 395.Ldone: 396 ANNOTATE_UNRET_SAFE 397 ret 398 int3 399SYM_CODE_END(swap_pages) 400 401/* 402 * Generic 'print character' routine 403 * - %al: Character to be printed (may clobber %rax) 404 * - %rdx: MMIO address or port. 405 */ 406#define XMTRDY 0x20 407 408#define TXR 0 /* Transmit register (WRITE) */ 409#define LSR 5 /* Line Status */ 410 411SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250) 412 UNWIND_HINT_FUNC 413 ANNOTATE_NOENDBR 414 addw $LSR, %dx 415 xchg %al, %ah 416.Lxmtrdy_loop: 417 inb %dx, %al 418 testb $XMTRDY, %al 419 jnz .Lready 420 pause 421 jmp .Lxmtrdy_loop 422 423.Lready: 424 subw $LSR, %dx 425 xchg %al, %ah 426 outb %al, %dx 427pr_char_null: 428 ANNOTATE_NOENDBR 429 430 ANNOTATE_UNRET_SAFE 431 ret 432SYM_CODE_END(pr_char_8250) 433 434SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32) 435 UNWIND_HINT_FUNC 436 ANNOTATE_NOENDBR 437.Lxmtrdy_loop_mmio: 438 movb (LSR*4)(%rdx), %ah 439 testb $XMTRDY, %ah 440 jnz .Lready_mmio 441 pause 442 jmp .Lxmtrdy_loop_mmio 443 444.Lready_mmio: 445 movb %al, (%rdx) 446 ANNOTATE_UNRET_SAFE 447 ret 448SYM_CODE_END(pr_char_8250_mmio32) 449 450/* 451 * Load pr_char function pointer into %rsi and load %rdx with whatever 452 * that function wants to see there (typically port/MMIO address). 453 */ 454.macro pr_setup 455 leaq pr_char_8250(%rip), %rsi 456 movw kexec_debug_8250_port(%rip), %dx 457 testw %dx, %dx 458 jnz 1f 459 460 leaq pr_char_8250_mmio32(%rip), %rsi 461 movq kexec_debug_8250_mmio32(%rip), %rdx 462 testq %rdx, %rdx 463 jnz 1f 464 465 leaq pr_char_null(%rip), %rsi 4661: 467.endm 468 469/* Print the nybble in %bl, clobber %rax */ 470SYM_CODE_START_LOCAL_NOALIGN(pr_nybble) 471 UNWIND_HINT_FUNC 472 movb %bl, %al 473 nop 474 andb $0x0f, %al 475 addb $0x30, %al 476 cmpb $0x3a, %al 477 jb 1f 478 addb $('a' - '0' - 10), %al 479 ANNOTATE_RETPOLINE_SAFE 4801: jmp *%rsi 481SYM_CODE_END(pr_nybble) 482 483SYM_CODE_START_LOCAL_NOALIGN(pr_qword) 484 UNWIND_HINT_FUNC 485 movq $16, %rcx 4861: rolq $4, %rbx 487 call pr_nybble 488 loop 1b 489 movb $'\n', %al 490 ANNOTATE_RETPOLINE_SAFE 491 jmp *%rsi 492SYM_CODE_END(pr_qword) 493 494.macro print_reg a, b, c, d, r 495 movb $\a, %al 496 ANNOTATE_RETPOLINE_SAFE 497 call *%rsi 498 movb $\b, %al 499 ANNOTATE_RETPOLINE_SAFE 500 call *%rsi 501 movb $\c, %al 502 ANNOTATE_RETPOLINE_SAFE 503 call *%rsi 504 movb $\d, %al 505 ANNOTATE_RETPOLINE_SAFE 506 call *%rsi 507 movq \r, %rbx 508 call pr_qword 509.endm 510 511SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors) 512 /* Each of these is 6 bytes. */ 513.macro vec_err exc 514 UNWIND_HINT_ENTRY 515 . = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE) 516 nop 517 nop 518 pushq $\exc 519 jmp exc_handler 520.endm 521 522.macro vec_noerr exc 523 UNWIND_HINT_ENTRY 524 . = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE) 525 pushq $0 526 pushq $\exc 527 jmp exc_handler 528.endm 529 530 ANNOTATE_NOENDBR 531 vec_noerr 0 // #DE 532 vec_noerr 1 // #DB 533 vec_noerr 2 // #NMI 534 vec_noerr 3 // #BP 535 vec_noerr 4 // #OF 536 vec_noerr 5 // #BR 537 vec_noerr 6 // #UD 538 vec_noerr 7 // #NM 539 vec_err 8 // #DF 540 vec_noerr 9 541 vec_err 10 // #TS 542 vec_err 11 // #NP 543 vec_err 12 // #SS 544 vec_err 13 // #GP 545 vec_err 14 // #PF 546 vec_noerr 15 547SYM_CODE_END(kexec_debug_exc_vectors) 548 549SYM_CODE_START_LOCAL_NOALIGN(exc_handler) 550 /* No need for RET mitigations during kexec */ 551 VALIDATE_UNRET_END 552 553 pushq %rax 554 pushq %rbx 555 pushq %rcx 556 pushq %rdx 557 pushq %rsi 558 559 /* Stack frame */ 560#define EXC_SS 0x58 /* Architectural... */ 561#define EXC_RSP 0x50 562#define EXC_EFLAGS 0x48 563#define EXC_CS 0x40 564#define EXC_RIP 0x38 565#define EXC_ERRORCODE 0x30 /* Either architectural or zero pushed by handler */ 566#define EXC_EXCEPTION 0x28 /* Pushed by handler entry point */ 567#define EXC_RAX 0x20 /* Pushed just above in exc_handler */ 568#define EXC_RBX 0x18 569#define EXC_RCX 0x10 570#define EXC_RDX 0x08 571#define EXC_RSI 0x00 572 573 /* Set up %rdx/%rsi for debug output */ 574 pr_setup 575 576 /* rip and exception info */ 577 print_reg 'E', 'x', 'c', ':', EXC_EXCEPTION(%rsp) 578 print_reg 'E', 'r', 'r', ':', EXC_ERRORCODE(%rsp) 579 print_reg 'r', 'i', 'p', ':', EXC_RIP(%rsp) 580 print_reg 'r', 's', 'p', ':', EXC_RSP(%rsp) 581 582 /* We spilled these to the stack */ 583 print_reg 'r', 'a', 'x', ':', EXC_RAX(%rsp) 584 print_reg 'r', 'b', 'x', ':', EXC_RBX(%rsp) 585 print_reg 'r', 'c', 'x', ':', EXC_RCX(%rsp) 586 print_reg 'r', 'd', 'x', ':', EXC_RDX(%rsp) 587 print_reg 'r', 's', 'i', ':', EXC_RSI(%rsp) 588 589 /* Other registers untouched */ 590 print_reg 'r', 'd', 'i', ':', %rdi 591 print_reg 'r', '8', ' ', ':', %r8 592 print_reg 'r', '9', ' ', ':', %r9 593 print_reg 'r', '1', '0', ':', %r10 594 print_reg 'r', '1', '1', ':', %r11 595 print_reg 'r', '1', '2', ':', %r12 596 print_reg 'r', '1', '3', ':', %r13 597 print_reg 'r', '1', '4', ':', %r14 598 print_reg 'r', '1', '5', ':', %r15 599 print_reg 'c', 'r', '2', ':', %cr2 600 601 /* Only return from INT3 */ 602 cmpq $3, EXC_EXCEPTION(%rsp) 603 jne .Ldie 604 605 popq %rsi 606 popq %rdx 607 popq %rcx 608 popq %rbx 609 popq %rax 610 611 addq $16, %rsp 612 iretq 613 614.Ldie: 615 hlt 616 jmp .Ldie 617 618SYM_CODE_END(exc_handler) 619