/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function, in particular
 * there must be a plain RET and not jump to return thunk.
 */

#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */

	.section .data..relocate_kernel,"a";
/* Minimal CPU state */
SYM_DATA_LOCAL(saved_rsp, .quad 0)
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)
	/* other data */
SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
SYM_DATA(kexec_debug_8250_mmio32, .quad 0)
SYM_DATA(kexec_debug_8250_port, .word 0)

	.balign	16
SYM_DATA_START_LOCAL(kexec_debug_gdt)
	.word	kexec_debug_gdt_end - kexec_debug_gdt - 1
	.long	0
	.word	0
	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)

	.balign	8
SYM_DATA_START(kexec_debug_idt)
	.skip 0x100, 0x00
SYM_DATA_END(kexec_debug_idt)

	.section .text..relocate_kernel,"ax";
	.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi pa_control_page
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8  host_mem_enc_active
	 */

	/* Save the CPU context, used for jumping back */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	pushf

	/* Invalidate GDT/IDT, zero out flags */
	pushq	$0
	pushq	$0

	lidt	(%rsp)
	lgdt	(%rsp)
	addq	$8, %rsp
	popfq

	/* Switch to the identity mapped page tables */
	movq	%cr3, %rax
	movq	kexec_pa_table_page(%rip), %r9
	movq	%r9, %cr3

	/* Leave CR4 in %r13 to enable the right paging mode later. */
	movq	%cr4, %r13

	/* Disable global pages immediately to ensure this mapping is RWX */
	movq	%r13, %r12
	andq	$~(X86_CR4_PGE), %r12
	movq	%r12, %cr4

	/* Save %rsp and CRs. */
	movq	%r13, saved_cr4(%rip)
	movq	%rsp, saved_rsp(%rip)
	movq	%rax, saved_cr3(%rip)
	movq	%cr0, %rax
	movq	%rax, saved_cr0(%rip)

	/* save indirection list for jumping back */
	movq	%rdi, pa_backup_pages_map(%rip)

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%rsi), %rsp
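
	/*
	 * %rsi holds the physical address of the control page. The 0: label
	 * below makes both operands RIP-relative, so the addq/subq pair adds
	 * (identity_mapped - __relocate_kernel_start), i.e. the offset of
	 * identity_mapped() within the copied section, to %rsi without
	 * needing any absolute relocations.
	 */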
	/* jump to identity mapped page */
0:	addq	$identity_mapped - 0b, %rsi
	subq	$__relocate_kernel_start - 0b, %rsi
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rsi
SYM_CODE_END(relocate_kernel)

SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %rdx start address
	 * %r8  host_mem_enc_active
	 * %r9  page table page
	 * %r11 preserve_context
	 * %r13 original CR4 when relocate_kernel() was invoked
	 */

	/* store the start address on the stack */
	pushq	%rdx

	/* Create a GDTR (16 bits limit, 64 bits addr) on stack */
	leaq	kexec_debug_gdt(%rip), %rax
	pushq	%rax
	pushw	(%rax)

	/* Load the GDT, put the stack back */
	lgdt	(%rsp)
	addq	$10, %rsp

	/* Test that we can load segments */
	movq	%ds, %rax
	movq	%rax, %ds

	/* Now an IDTR on the stack to load the IDT the kernel created */
	leaq	kexec_debug_idt(%rip), %rsi
	pushq	%rsi
	pushw	$0xff
	lidt	(%rsp)
	addq	$10, %rsp

	//int3

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?) */
	movq	%r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r8, %r8
	jz	.Lsme_off
	wbinvd
.Lsme_off:

	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3
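
	/*
	 * Two ways out of here. For a normal kexec, wipe the register state
	 * and RET to the new kernel's entry point, which was pushed on entry
	 * to identity_mapped(). For preserve_context (kexec jump), call into
	 * the image instead and expect it to return, so the old kernel can
	 * be restored afterwards.
	 */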
	testq	%r11, %r11	/* preserve_context */
	jnz	.Lrelocate

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */

	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lrelocate:
	popq	%rdx

	/* Use the swap page for the callee's stack */
	movq	kexec_pa_swap_page(%rip), %r10
	leaq	PAGE_SIZE(%r10), %rsp

	/* push the existing entry point onto the callee's stack */
	pushq	%rdx

	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/* get the re-entry point of the peer system */
	popq	%rbp
	movq	kexec_pa_swap_page(%rip), %r10
	movq	pa_backup_pages_map(%rip), %rdi
	movq	kexec_pa_table_page(%rip), %rax
	movq	%rax, %cr3

	/* Find start (and end) of this physical mapping of control page */
	leaq	(%rip), %r8
	ANNOTATE_NOENDBR
	andq	$PAGE_MASK, %r8
	lea	PAGE_SIZE(%r8), %rsp
	movl	$1, %r11d	/* Ensure preserve_context flag is set */
	call	swap_pages
	movq	kexec_va_control_page(%rip), %rax
0:	addq	$virtual_mapped - 0b, %rax
	subq	$__relocate_kernel_start - 0b, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)

SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq	saved_rsp(%rip), %rsp
	movq	saved_cr4(%rip), %rax
	movq	%rax, %cr4
	movq	saved_cr3(%rip), %rax
	movq	saved_cr0(%rip), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq	$saved_context, %rax
	lgdt	saved_context_gdt_desc(%rax)
#endif

	/* relocate_kernel() returns the re-entry point for next time */
	movq	%rbp, %rax

	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)
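
/*
 * The indirection page handed to swap_pages() is a list of 8-byte entries,
 * each a page-aligned address with its type encoded in the low bits
 * (matching the IND_* flags in include/linux/kexec.h):
 *
 *   0x1 IND_DESTINATION: set the current destination page
 *   0x2 IND_INDIRECTION: continue reading entries from this page
 *   0x4 IND_DONE:        end of the list
 *   0x8 IND_SOURCE:      copy this source page to the current destination
 *
 * With preserve_context set, source and destination pages are swapped via
 * the swap page rather than overwritten, so the old kernel can be restored.
 */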
	/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %r11 preserve_context
	 */
	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	xorl	%edi, %edi
	xorl	%esi, %esi
	jmp	.Lstart		/* Should start with an indirection record */

.Lloop:	/* top, read another word from the indirection page */

	movq	(%rbx), %rcx
	addq	$8, %rbx
.Lstart:
	testb	$0x1, %cl	/* is it a destination page? */
	jz	.Lnotdest
	movq	%rcx, %rdi
	andq	$0xfffffffffffff000, %rdi
	jmp	.Lloop
.Lnotdest:
	testb	$0x2, %cl	/* is it an indirection page? */
	jz	.Lnotind
	movq	%rcx, %rbx
	andq	$0xfffffffffffff000, %rbx
	jmp	.Lloop
.Lnotind:
	testb	$0x4, %cl	/* is it the done indicator? */
	jz	.Lnotdone
	jmp	.Ldone
.Lnotdone:
	testb	$0x8, %cl	/* is it the source indicator? */
	jz	.Lloop		/* Ignore it otherwise */
	movq	%rcx, %rsi	/* For every source page do a copy */
	andq	$0xfffffffffffff000, %rsi

	movq	%rdi, %rdx	/* Save destination page to %rdx */
	movq	%rsi, %rax	/* Save source page to %rax */

	testq	%r11, %r11	/* Only actually swap for ::preserve_context */
	jz	.Lnoswap

	/* copy source page to swap page */
	movq	kexec_pa_swap_page(%rip), %rdi
	movl	$512, %ecx
	rep movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movl	$512, %ecx
	rep movsq

	/* copy swap page to destination page */
	movq	%rdx, %rdi
	movq	kexec_pa_swap_page(%rip), %rsi
.Lnoswap:
	movl	$512, %ecx
	rep movsq

	lea	PAGE_SIZE(%rax), %rsi
	jmp	.Lloop
.Ldone:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)

/*
 * Generic 'print character' routine
 *  - %al: Character to be printed (may clobber %rax)
 *  - %rdx: MMIO address or port.
 */
#define XMTRDY		0x20

#define TXR		0	/* Transmit register (WRITE) */
#define LSR		5	/* Line Status */

SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	addw	$LSR, %dx
	xchg	%al, %ah
.Lxmtrdy_loop:
	inb	%dx, %al
	testb	$XMTRDY, %al
	jnz	.Lready
	pause
	jmp	.Lxmtrdy_loop

.Lready:
	subw	$LSR, %dx
	xchg	%al, %ah
	outb	%al, %dx
pr_char_null:
	ANNOTATE_NOENDBR

	ANNOTATE_UNRET_SAFE
	ret
SYM_CODE_END(pr_char_8250)

SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
.Lxmtrdy_loop_mmio:
	movb	(LSR*4)(%rdx), %ah
	testb	$XMTRDY, %ah
	jnz	.Lready_mmio
	pause
	jmp	.Lxmtrdy_loop_mmio

.Lready_mmio:
	movb	%al, (%rdx)
	ANNOTATE_UNRET_SAFE
	ret
SYM_CODE_END(pr_char_8250_mmio32)

/*
 * Load pr_char function pointer into %rsi and load %rdx with whatever
 * that function wants to see there (typically port/MMIO address).
 */
.macro pr_setup
	leaq	pr_char_8250(%rip), %rsi
	movw	kexec_debug_8250_port(%rip), %dx
	testw	%dx, %dx
	jnz	1f

	leaq	pr_char_8250_mmio32(%rip), %rsi
	movq	kexec_debug_8250_mmio32(%rip), %rdx
	testq	%rdx, %rdx
	jnz	1f

	leaq	pr_char_null(%rip), %rsi
1:
.endm

/* Print the nybble in %bl, clobber %rax */
SYM_CODE_START_LOCAL_NOALIGN(pr_nybble)
	UNWIND_HINT_FUNC
	movb	%bl, %al
	nop
	andb	$0x0f, %al
	addb	$0x30, %al
	cmpb	$0x3a, %al
	jb	1f
	addb	$('a' - '0' - 10), %al
	ANNOTATE_RETPOLINE_SAFE
1:	jmp	*%rsi
SYM_CODE_END(pr_nybble)

SYM_CODE_START_LOCAL_NOALIGN(pr_qword)
	UNWIND_HINT_FUNC
	movq	$16, %rcx
1:	rolq	$4, %rbx
	call	pr_nybble
	loop	1b
	movb	$'\n', %al
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rsi
SYM_CODE_END(pr_qword)

.macro print_reg a, b, c, d, r
	movb	$\a, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	movb	$\b, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	movb	$\c, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	movb	$\d, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	movq	\r, %rbx
	call	pr_qword
.endm
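
/*
 * Exception vector stubs for the debug IDT. The '. =' directive pins each
 * stub at (vector * KEXEC_DEBUG_EXC_HANDLER_SIZE) so its address can be
 * computed when the IDT entries are written. vec_err omits the dummy
 * error-code push because the CPU pushes a real one for those vectors;
 * the two NOPs pad it to the same size as the vec_noerr variant.
 */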
SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors)
	/* Each of these is 6 bytes. */
.macro vec_err exc
	UNWIND_HINT_ENTRY
	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
	nop
	nop
	pushq	$\exc
	jmp	exc_handler
.endm

.macro vec_noerr exc
	UNWIND_HINT_ENTRY
	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
	pushq	$0
	pushq	$\exc
	jmp	exc_handler
.endm

	ANNOTATE_NOENDBR
	vec_noerr 0	// #DE
	vec_noerr 1	// #DB
	vec_noerr 2	// #NMI
	vec_noerr 3	// #BP
	vec_noerr 4	// #OF
	vec_noerr 5	// #BR
	vec_noerr 6	// #UD
	vec_noerr 7	// #NM
	vec_err   8	// #DF
	vec_noerr 9
	vec_err   10	// #TS
	vec_err   11	// #NP
	vec_err   12	// #SS
	vec_err   13	// #GP
	vec_err   14	// #PF
	vec_noerr 15
SYM_CODE_END(kexec_debug_exc_vectors)

SYM_CODE_START_LOCAL_NOALIGN(exc_handler)
	/* No need for RET mitigations during kexec */
	VALIDATE_UNRET_END

	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	pushq	%rdx
	pushq	%rsi

	/* Stack frame */
#define EXC_SS		0x58 /* Architectural... */
#define EXC_RSP		0x50
#define EXC_EFLAGS	0x48
#define EXC_CS		0x40
#define EXC_RIP		0x38
#define EXC_ERRORCODE	0x30 /* Either architectural or zero pushed by handler */
#define EXC_EXCEPTION	0x28 /* Pushed by handler entry point */
#define EXC_RAX		0x20 /* Pushed just above in exc_handler */
#define EXC_RBX		0x18
#define EXC_RCX		0x10
#define EXC_RDX		0x08
#define EXC_RSI		0x00

	/* Set up %rdx/%rsi for debug output */
	pr_setup

	/* rip and exception info */
	print_reg 'E', 'x', 'c', ':', EXC_EXCEPTION(%rsp)
	print_reg 'E', 'r', 'r', ':', EXC_ERRORCODE(%rsp)
	print_reg 'r', 'i', 'p', ':', EXC_RIP(%rsp)
	print_reg 'r', 's', 'p', ':', EXC_RSP(%rsp)

	/* We spilled these to the stack */
	print_reg 'r', 'a', 'x', ':', EXC_RAX(%rsp)
	print_reg 'r', 'b', 'x', ':', EXC_RBX(%rsp)
	print_reg 'r', 'c', 'x', ':', EXC_RCX(%rsp)
	print_reg 'r', 'd', 'x', ':', EXC_RDX(%rsp)
	print_reg 'r', 's', 'i', ':', EXC_RSI(%rsp)

	/* Other registers untouched */
	print_reg 'r', 'd', 'i', ':', %rdi
	print_reg 'r', '8', ' ', ':', %r8
	print_reg 'r', '9', ' ', ':', %r9
	print_reg 'r', '1', '0', ':', %r10
	print_reg 'r', '1', '1', ':', %r11
	print_reg 'r', '1', '2', ':', %r12
	print_reg 'r', '1', '3', ':', %r13
	print_reg 'r', '1', '4', ':', %r14
	print_reg 'r', '1', '5', ':', %r15
	print_reg 'c', 'r', '2', ':', %cr2

	/* Only return from INT3 */
	cmpq	$3, EXC_EXCEPTION(%rsp)
	jne	.Ldie

	popq	%rsi
	popq	%rdx
	popq	%rcx
	popq	%rbx
	popq	%rax

	addq	$16, %rsp
	iretq

.Ldie:
	hlt
	jmp	.Ldie

SYM_CODE_END(exc_handler)