/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function, in particular
 * there must be a plain RET and not jump to return thunk.
 */

/* Convert a page_list slot index into a byte offset (8 bytes per entry). */
#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
 * ~ control_page + PAGE_SIZE are used as data storage and stack for
 * jumping back
 */
#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))

/* Minimal CPU state saved in the control page for the return trip. */
#define RSP			DATA(0x0)
#define CR0			DATA(0x8)
#define CR3			DATA(0x10)
#define CR4			DATA(0x18)

/* other data */
#define CP_PA_TABLE_PAGE	DATA(0x20)
#define CP_PA_SWAP_PAGE		DATA(0x28)
#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)

	.text
	.align PAGE_SIZE
	.code64
SYM_CODE_START_NOALIGN(relocate_range)
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi page_list
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8  host_mem_enc_active
	 */

	/* Save the CPU context, used for jumping back */
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	pushf

	/*
	 * %r11 = virtual address of the control page; stash the state
	 * needed by virtual_mapped (below) in its data area.
	 */
	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
	movq	%rsp, RSP(%r11)
	movq	%cr0, %rax
	movq	%rax, CR0(%r11)
	movq	%cr3, %rax
	movq	%rax, CR3(%r11)
	movq	%cr4, %rax
	movq	%rax, CR4(%r11)

	/* Save CR4. Required to enable the right paging mode later. */
	movq	%rax, %r13

	/* zero out flags, and disable interrupts */
	pushq $0
	popfq

	/* Save SME active flag */
	movq	%r8, %r12

	/*
	 * get physical address of control page now
	 * this is impossible after page table switch
	 */
	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8

	/* get physical address of page table now too */
	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9

	/* get physical address of swap page now */
	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10

	/* save some information for jumping back */
	movq	%r9, CP_PA_TABLE_PAGE(%r11)
	movq	%r10, CP_PA_SWAP_PAGE(%r11)
	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)

	/* Switch to the identity mapped page tables */
	movq	%r9, %cr3

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%r8), %rsp

	/*
	 * jump to identity mapped page
	 * (push physical target and RET: PIC-safe, no relocation needed)
	 */
	addq	$(identity_mapped - relocate_kernel), %r8
	pushq	%r8
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(relocate_kernel)

SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/* set return address to 0 if not preserving context */
	pushq	$0
	/* store the start address on the stack */
	pushq   %rdx

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?) */
	movq	%r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r12, %r12
	jz .Lsme_off
	wbinvd
.Lsme_off:

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11
	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 * (only when not preserving context: the new kernel must not
	 * inherit stale state)
	 */

	testq	%r11, %r11
	jnz .Lrelocate
	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	/* RET consumes the start address pushed on entry: jump to new kernel. */
	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lrelocate:
	popq	%rdx
	/* Run the called code on the stack carved out of the swap page. */
	leaq	PAGE_SIZE(%r10), %rsp
	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/* get the re-entry point of the peer system */
	movq	0(%rsp), %rbp
	leaq	relocate_kernel(%rip), %r8
	movq	CP_PA_SWAP_PAGE(%r8), %r10
	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
	movq	CP_PA_TABLE_PAGE(%r8), %rax
	movq	%rax, %cr3
	lea	PAGE_SIZE(%r8), %rsp
	/* Undo the copies so the original kernel's pages are restored. */
	call	swap_pages
	movq	$virtual_mapped, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)

SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	/* Restore the CPU state saved in the control page (%r8) on entry. */
	movq	RSP(%r8), %rsp
	movq	CR4(%r8), %rax
	movq	%rax, %cr4
	movq	CR3(%r8), %rax
	movq	CR0(%r8), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq	$saved_context, %rax
	lgdt	saved_context_gdt_desc(%rax)
#endif

	/* Return the peer's re-entry point to the C caller. */
	movq	%rbp, %rax

	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)

	/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	/*
	 * Walk the indirection page list; each 8-byte entry is a page
	 * address with low-bit flags selecting its meaning.
	 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, %rdi.
	 */
	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	xorl	%edi, %edi
	xorl	%esi, %esi
	jmp	1f

0:	/* top, read another word for the indirection page */

	movq	(%rbx), %rcx
	addq	$8,	%rbx
1:
	testb	$0x1,	%cl   /* is it a destination page? */
	jz	2f
	movq	%rcx,	%rdi
	andq	$0xfffffffffffff000, %rdi
	jmp	0b
2:
	testb	$0x2,	%cl   /* is it an indirection page? */
	jz	2f
	movq	%rcx,   %rbx
	andq	$0xfffffffffffff000, %rbx
	jmp	0b
2:
	testb	$0x4,	%cl   /* is it the done indicator? */
	jz	2f
	jmp	3f
2:
	testb	$0x8,	%cl   /* is it the source indicator? */
	jz	0b	      /* Ignore it otherwise */
	movq	%rcx,   %rsi  /* For every source page do a copy */
	andq	$0xfffffffffffff000, %rsi

	movq	%rdi, %rdx    /* Save destination page to %rdx */
	movq	%rsi, %rax    /* Save source page to %rax */

	/*
	 * Three-way exchange via the swap page so both kernels' pages
	 * survive (needed for jumping back). 512 quadwords = one 4K page.
	 */

	/* copy source page to swap page */
	movq	%r10, %rdi
	movl	$512, %ecx
	rep ; movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movl	$512, %ecx
	rep ; movsq

	/* copy swap page to destination page */
	movq	%rdx, %rdi
	movq	%r10, %rsi
	movl	$512, %ecx
	rep ; movsq

	lea	PAGE_SIZE(%rax), %rsi
	jmp	0b
3:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)

	/* Pad the control-code area with int3 up to its maximum size. */
	.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
SYM_CODE_END(relocate_range);